Mercurial > repos > willmclaren > ensembl_vep
diff variant_effect_predictor/Bio/EnsEMBL/Compara/GenomicAlignTree.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Compara/GenomicAlignTree.pm Fri Aug 03 10:04:48 2012 -0400 @@ -0,0 +1,1071 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=head1 NAME + +Bio::EnsEMBL::Compara::GenomicAlignTree + +=head1 SYNOPSIS + +=head1 DESCRIPTION + +Specific subclass of NestedSet to add functionality when the nodes of this tree +are GenomicAlign objects and the tree is a representation of a Protein derived +Phylogenetic tree + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ + +=cut + + +package Bio::EnsEMBL::Compara::GenomicAlignTree; + +use strict; +use Bio::EnsEMBL::Utils::Argument; +use Bio::EnsEMBL::Utils::Exception; +use Bio::SimpleAlign; +use IO::File; + +use Bio::EnsEMBL::Compara::NestedSet; +use Bio::EnsEMBL::Compara::BaseGenomicAlignSet; + +our @ISA = qw(Bio::EnsEMBL::Compara::NestedSet Bio::EnsEMBL::Compara::BaseGenomicAlignSet); + + +=head2 left_node_id + + Arg [1] : (optional) $left_node_id + Example : $object->left_node_id($left_node_id); + Example : $left_node_id = $object->left_node_id(); + Description : Getter/setter for the left_node_id attribute + Returntype : integer + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub left_node_id { + my $self = shift; + if (@_) { + $self->{_left_node_id} = shift; + } + return $self->{_left_node_id}; +} + + +=head2 right_node_id + + Arg [1] : (optional) $right_node_id + Example : $object->right_node_id($right_node_id); + Example : $right_node_id = $object->right_node_id(); + Description : Getter/setter for the right_node_id attribute + Returntype : integer + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub right_node_id { + my $self = shift; + if (@_) { + $self->{_right_node_id} = shift; + } + return $self->{_right_node_id}; +} + + +=head2 left_node + + Arg [1] : -none- + Example : $left_node = $object->left_node(); + Description : Get the left_node object from the database. + Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub left_node { + my $self = shift; + if ($self->{_left_node_id} and $self->adaptor) { + return $self->adaptor->fetch_node_by_node_id($self->{_left_node_id}); + } + return undef; +} + + +=head2 right_node + + Arg [1] : -none- + Example : $left_node = $object->right_node(); + Description : Get the right_node object from the database. + Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub right_node { + my $self = shift; + if ($self->{_right_node_id} and $self->adaptor) { + return $self->adaptor->fetch_node_by_node_id($self->{_right_node_id}); + } + return undef; +} + +=head2 ancestral_genomic_align_block_id + + Arg [1] : (optional) $ancestral_genomic_align_block_id + Example : $object->ancestral_genomic_align_block_id($ancestral_genomic_align_block_id); + Example : $ancestral_genomic_align_block_id = $object->ancestral_genomic_align_block_id(); + Description : Getter/setter for the ancestral_genomic_align_block_id attribute + This attribute is intended for the root of the tree only! + Returntype : int + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub ancestral_genomic_align_block_id { + my $self = shift; + if (@_) { + $self->{_ancestral_genomic_align_block_id} = shift; + } + return $self->{_ancestral_genomic_align_block_id}; +} + + +=head2 modern_genomic_align_block_id + + Arg [1] : (optional) $modern_genomic_align_block_id + Example : $object->modern_genomic_align_block_id($modern_genomic_align_block_id); + Example : $modern_genomic_align_block_id = $object->modern_genomic_align_block_id(); + Description : Getter/setter for the modern_genomic_align_block_id attribute + Returntype : + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub modern_genomic_align_block_id { + my $self = shift; + if (@_) { + $self->{_modern_genomic_align_block_id} = shift; + } + return $self->{_modern_genomic_align_block_id}; +} + + +=head2 genomic_align_group + + Arg [1] : (optional) $genomic_align_group + Example : $object->genomic_align_group($genomic_align_group); + Example : $genomic_align_group = $object->genomic_align_group(); + Description : Getter/setter for the genomic_align_group attribute + Returntype : Bio::EnsEMBL::Compara::GenomicAlignGroup object + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub genomic_align_group { + my $self = shift; + if (@_) { + $self->{_genomic_align_group} = shift; + } + return $self->{_genomic_align_group}; +} + + +=head2 get_all_genomic_aligns_for_node + + Arg [1] : -none- + Example : $genomic_aligns = $object->get_all_genomic_aligns_for_node + Description : Getter for all the GenomicAligns contained in the + genomic_align_group object on a node. This method is a short + cut for $object->genomic_align_group->get_all_GenomicAligns() + Returntype : listref of Bio::EnsEMBL::Compara::GenomicAlign objects + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub get_all_genomic_aligns_for_node { + my $self = shift(@_); + return [] if (!$self->genomic_align_group); + return $self->genomic_align_group->get_all_GenomicAligns; +} + +=head2 genomic_align_array (DEPRECATED) + + Arg [1] : -none- + Example : $genomic_aligns = $object->genomic_align_array + Description : Alias for get_all_genomic_aligns_for_node. TO BE DEPRECATED + Returntype : listref of Bio::EnsEMBL::Compara::GenomicAlign objects + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub genomic_align_array { + my $self = shift(@_); + + deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignTree->get_all_genomic_aligns_for_node() method instead"); + return($self->get_all_genomic_aligns_for_node); + +} + +=head2 get_all_GenomicAligns (DEPRECATED) + + Arg [1] : -none- + Example : $genomic_aligns = $object->get_all_GenomicAligns + Description : Alias for get_all_genomic_aligns_for_node. TO BE DEPRECATED + Returntype : listref of Bio::EnsEMBL::Compara::GenomicAlign objects + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub get_all_GenomicAligns { + my $self = shift(@_); + + deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignTree->get_all_genomic_aligns_for_node() method instead"); + return($self->get_all_genomic_aligns_for_node); + +} + +=head2 reference_genomic_align + + Arg [1] : (optional) $reference_genomic_align + Example : $object->reference_genomic_align($reference_genomic_align); + Example : $reference_genomic_align = $object->reference_genomic_align(); + Description : Getter/setter for the reference_genomic_align attribute + Returntype : Bio::EnsEMBL::Compara::GenomicAlign object + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub reference_genomic_align { + my $self = shift; + + if (@_) { + $self->{reference_genomic_align} = shift; + } + + return $self->{reference_genomic_align}; +} + +=head2 reference_genomic_align_id + + Arg [1] : integer $reference_genomic_align_id + Example : $genomic_align_block->reference_genomic_align_id(4321); + Description: get/set for attribute reference_genomic_align_id. A value of 0 will set the + reference_genomic_align_id attribute to undef. When looking for genomic + alignments in a given slice or dnafrag, the reference_genomic_align + corresponds to the Bio::EnsEMBL::Compara::GenomicAlign included in the + starting slice or dnafrag. The reference_genomic_align_id is the dbID + corresponding to the reference_genomic_align. All remaining + Bio::EnsEMBL::Compara::GenomicAlign objects included in the + Bio::EnsEMBL::Compara::GenomicAlignBlock are the + non_reference_genomic_aligns. + Synchronises reference_genomic_align and reference_genomic_align_id + attributes. + Returntype : integer + Exceptions : throw if $reference_genomic_align_id id not a postive number + Caller : $genomic_align_block->reference_genomic_align_id(int) + Status : Stable + +=cut + +sub reference_genomic_align_id { + my ($self, $reference_genomic_align_id) = @_; + + if (defined($reference_genomic_align_id)) { + if ($reference_genomic_align_id !~ /^\d+$/) { + throw "[$reference_genomic_align_id] should be a positive number."; + } + $self->{'reference_genomic_align_id'} = ($reference_genomic_align_id or undef); + + ## Synchronises reference_genomic_align and reference_genomic_align_id + if (defined($self->{'reference_genomic_align'}) and + defined($self->{'reference_genomic_align'}->dbID) and + ($self->{'reference_genomic_align'}->dbID != ($self->{'reference_genomic_align_id'} or 0))) { + $self->{'reference_genomic_align'} = undef; ## Attribute will be set on request + } + + ## Try to get data from other sources... + } elsif (!defined($self->{'reference_genomic_align_id'})) { + + ## ...from the reference_genomic_align attribute + if (defined($self->{'reference_genomic_align'}) and + defined($self->{'reference_genomic_align'}->dbID)) { + $self->{'reference_genomic_align_id'} = $self->{'reference_genomic_align'}->dbID; + } + } + + return $self->{'reference_genomic_align_id'}; +} + + +=head2 reference_genomic_align_node + + Arg [1] : (optional) $reference_genomic_align_node + Example : $object->reference_genomic_align_node($reference_genomic_align_node); + Example : $reference_genomic_align_node = $object->reference_genomic_align_node(); + Description : Getter/setter for the reference_genomic_align_node attribute + Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub reference_genomic_align_node { + my $self = shift; + + if (@_) { + $self->{reference_genomic_align_node} = shift; + } + + return $self->{reference_genomic_align_node}; +} + + +=head2 aligned_sequence + + Arg [1] : -none- + Example : $aligned_sequence = $object->aligned_sequence(); + Description : Get the aligned sequence for this node. When the node + contains one single sequence, it returns its aligned sequence. + For composite segments, it returns the combined aligned seq. + Returntype : string + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub aligned_sequence { + my $self = shift; + return $self->genomic_align_group->aligned_sequence(@_); +} + + +=head2 group_id + + Arg [1] : integer $group_id + Example : my $group_id = $genomic_align_tree->group_id; + Example : $genomic_align_tree->group_id(1234); + Description: get/set for attribute group_id of the underlying + GenomicAlignBlock objects + Returntype : integer + Exceptions : A GenomicAlignTree is made of two GenomicAlignBlock + object. The method fail when gettign the value if the + two group_ids do not match + Caller : general + +=cut + +sub group_id { + my ($self, $group_id) = @_; + + if (defined($group_id)) { + $self->{'group_id'} = $group_id; + # Set the group_id on the genomic_align_blocks... + my %genomic_align_blocks; + #foreach my $this_genomic_align_node (@{$self->get_all_sorted_genomic_align_nodes()}) { + foreach my $this_genomic_align_node (@{$self->get_all_nodes()}) { + next if (!defined $this_genomic_align_node->genomic_align_group); + foreach my $genomic_align (@{$this_genomic_align_node->genomic_align_group->get_all_GenomicAligns}) { + my $this_genomic_align_block = $genomic_align->genomic_align_block; + if ($this_genomic_align_block and !defined($genomic_align_blocks{$this_genomic_align_block})) { + $this_genomic_align_block->group_id($group_id); + $genomic_align_blocks{$this_genomic_align_block} = 1; + } + } + } + } elsif (!defined($self->{'group_id'}) and defined($self->{adaptor})) { + # Try to get the ID from other sources... + my %group_ids; + my $genomic_align_block_adaptor = $self->adaptor->dba->get_GenomicAlignBlockAdaptor; + foreach my $this_genomic_align_node (@{$self->get_all_nodes()}) { + next if (!defined $this_genomic_align_node->genomic_align_group); + foreach my $genomic_align (@{$this_genomic_align_node->genomic_align_group->get_all_GenomicAligns}) { + my $this_genomic_align_block_id = $genomic_align->genomic_align_block_id; + my $this_genomic_align_block = $genomic_align_block_adaptor->fetch_by_dbID($this_genomic_align_block_id); + if ($this_genomic_align_block->group_id) { + $group_ids{$this_genomic_align_block->group_id} = 1; + } else { + $group_ids{"undef"} = 1; + } + } + } + if (keys %group_ids == 1) { + if (!defined($group_ids{"undef"})) { + $self->{'group_id'} = (keys %group_ids)[0]; + } + } else { + warning("Different group_ids found for this GenomicAlignTree\n"); + } + } + return $self->{'group_id'}; +} + + +=head2 name + + Arg [1] : (optional) string $name + Example : $object->name($name); + Example : $name = $object->name(); + Description : Getter/setter for the name attribute. + Returntype : string + Exceptions : none + Caller : general + Status : At risk + +=cut + +sub name { + my $self = shift; + + if (@_) { + $self->{_name} = shift; + } elsif (!$self->{_name}) { + my $genomic_align_group = $self->genomic_align_group; + if (defined($self->SUPER::name()) and $self->SUPER::name() ne "") { + ## Uses the name defined before blessing this object as a + ## Bio::EnsEMBL::Compara::GenomicAlignTree in the Ortheus pipeline + $self->{_name} = $self->SUPER::name(); + } elsif ($self->is_leaf) { + my $gdb_name; + if($genomic_align_group->genome_db->name =~ /(.)[^ ]+_(.{3})/) { + $gdb_name = "${1}${2}"; + } + else { + $gdb_name = $genomic_align_group->genome_db->name(); + $gdb_name =~ tr/_//; + } + $gdb_name = ucfirst($gdb_name); + $self->{_name} = $gdb_name.'_'.$genomic_align_group->dnafrag->name."_". + $genomic_align_group->dnafrag_start."_".$genomic_align_group->dnafrag_end."[". + (($genomic_align_group->dnafrag_strand eq "-1")?"-":"+")."]"; + } else { + $self->{_name} = join("-", map { + my $name = $_->genomic_align_group->genome_db->name; + if($name =~ /(.)[^ ]+_(.{3})/) { + $name = "$1$2"; + } + else { + $name =~ tr/_//; + } + $name = ucfirst($name); + $name; + } @{$self->get_all_leaves})."[".scalar(@{$self->get_all_leaves})."]"; + } + } + + return $self->{_name}; +} + + +=head2 get_all_sorted_genomic_align_nodes + + Arg [1] : (optional) Bio::EnsEMBL::Compara::GenomicAlignTree $reference_genomic_align_node + Example : $object->get_all_sorted_genomic_align_nodes($ref_genomic_align_node); + Example : $nodes = $object->get_all_sorted_genomic_align_nodes(); + Description : If ref_genomic_align_node is set, sorts the tree based on the + reference_genomic_align_node + If ref_genomic_align_node is not set, sorts the tree based on + the species name + Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree + Exceptions : none + Caller : general + Status : At risk + +=cut + +sub get_all_sorted_genomic_align_nodes { + my ($self, $reference_genomic_align_node) = @_; + my $sorted_genomic_align_nodes = []; + + if (!$reference_genomic_align_node and $self->reference_genomic_align_node) { + $reference_genomic_align_node = $self->reference_genomic_align_node; + } + + my $children = $self->children; + if (@$children >= 1) { + $children = [sort _sort_children @$children]; + push(@$sorted_genomic_align_nodes, @{$children->[0]->get_all_sorted_genomic_align_nodes( + $reference_genomic_align_node)}); + push(@$sorted_genomic_align_nodes, $self); + for (my $i = 1; $i < @$children; $i++) { + push(@$sorted_genomic_align_nodes, @{$children->[$i]->get_all_sorted_genomic_align_nodes( + $reference_genomic_align_node)}); + } + } elsif (@$children == 0) { + push(@$sorted_genomic_align_nodes, $self); + } + + return $sorted_genomic_align_nodes; +} + + +=head2 restrict_between_alignment_positions + + Arg[1] : [optional] int $start, refers to the start of the alignment + Arg[2] : [optional] int $end, refers to the start of the alignment + Arg[3] : [optional] boolean $skip_empty_GenomicAligns + Example : none + Description: restrict this GenomicAlignBlock. It returns a new object unless no + restriction is needed. In that case, it returns the original unchanged + object. + This method uses coordinates relative to the alignment itself. + For instance if you have an alignment like: + 1 1 2 2 3 + 1 5 0 5 0 5 0 + AAC--CTTGTGGTA-CTACTT-----ACTTT + AACCCCTT-TGGTATCTACTTACCTAACTTT + and you restrict it between 5 and 25, you will get back a + object containing the following alignment: + 1 1 + 1 5 0 5 + CTTGTGGTA-CTACTT---- + CTT-TGGTATCTACTTACCT + + See restrict_between_reference_positions() elsewhere in this document + for an alternative method using absolute genomic coordinates. + + NB: This method works only for GenomicAlignBlock which have been + fetched from the DB as it is adjusting the dnafrag coordinates + and the cigar_line only and not the actual sequences stored in the + object if any. If you want to restrict an object with no coordinates + a simple substr() will do! + + Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object + Exceptions : none + Caller : general + Status : At risk + +=cut + +sub restrict_between_alignment_positions { + my ($self, $start, $end, $skip_empty_GenomicAligns, $reference_genomic_align) = @_; + my $genomic_align_tree; + $genomic_align_tree = $self->copy(); + $genomic_align_tree->adaptor($self->adaptor); + foreach my $this_node (@{$genomic_align_tree->get_all_nodes}) { + $this_node->adaptor($self->adaptor); + } + my $final_alignment_length = $end - $start + 1; + + #Get all the nodes and restrict but only remove leaves if necessary. Call minimize_tree at the end to + #remove the internal nodes + foreach my $this_node (@{$genomic_align_tree->get_all_nodes}) { + my $genomic_align_group = $this_node->genomic_align_group; + next if (!$genomic_align_group); + my $new_genomic_aligns = []; + my $length = $this_node->length; + foreach my $this_genomic_align (@{$genomic_align_group->get_all_GenomicAligns}) { + my $restricted_genomic_align = $this_genomic_align->restrict($start, $end, $length); + + if ($genomic_align_tree->reference_genomic_align eq $this_genomic_align) { + ## Update the reference_genomic_align + + $genomic_align_tree->reference_genomic_align($restricted_genomic_align); + $genomic_align_tree->reference_genomic_align_node($this_node); + } + if (!$skip_empty_GenomicAligns or + $restricted_genomic_align->dnafrag_start <= $restricted_genomic_align->dnafrag_end + ) { + ## Always skip composite segments outside of the range of restriction + ## The cigar_line will contain only X's + next if ($restricted_genomic_align->cigar_line =~ /^\d*X$/); + + #Set the genomic_align_block_id of the restricted genomic_align + $restricted_genomic_align->genomic_align_block_id($this_genomic_align->genomic_align_block_id); + #$restricted_genomic_align->genomic_align_block($genomic_align_tree); + + push(@$new_genomic_aligns, $restricted_genomic_align); + } + } + if (@$new_genomic_aligns) { + $genomic_align_group->{genomic_align_array} = undef; + foreach my $this_genomic_align (@$new_genomic_aligns) { + $genomic_align_group->add_GenomicAlign($this_genomic_align); + } + } else { + #Only remove leaves. Use minimise_tree to tidy up the internal nodes + if ($this_node->is_leaf) { + $this_node->disavow_parent(); + my $reference_genomic_align = $genomic_align_tree->reference_genomic_align; + if ($reference_genomic_align) { + my $reference_genomic_align_node = $genomic_align_tree->reference_genomic_align_node; + $genomic_align_tree = $genomic_align_tree->minimize_tree(); + ## Make sure links are not broken after tree minimization + $genomic_align_tree->reference_genomic_align($reference_genomic_align); + + #Set the genomic_align_block_id of the restricted genomic_align + $genomic_align_tree->reference_genomic_align->genomic_align_block_id($reference_genomic_align->genomic_align_block_id); + + #$genomic_align_tree->reference_genomic_align->genomic_align_block($genomic_align_tree); + $genomic_align_tree->reference_genomic_align_node($reference_genomic_align_node); + } + } + } + } + $genomic_align_tree = $genomic_align_tree->minimize_tree(); + $genomic_align_tree->length($final_alignment_length); + + return $genomic_align_tree; +} + + +=head2 restrict_between_reference_positions + + Arg[1] : [optional] int $start, refers to the reference_dnafrag + Arg[2] : [optional] int $end, refers to the reference_dnafrag + Arg[3] : [optional] Bio::EnsEMBL::Compara::GenomicAlign $reference_GenomicAlign + Arg[4] : [optional] boolean $skip_empty_GenomicAligns [ALWAYS FALSE] + Example : none + Description: restrict this GenomicAlignBlock. It returns a new object unless no + restriction is needed. In that case, it returns the original unchanged + object + It might be the case that the restricted region coincide with a gap + in one or several GenomicAligns. By default these GenomicAligns are + returned with a dnafrag_end equals to its dnafrag_start + 1. For instance, + a GenomicAlign with dnafrag_start = 12345 and dnafrag_end = 12344 + correspond to a block which goes on this region from before 12345 to + after 12344, ie just between 12344 and 12345. You can choose to remove + these empty GenomicAligns by setting $skip_empty_GenomicAligns to any + true value. + Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object in scalar context. In + list context, returns the previous object and the start and end + positions of the restriction in alignment coordinates (from 1 to + alignment_length) + Exceptions : return undef if reference positions lie outside of the alignment + Caller : general + Status : At risk + +=cut + +=comment + +sub restrict_between_reference_positions { + my ($self, $start, $end, $reference_genomic_align, $skip_empty_GenomicAligns) = @_; + my $genomic_align_tree; + + $reference_genomic_align ||= $self->reference_genomic_align; + throw("A reference Bio::EnsEMBL::Compara::GenomicAlignTree must be given") + if (!$reference_genomic_align); + + my @restricted_genomic_align_tree_params = $self->SUPER::restrict_between_reference_positions($start, $end, $reference_genomic_align, $skip_empty_GenomicAligns); + my $restricted_genomic_align_tree = $restricted_genomic_align_tree_params[0]; + + #return $self if (!$restricted_genomic_align_tree or $restricted_genomic_align_tree eq $self); + + return wantarray ? @restricted_genomic_align_tree_params : $restricted_genomic_align_tree; +} + +=cut + +=head2 copy + + Arg : none + Example : my $new_tree = $this_tree->copy() + Description : Create a copy of this Bio::EnsEMBL::Compara::GenomicAlignTree + object + Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree + Exceptions : none + Caller : general + Status : At risk + +=cut + +sub copy { + my $self = shift(@_); + my $new_copy; + + $new_copy = $self->SUPER::copy(); + + $new_copy->genomic_align_group($self->genomic_align_group->copy) if ($self->genomic_align_group); + + if ($self->reference_genomic_align_node) { + my $ref_ga = $self->reference_genomic_align; + #Need to find the reference_genomic_align in the genomic_align_group + foreach my $leaf (@{$new_copy->get_all_leaves}) { + foreach my $gag ($leaf->genomic_align_group) { + foreach my $ga (@{$gag->genomic_align_array}) { + if ($ref_ga->dnafrag_id == $ga->dnafrag_id && + $ref_ga->dnafrag_start == $ga->dnafrag_start && + $ref_ga->dnafrag_end == $ga->dnafrag_end && + $ref_ga->dnafrag_strand == $ga->dnafrag_strand) { + $new_copy->reference_genomic_align($ga); + last; + } + } + } + } + } + + + $new_copy->reference_genomic_align_node($self->reference_genomic_align_node->copy) if ($self->reference_genomic_align_node); + + #These are not deep copies + #$new_copy->reference_genomic_align($self->reference_genomic_align) if ($self->reference_genomic_align); + #$new_copy->reference_genomic_align_node($self->reference_genomic_align_node) if ($self->reference_genomic_align_node); + + #There are lots of bits missing from this copy + #Still to add? + #parent_link + #obj_id_to_link + + $new_copy->{_original_strand} = $self->{_original_strand} if (defined $self->{_original_strand}); + $new_copy->{_parent_id} = $self->{_parent_id} if (defined $self->{_parent_id}); + $new_copy->{_root_id} = $self->{_root_id} if (defined $self->{_root_id}); + $new_copy->{_left_node_id} = $self->{_left_node_id} if (defined $self->{_left_node_id}); + $new_copy->{_right_node_id} = $self->{_right_node_id} if (defined $self->{_right_node_id}); + $new_copy->{_node_id} = $self->{_node_id} if (defined $self->{_node_id}); + $new_copy->{_reference_slice} = $self->{_reference_slice} if (defined $self->{_reference_slice}); + $new_copy->{_reference_slice_start} = $self->{_reference_slice_start} if (defined $self->{_reference_slice_start}); + $new_copy->{_reference_slice_end} = $self->{_reference_slice_end} if (defined $self->{_reference_slice_end}); + $new_copy->{_reference_slice_strand} = $self->{_reference_slice_strand} if (defined $self->{_reference_slice_strand}); + + return $new_copy; +} + +=head2 print + + Arg : none + Example : print() + Description : Print the fields in a Bio::EnsEMBL::Compara::GenomicAlignTree + Returntype : none + Exceptions : none + Caller : general + Status : At risk + +=cut + +sub print { + my $self = shift(@_); + my $level = shift; + my $reference_genomic_align = shift; + if (!$level) { + print STDERR $self->newick_format(), "\n"; + $reference_genomic_align = ($self->reference_genomic_align or ""); + } + $level++; + my $mark = "- "; + if (grep {$_ eq $reference_genomic_align} @{$self->get_all_genomic_aligns_for_node}) { + $mark = "* "; + } + print STDERR " " x $level, $mark, + "[", $self->node_id, "/", ($self->get_original_strand?"+":"-"), "] ", + $self->genomic_align_group->genome_db->name,":", + $self->genomic_align_group->dnafrag->name,":", + $self->genomic_align_group->dnafrag_start,":", + $self->genomic_align_group->dnafrag_end,":", + $self->genomic_align_group->dnafrag_strand,":", + " (", ($self->left_node_id?$self->left_node->node_id."/".$self->left_node->root->node_id:"...."), + " - ", ($self->right_node_id?$self->right_node->node_id."/".$self->right_node->root->node_id:"...."),")\n"; + foreach my $this_genomic_align (@{$self->get_all_genomic_aligns_for_node}) { + if ($this_genomic_align eq $reference_genomic_align) { + print " " x 8, "* ", $this_genomic_align->aligned_sequence("+FAKE_SEQ"), "\n"; + } else { + print " " x 10, $this_genomic_align->aligned_sequence("+FAKE_SEQ"), "\n"; + } + } + foreach my $node (sort _sort_children @{$self->children}) { + $node->print($level, $reference_genomic_align); + } + $level--; +} + + +=head2 get_all_nodes_from_leaves_to_this + + Arg[1] : Bio::EnsEMBL::Compara::GenomicAlignTree $all_nodes + Example : my $all_nodes = get_all_nodes_from_leaves_to_this() + Description : + Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object + Exceptions : none + Caller : general + Status : At risk + +=cut + +sub get_all_nodes_from_leaves_to_this { + my $self = shift(@_); + my $all_nodes = (shift or []); + foreach my $node (sort _sort_children @{$self->children}) { + $all_nodes = $node->get_all_nodes_from_leaves_to_this($all_nodes); + } + push(@$all_nodes, $self); + return $all_nodes; +} + + +=head2 get_all_leaves + + Title : get_all_leaves + Usage : my @leaves = @{$tree->get_all_leaves}; + Function: searching from the given starting node, searches and creates list + of all leaves in this subtree and returns by reference. + This method overwrites the parent method because it sorts + the leaves according to their node_id. Here, we use this method + to get all leaves in another sorting function. Not only it doesn't + make much sense to sort something that will be sorted again, but + it can also produce some Perl errors as sort methods uses $a and + $b which are package global variables. + Example : + Returns : reference to list of NestedSet objects (all leaves) + Args : none + Status : At risk + +=cut + +sub get_all_leaves { + my $self = shift; + + my $leaves = {}; + $self->_recursive_get_all_leaves($leaves); + my @leaf_list = values(%{$leaves}); + + return \@leaf_list; +} + + +=head2 _sort_children + + Arg : none + Example : sort _sort_children @$children + Description : sort function for sorting the nodes of a Bio::EnsEMBL::Compara::GenomicAlignTree object + Returntype : int (-1,0,1) + Exceptions : none + Caller : general + Status : At risk + +=cut + +sub _sort_children { + my $reference_genomic_align_node; + + if (defined ($a->root) && defined($b->root) && $a->root eq $b->root and $a->root->reference_genomic_align_node) { + $reference_genomic_align_node = $a->root->reference_genomic_align_node; + } + + ## Reference GenomicAlign based sorting + if ($reference_genomic_align_node) { + if (grep {$_ eq $reference_genomic_align_node} @{$a->get_all_leaves}) { + return -1; + } elsif (grep {$_ eq $reference_genomic_align_node} @{$b->get_all_leaves}) { + return 1; + } + } + + ## Species name based sorting + my $species_a = $a->_name_for_sorting; + my $species_b = $b->_name_for_sorting; + + return $species_a cmp $species_b; +} + +=head2 _name_for_sorting + + Arg : none + Example : my $species_a = $a->_name_for_sorting; + Description : if the node is a leaf, create a name based on the species + name, dnafrag name, group_id and the start position. If the + node is an internal node, create a name based on the species + name, dnafrag name and the start position + Returntype : string + Exceptions : none + Caller : _sort_children + Status : At risk + +=cut + +sub _name_for_sorting { + my ($self) = @_; + my $name; + + if ($self->is_leaf) { + $name = sprintf("%s.%s.%s.%020d", + $self->genomic_align_group->genome_db->name, + $self->genomic_align_group->dnafrag->name, + ($self->genomic_align_group->dbID or + $self->genomic_align_group->{original_dbID} or 0), + $self->genomic_align_group->dnafrag_start); + } else { + $name = join(" - ", sort map {sprintf("%s.%s.%020d", + $_->genomic_align_group->genome_db->name, + $_->genomic_align_group->dnafrag->name, + $_->genomic_align_group->dnafrag_start)} @{$self->get_all_leaves}); + } + + return $name; +} + +=head2 reverse_complement + + Args : none + Example : none + Description: reverse complement the tree, + modifying dnafrag_strand and cigar_line of each GenomicAlign in consequence + Returntype : none + Exceptions : none + Caller : general + Status : At risk + +=cut + +sub reverse_complement { + my ($self) = @_; + + if (defined($self->{_original_strand})) { + $self->{_original_strand} = 1 - $self->{_original_strand}; + } else { + $self->{_original_strand} = 0; + } + + foreach my $this_node (@{$self->get_all_nodes}) { + my $genomic_align_group = $this_node->genomic_align_group; + next if (!$genomic_align_group); + my $gas = $genomic_align_group->get_all_GenomicAligns; + foreach my $ga (@{$gas}) { + $ga->reverse_complement; + } + } +} + +sub length { + my ($self, $length) = @_; + + if (defined($length)) { + $self->{'length'} = $length; + } elsif (!defined($self->{'length'})) { + # Try to get the ID from other sources... + if (defined($self->{'adaptor'}) and defined($self->dbID)) { + # ...from the database, using the dbID of the Bio::Ensembl::Compara::GenomicAlignBlock object + $self->adaptor->retrieve_all_direct_attributes($self); + } elsif (@{$self->get_all_genomic_aligns_for_node} and $self->get_all_genomic_aligns_for_node->[0]->aligned_sequence("+FAKE_SEQ")) { + $self->{'length'} = CORE::length($self->get_all_genomic_aligns_for_node->[0]->aligned_sequence("+FAKE_SEQ")); + } else { + foreach my $this_node (@{$self->get_all_nodes}) { + my $genomic_align_group = $this_node->genomic_align_group; + next if (!$genomic_align_group); + $self->{'length'} = CORE::length($genomic_align_group->get_all_GenomicAligns->[0]->aligned_sequence("+FAKE_SEQ")); + last; + } + } + } + return $self->{'length'}; +} + +=head2 alignment_strings + + Arg [1] : none + Example : $genomic_align_tree->alignment_strings + Description: Returns the alignment string of all the sequences in the + alignment + Returntype : array reference containing several strings + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub alignment_strings { + my ($self) = @_; + my $alignment_strings = []; + + foreach my $this_node (@{$self->get_all_nodes}) { + my $genomic_align_group = $this_node->genomic_align_group; + next if (!$genomic_align_group); + foreach my $genomic_align (@{$genomic_align_group->get_all_GenomicAligns}) { + push(@$alignment_strings, $genomic_align->aligned_sequence); + } + } + + return $alignment_strings; +} + +=head2 method_link_species_set + + Arg [1] : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $method_link_species_set + Example : $method_link_species_set = $genomic_align_tree->method_link_species_set; + Description: Getter for attribute method_link_species_set. Takes this from the first Bio::EnsEMBL::Compara::GenomicAlign + object + Returntype : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object + Exceptions : thrown if $method_link_species_set is not a + Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object + Caller : general + Status : Stable + +=cut + +sub method_link_species_set { + my ($self) = @_; + + my $method_link_species_set = $self->get_all_leaves->[0]->genomic_align_group->genomic_align_array->[0]->method_link_species_set; + + throw("$method_link_species_set is not a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object") + unless ($method_link_species_set->isa("Bio::EnsEMBL::Compara::MethodLinkSpeciesSet")); + + return $method_link_species_set; +} + +=head2 method_link_species_set_id + + Arg [1] : integer $method_link_species_set_id + Example : $method_link_species_set_id = $genomic_align_tree->method_link_species_set_id; + Description: Getter for the attribute method_link_species_set_id. Takes this from the first + Bio::EnsEMBL::Compara::GenomicAlign object + Returntype : integer + Caller : object::methodname + Status : Stable + +=cut + +sub method_link_species_set_id { + my ($self) = @_; + + my $method_link_species_set_id = $self->get_all_leaves->[0]->genomic_align_group->genomic_align_array->[0]->method_link_species_set->dbID; + + return $method_link_species_set_id; +} + +#sub DESTROY { +# my ($self) = @_; +# $self->release_tree unless ($self->{_parent_link}); +# } + +1;