comparison variant_effect_predictor/Bio/EnsEMBL/Compara/NestedSet.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 =head1 NAME
2
3 NestedSet - DESCRIPTION of Object
4
5 =head1 SYNOPSIS
6
7 =head1 DESCRIPTION
8
9 Abstract superclass to encapsulate the process of storing and manipulating a
10 nested-set representation tree. Also implements a 'reference count' system
11 based on the ObjectiveC retain/release design.
12 Designed to be used as the Root class for all Compara 'proxy' classes
13 (Member, GenomeDB, DnaFrag, NCBITaxon) to allow them to be made into sets and trees.
14
15 =head1 CONTACT
16
17 Contact Albert Vilella on implementation detail: avilella@ebi.ac.uk
18 Contact Jessica Severin on implementation/design detail: jessica@ebi.ac.uk
19 Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
20
21 =head1 APPENDIX
22
23 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
24
25 =cut
26
27
28
29 package Bio::EnsEMBL::Compara::NestedSet;
30
31 use strict;
32 use warnings;
33 use Bio::EnsEMBL::Utils::Exception;
34 use Bio::EnsEMBL::Utils::Argument;
35
36 use Bio::EnsEMBL::Utils::Exception qw(deprecate throw);
37
38 use Bio::TreeIO;
39 use Bio::EnsEMBL::Compara::Graph::Node;
40 use Bio::EnsEMBL::Compara::Member;
41 our @ISA = qw(Bio::EnsEMBL::Compara::Graph::Node);
42
43 #################################################
44 # Factory methods
45 #################################################
46
47 sub init {
48 my $self = shift;
49 $self->SUPER::init;
50 return $self;
51 }
52
53 sub dealloc {
54 my $self = shift;
55
56 #printf("DEALLOC NestedSet refcount:%d ", $self->refcount); $self->print_node;
57 #$self->release_children;
58 return $self->SUPER::dealloc;
59 }
60
61 =head2 copy
62
63 Overview : creates copy of tree starting at this node going down
64 Example : my $clone = $self->copy;
65 Returntype : Bio::EnsEMBL::Compara::NestedSet
66 Exceptions : none
67 Caller : general
68
69 =cut
70
71 sub copy {
72 my $self = shift;
73
74 my $mycopy = $self->SUPER::copy;
75 bless $mycopy, ref $self;
76
77 $mycopy->distance_to_parent($self->distance_to_parent);
78 $mycopy->left_index($self->left_index);
79 $mycopy->right_index($self->right_index);
80
81 foreach my $child (@{$self->children}) {
82 $mycopy->add_child($child->copy);
83 }
84 return $mycopy;
85 }
86
87
88 =head2 release_tree
89
90 Overview : deletes and frees the memory used by this tree
91 and all the underlying nodes.
92 Example : $self->release_tree;
93 Returntype : undef
94 Exceptions : none
95 Caller : general
96
97 =cut
98
99 sub release_tree {
100 my $self = shift;
101
102 my $child_count = $self->get_child_count;
103 $self->disavow_parent;
104 $self->cascade_unlink if($child_count);
105 return undef;
106 }
107
108 #################################################
109 # Object variable methods
110 #################################################
111
112 sub left_index {
113 my $self = shift;
114 $self->{'_left_index'} = shift if(@_);
115 $self->{'_left_index'} = 0 unless(defined($self->{'_left_index'}));
116 return $self->{'_left_index'};
117 }
118
119 sub right_index {
120 my $self = shift;
121 $self->{'_right_index'} = shift if(@_);
122 $self->{'_right_index'} = 0 unless(defined($self->{'_right_index'}));
123 return $self->{'_right_index'};
124 }
125
126
127 #######################################
128 # Set manipulation methods
129 #######################################
130
131 =head2 add_child
132
133 Overview : attaches child nestedset node to this nested set
134 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $child
135 Arg [2] : (opt.) distance between this node and child
136 Example : $self->add_child($child);
137 Returntype : undef
138 Exceptions : if child is undef or not a NestedSet subclass
139 Caller : general
140
141 =cut
142
143 sub add_child {
144 my $self = shift;
145 my $child = shift;
146 my $dist = shift;
147
148 throw("child not defined")
149 unless(defined($child));
150 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$child]")
151 unless($child->isa('Bio::EnsEMBL::Compara::NestedSet'));
152
153 #printf("add_child: parent(%s) <-> child(%s)\n", $self->node_id, $child->node_id);
154
155 unless(defined($dist)) { $dist = $child->_distance; }
156
157 $child->disavow_parent;
158 #create_link_to_node is a safe method which checks if connection exists
159 my $link = $self->create_link_to_node($child);
160 $child->_set_parent_link($link);
161 $self->{'_children_loaded'} = 1;
162 $link->distance_between($dist);
163 return $link;
164 }
165
166
167 =head2 disavow_parent
168
169 Overview : unlink and release self from its parent
170 might cause self to delete if refcount reaches Zero.
171 Example : $self->disavow_parent
172 Returntype : undef
173 Caller : general
174
175 =cut
176
177 sub disavow_parent {
178 my $self = shift;
179
180 if($self->{'_parent_link'}) {
181 my $link = $self->{'_parent_link'};
182 #print("DISAVOW parent : "); $parent->print_node;
183 #print(" child : "); $self->print_node;
184 $link->dealloc;
185 }
186 $self->_set_parent_link(undef);
187 return undef;
188 }
189
190
191 =head2 release_children
192
193 Overview : recursive releases all children
194 will cause potential deletion of children if refcount reaches Zero.
195 Example : $self->release_children
196 Returntype : $self
197 Exceptions : none
198 Caller : general
199
200 =cut
201
202 sub release_children {
203 my $self = shift;
204
205 # by calling with parent, this preserved the link to the parent
206 # and thus doesn't unlink self
207 foreach my $child (@{$self->children}) {
208 $child->disavow_parent;
209 $child->release_children;
210 }
211 #$self->cascade_unlink($self->{'_parent_node'});
212 return $self;
213 }
214
215
216 =head2 parent
217
218 Overview : returns the parent NestedSet object for this node
219 Example : my $my_parent = $object->parent();
220 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet
221 Exceptions : none
222 Caller : general
223
224 =cut
225
226 sub parent {
227 my $self = shift;
228 return $self->adaptor->fetch_parent_for_node($self) if defined $self->adaptor;
229 return $self->{'_parent_link'}->get_neighbor($self) if defined $self->{'_parent_link'};
230 return undef;
231 }
232
233
234 sub parent_link {
235 my $self = shift;
236 return $self->{'_parent_link'};
237 }
238
239 sub has_parent {
240 my $self = shift;
241 return 1 if($self->{'_parent_link'} or $self->_parent_id);
242 return 0;
243 }
244
245
246 sub has_ancestor {
247 my $self = shift;
248 my $ancestor = shift;
249 throw "[$ancestor] must be a Bio::EnsEMBL::Compara::NestedSet object"
250 unless ($ancestor and $ancestor->isa("Bio::EnsEMBL::Compara::NestedSet"));
251 my $node = $self->parent;
252 while($node) {
253 return 1 if($node->equals($ancestor));
254 $node = $node->parent;
255 }
256 return 0;
257 }
258
259
260 =head2 root
261
262 Overview : returns the root NestedSet object for this node
263 returns $self if node has no parent (this is the root)
264 Example : my $root = $object->root();
265 Description: Returns the root of the tree for this node
266 with links to all the intermediate nodes. Sister nodes
267 are not included in the result.
268 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet
269 Exceptions : none
270 Caller : general
271
272 =cut
273
274 sub root {
275 my $self = shift;
276
277 # Only if we don't have it cached
278 # Only if we have left and right and it's not a leaf
279 # Only if it's for release clusterset (1 genetrees - 0 genomic align trees)
280 if (!defined($self->{'_parent_link'}) and $self->adaptor
281 and ($self->right_index-$self->left_index)>1
282 and (defined $self->{'_parent_id'})
283 and (1==$self->{'_parent_id'})
284 ) {
285 return $self->adaptor->fetch_root_by_node($self);
286 }
287
288 # Otherwise, go through the step-by-step method
289 return $self unless(defined($self->parent));
290 # return $self if($self->node_id eq $self->parent->node_id);
291 return $self->parent->root;
292 }
293
294 sub subroot {
295 deprecate('subroot() should not be used and will be removed in release 70.
296 If you are using it, please contact the dev mailing-list dev@ensembl.org');
297 my $self = shift;
298
299 return undef unless($self->parent);
300 return $self unless(defined($self->parent->parent));
301 return $self->parent->subroot;
302 }
303
304
305 =head2 children
306
307 Overview : returns a list of NestedSet nodes directly under this parent node
308 Example : my @children = @{$object->children()};
309 Returntype : array reference of Bio::EnsEMBL::Compara::NestedSet objects (could be empty)
310 Exceptions : none
311 Caller : general
312 Algorithm : new algorithm for fetching children:
313 for each link connected to this NestedsSet node, a child is defined if
314 old: the link is not my parent_link
315 new: the link's neighbors' parent_link is the link
316 This allows one (with a carefully coded algorithm) to overlay a tree on top
317 of a fully connected graph and use the parent/children methods of NestedSet
318 to walk the 'tree' substructure of the graph.
319 Trees that are really just trees are still trees.
320
321 =cut
322
323 sub children {
324 my $self = shift;
325 $self->load_children_if_needed;
326 my @kids;
327 foreach my $link (@{$self->links}) {
328 next unless(defined($link));
329 my $neighbor = $link->get_neighbor($self);
330 my $parent_link = $neighbor->parent_link;
331 next unless($parent_link);
332 next unless($parent_link eq $link);
333 push @kids, $neighbor;
334 }
335 return \@kids;
336 }
337
338 sub each_child {
339 my $self = shift;
340
341 # Return an iterator over the children (most effective when children list is LONG)
342 my $count = -1;
343 $self->load_children_if_needed;
344 my @links = @{$self->links};
345
346 return sub {
347 while ($count < scalar(@links)) {
348 $count++;
349 my $link = $links[$count];
350 next unless(defined $link);
351
352 my $neighbor = $link->get_neighbor($self);
353 next unless($neighbor->parent_link);
354 next unless($neighbor->parent_link->equals($link));
355 return $neighbor;
356 }
357 return undef;
358 };
359 }
360
361 =head2 sorted_children
362
363 Overview : returns a sorted list of NestedSet nodes directly under this parent node
364 sort so that internal nodes<leaves and then on distance
365 Example : my @kids = @{$object->ordered_children()};
366 Returntype : array reference of Bio::EnsEMBL::Compara::NestedSet objects (could be empty)
367 Exceptions : none
368 Caller : general
369
370 =cut
371
372 sub sorted_children {
373 my $self = shift;
374
375 my @sortedkids =
376 sort { $a->is_leaf <=> $b->is_leaf
377 ||
378 $a->get_child_count <=> $b->get_child_count
379 ||
380 $a->distance_to_parent <=> $b->distance_to_parent
381 } @{$self->children;};
382 return \@sortedkids;
383 }
384
385
386 =head2 get_all_nodes
387
388 Arg 1 : hashref $node_hash [used for recursivity, do not use it!]
389 Example : my $all_nodes = $root->get_all_nodes();
390 Description : Returns this and all underlying sub nodes
391 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects
392 Exceptions : none
393 Caller : general
394 Status : Stable
395
396 =cut
397
398 sub get_all_nodes {
399 my $self = shift;
400 my $node_hash = shift;
401
402 my $toplevel = 0;
403 unless($node_hash) {
404 $node_hash = {};
405 $toplevel =1;
406 }
407
408 $node_hash->{$self->obj_id} = $self;
409 foreach my $child (@{$self->children}) {
410 $child->get_all_nodes($node_hash);
411 }
412
413 if ($toplevel) {
414 return [values(%$node_hash)];
415 }
416 return undef;
417 }
418
419
420 =head2 get_all_nodes_by_tag_value
421
422 Arg 1 : tag_name
423 Arg 2 : tag_value (optional)
424 Example : my $all_nodes = $root->get_all_nodes_by_tagvalue('taxon_name'=>'Mamalia');
425 Description : Returns all underlying nodes that have a tag of the given name, and optionally a value of the given value.
426 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects
427 Exceptions : none
428 Caller : general
429 Status :
430
431 =cut
432
433 sub get_all_nodes_by_tag_value {
434 my $self = shift;
435 my $tag = shift || die( "Need a tag name" );
436 my $value = shift;
437 my @found;
438 foreach my $node( @{$self->get_all_nodes} ){
439 my $tval = $node->get_tagvalue($tag);
440 if( defined $tval and $value ? $tval eq $value : 1 ){
441 push @found, $node;
442 }
443 }
444 return [@found];
445 }
446
447
448 =head2 get_all_subnodes
449
450 Arg 1 : hashref $node_hash [used for recursivity, do not use it!]
451 Example : my $all_nodes = $root->get_all_nodes();
452 Description : Returns all underlying sub nodes
453 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects
454 Exceptions : none
455 Caller : general
456 Status : Stable
457
458 =cut
459
460 sub get_all_subnodes {
461 my $self = shift;
462 my $node_hash = shift;
463
464 my $toplevel = 0;
465 unless($node_hash) {
466 $node_hash = {};
467 $toplevel =1;
468 }
469
470 foreach my $child (@{$self->children}) {
471 $node_hash->{$child->obj_id} = $child;
472 $child->get_all_subnodes($node_hash);
473 }
474 return values(%$node_hash) if($toplevel);
475 return undef;
476 }
477
478 =head2 get_all_ancestors
479
480 Arg 1 :
481 Example : my @ancestors = @{$node->get_all_ancestors};
482 Description : Returns all ancestor nodes for a given node
483 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects
484 Exceptions : none
485 Caller : general
486 Status :
487
488 =cut
489
490 sub get_all_ancestors {
491 my $self = shift;
492 my $this = $self;
493 my @ancestors;
494 while( $this = $this->parent ){
495 push @ancestors, $this;
496 }
497 return [@ancestors]
498 }
499
500 =head2 get_all_adjacent_subtrees
501
502 Arg 1 :
503 Example : my @subtrees = @{$node->get_all_adjacent_subtrees};
504 Description : Returns subtree 'root' nodes where the subtree is adjacent
505 to this node. Used e.g. by the web code for the 'collapse
506 other nodes' action
507 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects
508 Exceptions : none
509 Caller : EnsEMBL::Web::Component::Gene::ComparaTree
510 Status :
511
512 =cut
513
514 sub get_all_adjacent_subtrees {
515 my $self = shift;
516 my $node_id = $self->node_id;
517 my @node_path_to_root = ($self, @{$self->get_all_ancestors} );
518 my %path_node_ids = map{ $_->node_id => 1 } @node_path_to_root;
519
520 my $this = $self->root; # Start at the root node
521
522 my @adjacent_subtrees;
523 while( $this ){
524 last if $this->node_id == $node_id; # Stop on reaching current node
525 my $next;
526 foreach my $child (@{$this->children}){
527 next if $child->is_leaf; # Leaves cannot be subtrees
528 if( $path_node_ids{$child->node_id} ){ # Ancestor node
529 $next = $child;
530 } else {
531 push @adjacent_subtrees, $child;
532 }
533 }
534 $this = $next || undef;
535 }
536
537 return [@adjacent_subtrees]
538 }
539
540
541 =head2 num_leaves
542
543 Example : my $num_leaves = $node->num_leaves
544 Description : Returns the number of leaves underlying the node
545 ReturnType : integer
546 Exceptions : none
547 Caller : general
548 Status : At risk (relies on left and right indexes)
549
550 =cut
551 #'
552 sub num_leaves {
553 my $self = shift;
554
555 my $left = $self->left_index;
556 my $right = $self->right_index;
557
558 return unless( $left && $right );
559
560 my $num = $right - $left + 1;
561 my $num_leaves = ( ($num/2) + 1 ) / 2;
562
563 return $num_leaves;
564 }
565
566
567 sub get_child_count {
568 my $self = shift;
569 $self->load_children_if_needed;
570 return scalar @{$self->children};
571 # my $count = $self->link_count;
572 # $count-- if($self->has_parent);
573 # return $count;
574 }
575
576 sub load_children_if_needed {
577 my $self = shift;
578 if(!defined($self->{'_children_loaded'}) and $self->adaptor) {
579 #define _children_id_hash thereby signally that I've tried to load my children
580 $self->{'_children_loaded'} = 1;
581 #print("load_children_if_needed : "); $self->print_node;
582 $self->adaptor->fetch_all_children_for_node($self);
583 }
584 }
585
586 sub no_autoload_children {
587 my $self = shift;
588
589 return if($self->{'_children_loaded'});
590 $self->{'_children_loaded'} = 1;
591 }
592
593
594 =head2 distance_to_parent
595
596 Arg [1] : (opt.) <int or double> distance
597 Example : my $dist = $object->distance_to_parent();
598 Example : $object->distance_to_parent(1.618);
599 Description: Getter/Setter for the distance between this child and its parent
600 Returntype : integer node_id
601 Exceptions : none
602 Caller : general
603
604 =cut
605
606 sub distance_to_parent {
607 my $self = shift;
608 my $dist = shift;
609
610 if($self->{'_parent_link'}) {
611 if(defined($dist)) { $self->{'_parent_link'}->distance_between($dist); }
612 else { $dist = $self->{'_parent_link'}->distance_between; }
613 } else {
614 if(defined($dist)) { $self->_distance($dist); }
615 else { $dist = $self->_distance; }
616 }
617 return $dist;
618 }
619
620 sub _distance {
621 my $self = shift;
622 $self->{'_distance_to_parent'} = shift if(@_);
623 $self->{'_distance_to_parent'} = 0.0 unless(defined($self->{'_distance_to_parent'}));
624 return $self->{'_distance_to_parent'};
625 }
626
627 sub distance_to_root {
628 my $self = shift;
629 my $dist = $self->distance_to_parent;
630 $dist += $self->parent->distance_to_root if($self->parent);
631 return $dist;
632 }
633
634
635 =head2 distance_to_ancestor
636
637 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $ancestor
638 Example : my $distance = $this_node->distance_to_ancestor($ancestor);
639 Description : Calculates the distance in the tree between this node and
640 its ancestor $ancestor
641 Returntype : float
642 Exceptions : throws if $ancestor is not an ancestor of this node.
643 Caller : general
644 Status : Stable
645
646 =cut
647
648 sub distance_to_ancestor {
649 my $self = shift;
650 my $ancestor = shift;
651
652 if ($ancestor->node_id eq $self->node_id) {
653 return 0;
654 }
655 unless (defined $self->parent) {
656 throw("Ancestor not found\n");
657 }
658 return $self->distance_to_parent + $self->parent->distance_to_ancestor($ancestor);
659 }
660
661
662 =head2 distance_to_node
663
664 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $node
665 Example : my $distance = $this_node->distance_to_node($other_node);
666 Description : Calculates the distance in the tree between these
667 two nodes.
668 Returntype : float
669 Exceptions : returns undef if no ancestor can be found, no distances are
670 defined in the tree, etc.
671 Caller : general
672 Status : Stable
673
674 =cut
675
676 sub distance_to_node {
677 my $self = shift;
678 my $node = shift;
679
680 my $ancestor = $self->find_first_shared_ancestor($node);
681 if (!$ancestor) {
682 return undef;
683 }
684 my $distance = $self->distance_to_ancestor($ancestor);
685 $distance += $node->distance_to_ancestor($ancestor);
686
687 return $distance;
688 }
689
690
691 # Returns a TreeI-compliant object based on this NestedSet.
692 sub get_TreeI {
693 my $self = shift;
694 my $newick = $self->newick_format();
695
696 open(my $fake_fh, "+<", \$newick);
697 my $treein = new Bio::TreeIO(-fh => $fake_fh, -format => 'newick');
698 my $treeI = $treein->next_tree;
699 $treein->close;
700
701 return $treeI;
702 }
703
704 sub new_from_newick {
705 my $class = shift;
706 my $file = shift;
707 my $treein = new Bio::TreeIO(-file => $file, -format => 'newick');
708 my $treeI = $treein->next_tree;
709 $treein->close;
710
711 return $class->new_from_TreeI($treeI);
712 }
713
714 sub new_from_TreeI {
715 my $class = shift;
716 my $treeI = shift;
717
718 my $rootI = $treeI->get_root_node;
719 my $node = new $class;
720
721 # Kick off the recursive, parallel node adding.
722 _add_nodeI_to_node($node,$rootI);
723
724 return $node;
725 }
726
727 # Recursive helper for new_from_TreeI.
728 sub _add_nodeI_to_node {
729 my $node = shift; # Our node object (Compara)
730 my $nodeI = shift; # Our nodeI object (BioPerl)
731
732 foreach my $c ($nodeI->each_Descendent) {
733 my $child = ref($node)->new;
734
735 my $name = $c->id || '';
736 $name =~ s/^\s+//;
737 $name =~ s/\s+$//;
738
739 # Set name.
740 $child->name($name);
741
742 # Set branch length.
743 $node->add_child($child,$c->branch_length);
744
745 # Recurse.
746 _add_nodeI_to_node($child,$c);
747 }
748 }
749
750 =head2 print_tree
751
752 Arg [1] : int $scale
753 Example : $this_node->print_tree(100);
754 Description : Prints this tree in ASCII format. The scale is used to define
755 the width of the tree in the output
756 Returntype : undef
757 Exceptions :
758 Caller : general
759 Status : At risk (as the output might change)
760
761 =cut
762
763 sub print_tree {
764 my $self = shift;
765 my $scale = shift;
766
767 $scale = 100 unless($scale);
768 $self->_internal_print_tree(undef, 0, $scale);
769 }
770
771 sub string_tree {
772 my $self = shift;
773 my $scale = shift;
774 $scale ||= 100;
775 my $buffer = '';
776 $self->_internal_string_tree(undef, 0, $scale, \$buffer);
777 return $buffer;
778 }
779
780 sub _internal_string_tree {
781 my $self = shift;
782 my $indent = shift;
783 my $lastone = shift;
784 my $scale = shift;
785 my $buffer = shift;
786
787 if(defined($indent)) {
788 $$buffer .= $indent;
789 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { $$buffer .= '-'; }
790 }
791
792 $$buffer .= $self->string_node($indent);
793
794 if(defined($indent)) {
795 if($lastone) {
796 chop($indent);
797 $indent .= " ";
798 }
799 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { $indent .= ' '; }
800 }
801 $indent = '' unless(defined($indent));
802 $indent .= "|";
803
804 my $children = $self->sorted_children;
805 my $count=0;
806 $lastone = 0;
807 foreach my $child_node (@$children) {
808 $count++;
809 $lastone = 1 if($count == scalar(@$children));
810 $child_node->_internal_string_tree($indent,$lastone,$scale,$buffer);
811 }
812 }
813
814 sub _internal_print_tree {
815 my $self = shift;
816 my $indent = shift;
817 my $lastone = shift;
818 my $scale = shift;
819
820 if(defined($indent)) {
821 print($indent);
822 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { print('-'); }
823 }
824
825 $self->print_node($indent);
826
827 if(defined($indent)) {
828 if($lastone) {
829 chop($indent);
830 $indent .= " ";
831 }
832 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { $indent .= ' '; }
833 }
834 $indent = '' unless(defined($indent));
835 $indent .= "|";
836
837 my $children = $self->sorted_children;
838 my $count=0;
839 $lastone = 0;
840 foreach my $child_node (@$children) {
841 $count++;
842 $lastone = 1 if($count == scalar(@$children));
843 $child_node->_internal_print_tree($indent,$lastone,$scale);
844 }
845 }
846
847
848 sub print_node {
849 my $self = shift;
850 print $self->string_node;
851 }
852
853 sub string_node {
854 my $self = shift;
855 my $str = '(';
856
857 my $isdup = 0;
858 $isdup = 1 if ($self->get_tagvalue('Duplication', 0) > 0 and not $self->get_tagvalue('dubious_duplication', 0));
859 $isdup = 1 if $self->get_tagvalue('node_type', '') eq 'duplication';
860
861 my $isdub = ($self->get_tagvalue('node_type', '') eq 'dubious');
862
863 if ($isdup) {
864 my $taxon_name = $self->get_tagvalue('taxon_name', '');
865 if ($taxon_name =~ /\S+\ \S+/) {
866 $str .= "Dup ";
867 } else {
868 $str .= "DUP ";
869 }
870 my $sis = $self->get_tagvalue('duplication_confidence_score', 0) * 100;
871 $str .= sprintf('SIS=%d ', $sis);
872 } elsif ($isdub) {
873 $str .= "DD ";
874 $str .= 'SIS=0 ';
875 }
876 if($self->has_tag("bootstrap")) { my $bootstrap_value = $self->get_tagvalue("bootstrap"); $str .= "B=$bootstrap_value "; }
877 if($self->has_tag("taxon_name")) { my $taxon_name_value = $self->get_tagvalue("taxon_name"); $str .="T=$taxon_name_value "; }
878 $str .= sprintf("%s %d,%d)", $self->node_id, $self->left_index, $self->right_index);
879 $str .= sprintf("%s\n", $self->name || '');
880 return $str;
881 }
882
883
884 =head2 newick_format
885
886 Arg [1] : string $format_mode
887 Example : $this_node->newick_format("full");
888 Description : Prints this tree in Newick format. Several modes are
889 available: full, display_label_composite, simple, species,
890 species_short_name, ncbi_taxon, ncbi_name, njtree and phylip
891 Returntype : string
892 Exceptions :
893 Caller : general
894 Status : Stable
895
896 =cut
897
898 my %ryo_modes = (
899 'member_id' => '%{^-m}:%{d}',
900 'member_id_taxon_id' => '%{-m}%{"_"-x}:%{d}',
901 'display_label_composite' => '%{-l"_"}%{n}%{"_"-s}:%{d}',
902 'full_common' => '%{n}%{" "-c.^}%{"."-g}%{"_"-t"_MYA"}:%{d}',
903 'gene_stable_id_composite' => '%{-i"_"}%{n}%{"_"-s}:%{d}',
904 'gene_stable_id' => '%{-i}:%{d}',
905 'ncbi_taxon' => '%{o}',
906 'ncbi_name' => '%{n}',
907 'simple' => '%{^-n}:%{d}',
908 'full' => '%{n}:%{d}',
909 'species' => '%{^-S|p}',
910 'species_short_name' => '%{^-s|p}',
911 'otu_id' => '%{-s"|"}%{-l"|"}%{n}:%{d}',
912 'int_node_id' => '%{-n}%{o-}:%{d}',
913 'full_web' => '%{n-}%{-n|p}%{"_"-s"_"}%{":"d}',
914 'phylip' => '%21{n,}:%{d}',
915 'njtree' => '%{o}%{-T(is_incomplete)|E"*"}%{-T(is_incomplete,0,*)}',
916 );
917
918 my $nhx0 = '%{n-_|T(taxon_name)}:%{d}';
919 my $nhx1 = ':D=%{-E"N"}%{T(node_type,duplication,Y)-}%{T(node_type,dubious,Y)-}%{T(node_type,gene_split,Y)-}%{T(node_type,speciation,N)}%{":B="T(bootstrap)}';
920 my $nhx2 = ':T=%{-x}%{T(taxon_id)-}';
921
922 my %nhx_ryo_modes_1 = (
923 'member_id_taxon_id' => '%{-m}%{o-}_%{-x}%{T(taxon_id)-}:%{d}',
924 'protein_id' => '%{-n}'.$nhx0,
925 'transcript_id' => '%{-r}'.$nhx0,
926 'gene_id' => '%{-i}'.$nhx0,
927 'full' => $nhx0,
928 'full_web' => $nhx0,
929 'display_label' => '%{-L|i}%{"_"-s}'.$nhx0,
930 'display_label_composite' => '%{-L"_"}%{-i}%{"_"-s}'.$nhx0,
931 'treebest_ortho' => '%{-m}%{"_"-x}'.$nhx0,
932 'simple' => $ryo_modes{'simple'},
933 'phylip' => $ryo_modes{'phylip'},
934 );
935
936 my %nhx_ryo_modes_2 = (
937 'member_id_taxon_id' => $nhx1.$nhx2,
938 'protein_id' => $nhx1.'%{":G="-i}'.$nhx2,
939 'transcript_id' => $nhx1.'%{":G="-i}'.$nhx2,
940 'gene_id' => $nhx1.'%{":G="-r}'.$nhx2,
941 'full' => $nhx1.$nhx2,
942 'full_web' => $nhx1.$nhx2,
943 'display_label' => $nhx1.$nhx2,
944 'display_label_composite' => $nhx1.$nhx2,
945 'treebest_ortho' => $nhx1.$nhx2.':S=%{-x}%{T(taxon_id)-}',
946 );
947
948
949 sub newick_format {
950 my $self = shift;
951 my $format_mode = shift;
952
953 my $ryo_string;
954
955 if (not defined $format_mode) {
956 $ryo_string = $ryo_modes{'full'};
957
958 } elsif ($format_mode eq "ryo") {
959 $ryo_string = shift @_;
960
961 } elsif (defined $ryo_modes{$format_mode}) {
962 $ryo_string = $ryo_modes{$format_mode};
963
964 } else {
965 throw("Unrecognized format '$format_mode'. Please use 'ryo' to introduce a roll-your-own format string\n");
966 }
967 return $self->_internal_newick_format_ryo($ryo_string);
968 }
969
970 sub nhx_format {
971 my ($self, $format_mode) = @_;
972 my $ryo_string1;
973 my $ryo_string2;
974
975 if (not defined $format_mode) {
976 $ryo_string1 = $nhx_ryo_modes_1{'protein_id'};
977 $ryo_string2 = $nhx_ryo_modes_2{'protein_id'};
978
979 } elsif ($format_mode eq "ryo") {
980 $ryo_string1 = shift @_;
981 $ryo_string2 = shift @_;
982
983 } elsif (defined $nhx_ryo_modes_1{$format_mode}) {
984 $ryo_string1 = $nhx_ryo_modes_1{$format_mode};
985 $ryo_string2 = $nhx_ryo_modes_2{$format_mode};
986
987 } else {
988 throw("Unrecognized format '$format_mode'. Please use 'ryo' to introduce a roll-your-own format string\n");
989 }
990 my $fmt = $ryo_string1;
991 $fmt = $ryo_string1.'[&&NHX'.$ryo_string2.']' if defined $ryo_string2;
992 return $self->_internal_newick_format_ryo($fmt);
993 }
994
995 sub _internal_newick_format_ryo {
996 my ($self, $ryo_string) = @_;
997 my $newick_str;
998 eval {
999 use Bio::EnsEMBL::Compara::FormatTree;
1000 my $ryo_formatter = Bio::EnsEMBL::Compara::FormatTree->new($ryo_string);
1001 $newick_str = $ryo_formatter->format_newick($self);
1002 };
1003 if ($@) {
1004 throw("Something bad happened while trying to stringify the tree: $@\n");
1005 }
1006 return "$newick_str;";
1007 }
1008
1009 =head2 newick_simple_format
1010
1011 DEPRECATED. Use newick_format("simple") instead
1012
1013 =cut
1014
1015 sub newick_simple_format {
1016 my $self = shift;
1017 deprecate('Use newick_format("simple") instead.');
1018 return $self->newick_format('simple');
1019 }
1020
1021
1022 ##################################
1023 #
1024 # Set theory methods
1025 #
1026 ##################################
1027
1028 #sub equals {
1029 # my $self = shift;
1030 # my $other = shift;
1031 # throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$other]")
1032 # unless($other->isa('Bio::EnsEMBL::Compara::NestedSet'));
1033 # return 1 if($self->node_id eq $other->node_id);
1034 # foreach my $child (@{$self->children}) {
1035 # return 0 unless($other->has_child($child));
1036 # }
1037 # return 1;
1038 #}
1039
1040 sub has_child {
1041 my $self = shift;
1042 my $child = shift;
1043 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$child]")
1044 unless($child->isa('Bio::EnsEMBL::Compara::NestedSet'));
1045 $self->load_children_if_needed;
1046 my $link = $self->link_for_neighbor($child);
1047 return 0 unless($link);
1048 return 0 if($self->{'_parent_link'} and ($self->{'_parent_link'}->equals($link)));
1049 return 1;
1050 }
1051
1052 sub is_member_of {
1053 my $A = shift;
1054 my $B = shift;
1055 return 1 if($B->has_child($A));
1056 return 0;
1057 }
1058
1059 sub is_subset_of {
1060 my $A = shift;
1061 my $B = shift;
1062 foreach my $child (@{$A->children}) {
1063 return 0 unless($B->has_child($child));
1064 }
1065 return 1;
1066 }
1067
1068 sub is_leaf {
1069 my $self = shift;
1070 return 1 unless($self->get_child_count);
1071 return 0;
1072 }
1073
1074 sub merge_children {
1075 my $self = shift;
1076 my $nset = shift;
1077 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$nset]")
1078 unless($nset->isa('Bio::EnsEMBL::Compara::NestedSet'));
1079 foreach my $child_node (@{$nset->children}) {
1080 $self->add_child($child_node, $child_node->distance_to_parent);
1081 }
1082 return $self;
1083 }
1084
1085 sub merge_node_via_shared_ancestor {
1086 my $self = shift;
1087 my $node = shift;
1088
1089 my $node_dup = $self->find_node_by_node_id($node->node_id);
1090 if($node_dup) {
1091 #warn("trying to merge in a node with already exists\n");
1092 return $node_dup;
1093 }
1094 return undef unless($node->parent);
1095
1096 my $ancestor = $self->find_node_by_node_id($node->parent->node_id);
1097 if($ancestor) {
1098 $ancestor->add_child($node);
1099 #print("common ancestor at : "); $ancestor->print_node;
1100 return $ancestor;
1101 }
1102 return $self->merge_node_via_shared_ancestor($node->parent);
1103 }
1104
1105
1106 sub extract_subtree_from_leaves {
1107 my $self = shift;
1108 my $copy = $self->copy;
1109 my $node_ids = shift; # Array ref of node_ids.
1110 my @keepers = @{$node_ids};
1111 my @all = @{$copy->get_all_nodes};
1112
1113 # Add all ancestors of kept nodes to the keep list.
1114 my @all_keepers = ();
1115 foreach my $keeper (@keepers) {
1116 my $node = $copy->find_node_by_node_id($keeper);
1117 push @all_keepers, $keeper;
1118
1119 my $parent = $node->parent;
1120 while (defined $parent) {
1121 push @all_keepers, $parent->node_id;
1122 $parent = $parent->parent;
1123 }
1124 }
1125
1126 my @remove_me = ();
1127 foreach my $node (@all) {
1128 push @remove_me, $node unless (grep {$node->node_id == $_} @all_keepers);
1129 }
1130 $copy->remove_nodes(\@remove_me);
1131 return $copy;
1132 }
1133
1134
1135 ##################################
1136 #
1137 # nested_set manipulations
1138 #
1139 ##################################
1140
1141
1142 =head2 flatten_tree
1143
1144 Overview : Removes all internal nodes and attaches leaves to the tree root, creating
1145 a "flattened" star tree structure.
1146 Example : $node->flatten_tree();
1147 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet
1148 Exceptions : none
1149 Caller : general
1150
1151 =cut
1152
1153 sub flatten_tree {
1154 my $self = shift;
1155
1156 my $leaves = $self->get_all_leaves;
1157 foreach my $leaf (@{$leaves}) {
1158 $leaf->disavow_parent;
1159 }
1160
1161 $self->release_children;
1162 foreach my $leaf (@{$leaves}) {
1163 $self->add_child($leaf, 0.0);
1164 }
1165
1166 return $self;
1167 }
1168
1169 =head2 re_root
1170
1171 Overview : rearranges the tree structure so that the root is moved to
1172 beetween this node and its parent. If the old root was more than
1173 bifurcated (2 children) a new node is created where it was to hold
1174 the multiple children that arises from the re-rooting.
1175 The old root is returned.
1176 Example : $node->re_root();
1177 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet
1178 Exceptions : none
1179 Caller : general
1180
1181 =cut
1182
1183 sub re_root {
1184 my $self = shift;
1185
1186 return $self unless($self->parent); #I'm root so just return self
1187
1188 my $root = $self->root;
1189 my $tmp_root = new Bio::EnsEMBL::Compara::NestedSet;
1190 $tmp_root->merge_children($root);
1191
1192 my $parent = $self->parent;
1193 my $dist = $self->distance_to_parent;
1194 $self->disavow_parent;
1195
1196 my $old_root = $parent->_invert_tree_above;
1197 $old_root->minimize_node;
1198
1199 $root->add_child($parent, $dist / 2.0);
1200 $root->add_child($self, $dist / 2.0);
1201
1202 return $root;
1203 }
1204
1205
1206 sub _invert_tree_above {
1207 my $self = shift;
1208 return $self unless($self->parent);
1209
1210 my $old_root = $self->parent->_invert_tree_above;
1211 #now my parent has been inverted so it is the new root
1212
1213 #flip the direction of the link between myself and my parent
1214 $self->parent->_set_parent_link($self->{'_parent_link'});
1215 $self->_set_parent_link(undef);
1216
1217 #now I'm the new root and the old root might need to be modified
1218 return $old_root;
1219 }
1220
1221
1222 sub build_leftright_indexing {
1223 my $self = shift;
1224 my $counter = shift;
1225
1226 $counter = 1 unless($counter);
1227
1228 $self->left_index($counter++);
1229 foreach my $child_node (@{$self->sorted_children}) {
1230 $counter = $child_node->build_leftright_indexing($counter);
1231 }
1232 $self->right_index($counter++);
1233 return $counter;
1234 }
1235
1236
1237 =head2 remove_nodes
1238
1239 Arg [1] : arrayref Bio::EnsEMBL::Compara::NestedSet $nodes
1240 Example : my $ret_tree = $tree->remove_nodes($nodes);
1241 Description : Returns the tree with removed nodes in list. Nodes should be in the tree.
1242 Returntype : Bio::EnsEMBL::Compara::NestedSet object
1243 Exceptions :
1244 Caller : general
1245 Status : At risk (behaviour on exceptions could change)
1246
1247 =cut
1248
1249 sub remove_nodes {
1250 my $self = shift;
1251 my $nodes = shift;
1252
1253 foreach my $node (@$nodes) {
1254 if ($node->is_leaf) {
1255 $node->disavow_parent;
1256 $self = $self->minimize_tree;
1257 } else {
1258 my $node_children = $node->children;
1259 foreach my $child (@$node_children) {
1260 $node->parent->add_child($child);
1261 }
1262 $node->disavow_parent;
1263 }
1264 # Delete dangling one-child trees (help memory manager)
1265 if ($self->get_child_count == 1) {
1266 my $child = $self->children->[0];
1267 $child->parent->merge_children($child);
1268 $child->disavow_parent;
1269 return undef;
1270 }
1271 # Could be zero if all asked to delete, so return undef instead of
1272 # fake one-node tree.
1273 if ($self->get_child_count < 2) {
1274 return undef;
1275 }
1276 }
1277 return $self;
1278 }
1279
1280
1281 =head2 delete_lineage
1282
1283 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $node
1284 Example : $tree->delete_lineage($node);
1285 Description : Removes $node from tree. Nodes should be in the tree.
1286 Returntype :
1287 Exceptions :
1288 Caller : general
1289 Status : At risk (behaviour on exceptions could change)
1290
1291 =cut
1292
1293 sub delete_lineage {
1294 my $self = shift;
1295 my $del_me = shift;
1296
1297 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$self]")
1298 unless ($self->isa('Bio::EnsEMBL::Compara::NestedSet'));
1299
1300 my $parent = $del_me->parent;
1301 while ($parent) {
1302 my $num_children = scalar @{$parent->children};
1303 if ($num_children > 1) {
1304 $self->remove_nodes([$del_me]);
1305 return $self;
1306 } elsif ($num_children == 1) {
1307 $self->remove_nodes([$del_me]);
1308 $del_me = $parent;
1309 $parent = $del_me->parent;
1310 }
1311 }
1312 return $self;
1313 }
1314
1315 =head2 minimize_tree
1316
1317 Arg [1] : -none-
1318 Example : $leaf->disavow_parent();
1319 $tree = $tree->minimize_tree();
1320 Description : Returns the tree after removing internal nodes that do not
1321 represent an multi- or bi-furcation anymore. This is typically
1322 required after disavowing a node. Please ensure you use the
1323 object returned by the method and not the original object
1324 anymore!
1325 Returntype : Bio::EnsEMBL::Compara::NestedSet object
1326 Exceptions :
1327 Caller : general
1328 Status : Stable
1329
1330 =cut
1331
1332 sub minimize_tree {
1333 my $self = shift;
1334 return $self if($self->is_leaf);
1335
1336 foreach my $child (@{$self->children}) {
1337 $child->minimize_tree;
1338 }
1339 return $self->minimize_node;
1340 }
1341
1342
1343 sub minimize_node {
1344 my $self = shift;
1345
1346 return $self unless($self->get_child_count() == 1);
1347
1348 my $child = $self->children->[0];
1349 my $dist = $child->distance_to_parent + $self->distance_to_parent;
1350 if ($self->parent) {
1351 $self->parent->add_child($child, $dist);
1352 $self->disavow_parent;
1353 } else {
1354 $child->disavow_parent;
1355 }
1356 return $child
1357 }
1358
1359
1360 sub scale {
1361 my $self = shift;
1362 my $scale = shift;
1363
1364 foreach my $node (@{$self->get_all_nodes}) {
1365 my $bl = $node->distance_to_parent;
1366 $bl = 0 unless (defined $bl);
1367 $node->distance_to_parent($bl*$scale);
1368 }
1369 return $self;
1370 }
1371
1372
1373 sub scale_max_to {
1374 my $self = shift;
1375 my $new_max = shift;
1376
1377 my $max_dist = $self->max_distance;
1378 my $scale_factor = $new_max / $max_dist;
1379 return $self->scale($scale_factor);
1380 }
1381
1382
1383
1384 ##################################
1385 #
1386 # search methods
1387 #
1388 ##################################
1389
1390 sub find_node_by_name {
1391 my $self = shift;
1392 my $name = shift;
1393
1394 return $self if((defined $self->name) && $name eq $self->name);
1395
1396 my $children = $self->children;
1397 foreach my $child_node (@$children) {
1398 my $found = $child_node->find_node_by_name($name);
1399 return $found if(defined($found));
1400 }
1401
1402 return undef;
1403 }
1404
1405 sub find_node_by_node_id {
1406 my $self = shift;
1407 my $node_id = shift;
1408
1409 return $self if($node_id eq $self->node_id);
1410
1411 my $children = $self->children;
1412 foreach my $child_node (@$children) {
1413 my $found = $child_node->find_node_by_node_id($node_id);
1414 return $found if(defined($found));
1415 }
1416
1417 return undef;
1418 }
1419
1420 sub find_leaf_by_name {
1421 my $self = shift;
1422 my $name = shift;
1423
1424 return $self if((defined $self->name) and ($name eq $self->name));
1425
1426 my $leaves = $self->get_all_leaves;
1427 foreach my $leaf (@$leaves) {
1428 return $leaf if((defined $leaf->name) and ($name eq $leaf->name));
1429 }
1430
1431 return undef;
1432 }
1433
1434 sub find_leaf_by_node_id {
1435 my $self = shift;
1436 my $node_id = shift;
1437
1438 return $self if($node_id eq $self->node_id);
1439
1440 my $leaves = $self->get_all_leaves;
1441 foreach my $leaf (@$leaves) {
1442 return $leaf if($node_id eq $leaf->node_id);
1443 }
1444
1445 return undef;
1446 }
1447
1448
1449 =head2 get_all_sorted_leaves
1450
1451 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $top_leaf
1452 Arg [...] : (optional) Bio::EnsEMBL::Compara::NestedSet $secondary_priority_leaf
1453 Example : my $sorted_leaves = $object->get_all_sorted_leaves($human_leaf);
1454 Example : my $sorted_leaves = $object->get_all_sorted_leaves($human_leaf, $mouse_leaf);
1455 Description : Sorts the tree such as $top_leaf is the first leave and returns
1456 all the other leaves in the order defined by the tree.
1457 It is possible to define as many secondary top leaves as you require
1458 to sort other branches of the tree. The priority to sort the trees
1459 is defined by the order in which you specify the leaves.
1460 Returntype : listref of Bio::EnsEMBL::Compara::NestedSet (all sorted leaves)
1461 Exceptions : none
1462 Caller : general
1463 Status : Stable
1464
1465 =cut
1466
1467 sub get_all_sorted_leaves {
1468 my ($self, @priority_leaves) = @_;
1469
1470 if (!@priority_leaves) {
1471 return $self->get_all_leaves;
1472 }
1473
1474 # Assign priority scores for all parent nodes of the priority leaves
1475 my $score_by_node;
1476 my $score = 0;
1477 # Loop through all the priority leaves, starting from the last one (lowest score)
1478 while (my $priority_leaf = pop @priority_leaves) {
1479 $score++; # Increases the score, next priority leaves (earlier in the argument list) will overwrite the score if needed
1480 my $this_node = $priority_leaf;
1481 # Loop through all the parent node up to the root of the tree
1482 do {
1483 $score_by_node->{$this_node} = $score;
1484 $this_node = $this_node->parent;
1485 } while ($this_node);
1486 }
1487
1488 my $sorted_leaves = $self->_recursive_get_all_sorted_leaves($score_by_node);
1489
1490 return $sorted_leaves;
1491 }
1492
1493 =head2 _recursive_get_all_sorted_leaves
1494
1495 Arg [1] : hashref $score_by_node
1496 Example : my $sorted_leaves = $object->_recursive_get_all_sorted_leaves($score_by_node);
1497 Description : Recursive code for the get_all_sorted_leaves() method
1498 Returntype : listref of Bio::EnsEMBL::Compara::NestedSet (sorted leaves)
1499 Exceptions : none
1500 Caller : private
1501 Status : Stable
1502
1503 =cut
1504
1505 sub _recursive_get_all_sorted_leaves {
1506 my $self = shift;
1507 my $score_by_node = shift;
1508
1509 my $sorted_leaves = [];
1510 my $children = $self->children;
1511
1512 if (@$children == 0) {
1513 $sorted_leaves = [$self];
1514 } else {
1515 $children = [sort {
1516 ($score_by_node->{$b} || $score_by_node->{$a}) ? (($score_by_node->{$b} || 0)<=>($score_by_node->{$a} || 0)) : ($a->node_id <=> $b->node_id)
1517 } @$children];
1518 for (my $i = 0; $i < @$children; $i++) {
1519 push(@$sorted_leaves, @{$children->[$i]->_recursive_get_all_sorted_leaves($score_by_node)});
1520 }
1521 }
1522
1523 return $sorted_leaves;
1524 }
1525
1526 =head2 get_all_leaves
1527
1528 Title : get_all_leaves
1529 Usage : my @leaves = @{$tree->get_all_leaves};
1530 Function: searching from the given starting node, searches and creates list
1531 of all leaves in this subtree and returns by reference
1532 Example :
1533 Returns : reference to list of NestedSet objects (all leaves)
1534 Args : none
1535
1536 =cut
1537
1538 sub get_all_leaves {
1539 my $self = shift;
1540
1541 my $leaves = {};
1542 $self->_recursive_get_all_leaves($leaves);
1543 my @leaf_list = sort {$a->node_id <=> $b->node_id} values(%{$leaves});
1544 return \@leaf_list;
1545 }
1546
1547 sub _recursive_get_all_leaves {
1548 my $self = shift;
1549 my $leaves = shift;
1550
1551 $leaves->{$self->obj_id} = $self if($self->is_leaf);
1552
1553 foreach my $child (@{$self->children}) {
1554 no warnings 'recursion';
1555 $child->_recursive_get_all_leaves($leaves);
1556 }
1557 }
1558
1559
1560 sub get_all_leaves_indexed {
1561 my $self = shift;
1562 deprecate("Use Bio::EnsEMBL::Compara::DBSQL::GeneTreeNodeAdaptor->fetch_all_leaves_indexed() method instead");
1563
1564 my @leaf_list = @{$self->adaptor->fetch_all_leaves_indexed($self)};
1565
1566 return \@leaf_list;
1567 }
1568
1569 =head2 max_distance
1570
1571 Title : max_distance
1572 Args : none
1573 Usage : $tree_node->max_distance;
1574 Function: searching from the given starting node, calculates the maximum distance to a leaf
1575 Returns : int
1576
1577 =cut
1578
1579 sub max_distance {
1580 my $self = shift;
1581
1582 my $max_distance = 0;
1583
1584 foreach my $child (@{$self->children}) {
1585 my $distance = $child->max_distance;
1586 $max_distance = $distance if($distance>$max_distance);
1587 }
1588
1589 return ($self->distance_to_parent + $max_distance);
1590 }
1591
1592
1593 =head2 max_depth
1594
1595 Title : max_depth
1596 Args : none
1597 Usage : $tree_node->max_depth;
1598 Function: searching from the given starting node, calculates the maximum depth to a leaf
1599 Returns : int
1600
1601 =cut
1602
1603 sub max_depth {
1604 my $self = shift;
1605
1606 my $max_depth = 0;
1607
1608 foreach my $child (@{$self->children}) {
1609 my $depth = $child->max_depth + 1;
1610 $max_depth=$depth if($depth>$max_depth);
1611 }
1612 return $max_depth;
1613 }
1614
1615
1616 =head2 find_first_shared_ancestor
1617
1618 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $node
1619 Example : my $ancestor = $this_node->find_first_shared_ancestor($other_node);
1620 Description : Gets the first common ancestor between this node and the other one.
1621 Returntype : Bio::EnsEMBL::Compara::NestedSet object
1622 Exceptions :
1623 Caller : general
1624 Status : Stable
1625
1626 =cut
1627
1628 sub find_first_shared_ancestor {
1629 my $self = shift;
1630 my $node = shift;
1631
1632 return $self if($self->equals($node));
1633 return $node if($self->has_ancestor($node));
1634 return $self->find_first_shared_ancestor($node->parent);
1635 }
1636
1637
1638 sub find_first_shared_ancestor_from_leaves {
1639 my $self = shift;
1640 my $leaf_list = shift;
1641
1642 my @leaves = @{$leaf_list};
1643
1644 my $ancestor = shift @leaves;
1645 while (scalar @leaves > 0) {
1646 my $node = shift @leaves;
1647 $ancestor = $ancestor->find_first_shared_ancestor($node);
1648 }
1649 return $ancestor;
1650 }
1651
1652
1653 ##################################
1654 #
1655 # developer/adaptor API methods
1656 #
1657 ##################################
1658
1659
1660 # used for building tree from a DB fetch, want to restrict users to create trees
1661 # by only -add_child method
1662 sub _set_parent_link {
1663 my ($self, $link) = @_;
1664
1665 $self->{'_parent_id'} = 0;
1666 $self->{'_parent_link'} = $link;
1667 $self->{'_parent_id'} = $link->get_neighbor($self)->node_id if($link);
1668 return $self;
1669 }
1670
1671
1672 # used for building tree from a DB fetch until all the objects are in memory
1673 sub _parent_id {
1674 my $self = shift;
1675 $self->{'_parent_id'} = shift if(@_);
1676 return $self->{'_parent_id'};
1677 }
1678
1679 # used for building tree from a DB fetch until all the objects are in memory
1680 sub _root_id {
1681 my $self = shift;
1682 $self->{'_root_id'} = shift if(@_);
1683 return $self->{'_root_id'};
1684 }
1685
1686 1;
1687