Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/Compara/NestedSet.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 =head1 NAME | |
2 | |
3 NestedSet - DESCRIPTION of Object | |
4 | |
5 =head1 SYNOPSIS | |
6 | |
7 =head1 DESCRIPTION | |
8 | |
9 Abstract superclass to encapsulate the process of storing and manipulating a | |
10 nested-set representation tree. Also implements a 'reference count' system | |
11 based on the ObjectiveC retain/release design. | |
12 Designed to be used as the Root class for all Compara 'proxy' classes | |
13 (Member, GenomeDB, DnaFrag, NCBITaxon) to allow them to be made into sets and trees. | |
14 | |
15 =head1 CONTACT | |
16 | |
17 Contact Albert Vilella on implementation detail: avilella@ebi.ac.uk | |
18 Contact Jessica Severin on implementation/design detail: jessica@ebi.ac.uk | |
19 Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk | |
20 | |
21 =head1 APPENDIX | |
22 | |
23 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
24 | |
25 =cut | |
26 | |
27 | |
28 | |
29 package Bio::EnsEMBL::Compara::NestedSet; | |
30 | |
31 use strict; | |
32 use warnings; | |
33 use Bio::EnsEMBL::Utils::Exception; | |
34 use Bio::EnsEMBL::Utils::Argument; | |
35 | |
36 use Bio::EnsEMBL::Utils::Exception qw(deprecate throw); | |
37 | |
38 use Bio::TreeIO; | |
39 use Bio::EnsEMBL::Compara::Graph::Node; | |
40 use Bio::EnsEMBL::Compara::Member; | |
41 our @ISA = qw(Bio::EnsEMBL::Compara::Graph::Node); | |
42 | |
43 ################################################# | |
44 # Factory methods | |
45 ################################################# | |
46 | |
47 sub init { | |
48 my $self = shift; | |
49 $self->SUPER::init; | |
50 return $self; | |
51 } | |
52 | |
53 sub dealloc { | |
54 my $self = shift; | |
55 | |
56 #printf("DEALLOC NestedSet refcount:%d ", $self->refcount); $self->print_node; | |
57 #$self->release_children; | |
58 return $self->SUPER::dealloc; | |
59 } | |
60 | |
61 =head2 copy | |
62 | |
63 Overview : creates copy of tree starting at this node going down | |
64 Example : my $clone = $self->copy; | |
65 Returntype : Bio::EnsEMBL::Compara::NestedSet | |
66 Exceptions : none | |
67 Caller : general | |
68 | |
69 =cut | |
70 | |
71 sub copy { | |
72 my $self = shift; | |
73 | |
74 my $mycopy = $self->SUPER::copy; | |
75 bless $mycopy, ref $self; | |
76 | |
77 $mycopy->distance_to_parent($self->distance_to_parent); | |
78 $mycopy->left_index($self->left_index); | |
79 $mycopy->right_index($self->right_index); | |
80 | |
81 foreach my $child (@{$self->children}) { | |
82 $mycopy->add_child($child->copy); | |
83 } | |
84 return $mycopy; | |
85 } | |
86 | |
87 | |
88 =head2 release_tree | |
89 | |
90 Overview : deletes and frees the memory used by this tree | |
91 and all the underlying nodes. | |
92 Example : $self->release_tree; | |
93 Returntype : undef | |
94 Exceptions : none | |
95 Caller : general | |
96 | |
97 =cut | |
98 | |
99 sub release_tree { | |
100 my $self = shift; | |
101 | |
102 my $child_count = $self->get_child_count; | |
103 $self->disavow_parent; | |
104 $self->cascade_unlink if($child_count); | |
105 return undef; | |
106 } | |
107 | |
108 ################################################# | |
109 # Object variable methods | |
110 ################################################# | |
111 | |
112 sub left_index { | |
113 my $self = shift; | |
114 $self->{'_left_index'} = shift if(@_); | |
115 $self->{'_left_index'} = 0 unless(defined($self->{'_left_index'})); | |
116 return $self->{'_left_index'}; | |
117 } | |
118 | |
119 sub right_index { | |
120 my $self = shift; | |
121 $self->{'_right_index'} = shift if(@_); | |
122 $self->{'_right_index'} = 0 unless(defined($self->{'_right_index'})); | |
123 return $self->{'_right_index'}; | |
124 } | |
125 | |
126 | |
127 ####################################### | |
128 # Set manipulation methods | |
129 ####################################### | |
130 | |
131 =head2 add_child | |
132 | |
133 Overview : attaches child nestedset node to this nested set | |
134 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $child | |
135 Arg [2] : (opt.) distance between this node and child | |
136 Example : $self->add_child($child); | |
137 Returntype : undef | |
138 Exceptions : if child is undef or not a NestedSet subclass | |
139 Caller : general | |
140 | |
141 =cut | |
142 | |
143 sub add_child { | |
144 my $self = shift; | |
145 my $child = shift; | |
146 my $dist = shift; | |
147 | |
148 throw("child not defined") | |
149 unless(defined($child)); | |
150 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$child]") | |
151 unless($child->isa('Bio::EnsEMBL::Compara::NestedSet')); | |
152 | |
153 #printf("add_child: parent(%s) <-> child(%s)\n", $self->node_id, $child->node_id); | |
154 | |
155 unless(defined($dist)) { $dist = $child->_distance; } | |
156 | |
157 $child->disavow_parent; | |
158 #create_link_to_node is a safe method which checks if connection exists | |
159 my $link = $self->create_link_to_node($child); | |
160 $child->_set_parent_link($link); | |
161 $self->{'_children_loaded'} = 1; | |
162 $link->distance_between($dist); | |
163 return $link; | |
164 } | |
165 | |
166 | |
167 =head2 disavow_parent | |
168 | |
169 Overview : unlink and release self from its parent | |
170 might cause self to delete if refcount reaches Zero. | |
171 Example : $self->disavow_parent | |
172 Returntype : undef | |
173 Caller : general | |
174 | |
175 =cut | |
176 | |
177 sub disavow_parent { | |
178 my $self = shift; | |
179 | |
180 if($self->{'_parent_link'}) { | |
181 my $link = $self->{'_parent_link'}; | |
182 #print("DISAVOW parent : "); $parent->print_node; | |
183 #print(" child : "); $self->print_node; | |
184 $link->dealloc; | |
185 } | |
186 $self->_set_parent_link(undef); | |
187 return undef; | |
188 } | |
189 | |
190 | |
191 =head2 release_children | |
192 | |
193 Overview : recursive releases all children | |
194 will cause potential deletion of children if refcount reaches Zero. | |
195 Example : $self->release_children | |
196 Returntype : $self | |
197 Exceptions : none | |
198 Caller : general | |
199 | |
200 =cut | |
201 | |
202 sub release_children { | |
203 my $self = shift; | |
204 | |
205 # by calling with parent, this preserved the link to the parent | |
206 # and thus doesn't unlink self | |
207 foreach my $child (@{$self->children}) { | |
208 $child->disavow_parent; | |
209 $child->release_children; | |
210 } | |
211 #$self->cascade_unlink($self->{'_parent_node'}); | |
212 return $self; | |
213 } | |
214 | |
215 | |
216 =head2 parent | |
217 | |
218 Overview : returns the parent NestedSet object for this node | |
219 Example : my $my_parent = $object->parent(); | |
220 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet | |
221 Exceptions : none | |
222 Caller : general | |
223 | |
224 =cut | |
225 | |
226 sub parent { | |
227 my $self = shift; | |
228 return $self->adaptor->fetch_parent_for_node($self) if defined $self->adaptor; | |
229 return $self->{'_parent_link'}->get_neighbor($self) if defined $self->{'_parent_link'}; | |
230 return undef; | |
231 } | |
232 | |
233 | |
234 sub parent_link { | |
235 my $self = shift; | |
236 return $self->{'_parent_link'}; | |
237 } | |
238 | |
239 sub has_parent { | |
240 my $self = shift; | |
241 return 1 if($self->{'_parent_link'} or $self->_parent_id); | |
242 return 0; | |
243 } | |
244 | |
245 | |
246 sub has_ancestor { | |
247 my $self = shift; | |
248 my $ancestor = shift; | |
249 throw "[$ancestor] must be a Bio::EnsEMBL::Compara::NestedSet object" | |
250 unless ($ancestor and $ancestor->isa("Bio::EnsEMBL::Compara::NestedSet")); | |
251 my $node = $self->parent; | |
252 while($node) { | |
253 return 1 if($node->equals($ancestor)); | |
254 $node = $node->parent; | |
255 } | |
256 return 0; | |
257 } | |
258 | |
259 | |
260 =head2 root | |
261 | |
262 Overview : returns the root NestedSet object for this node | |
263 returns $self if node has no parent (this is the root) | |
264 Example : my $root = $object->root(); | |
265 Description: Returns the root of the tree for this node | |
266 with links to all the intermediate nodes. Sister nodes | |
267 are not included in the result. | |
268 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet | |
269 Exceptions : none | |
270 Caller : general | |
271 | |
272 =cut | |
273 | |
274 sub root { | |
275 my $self = shift; | |
276 | |
277 # Only if we don't have it cached | |
278 # Only if we have left and right and it's not a leaf | |
279 # Only if it's for release clusterset (1 genetrees - 0 genomic align trees) | |
280 if (!defined($self->{'_parent_link'}) and $self->adaptor | |
281 and ($self->right_index-$self->left_index)>1 | |
282 and (defined $self->{'_parent_id'}) | |
283 and (1==$self->{'_parent_id'}) | |
284 ) { | |
285 return $self->adaptor->fetch_root_by_node($self); | |
286 } | |
287 | |
288 # Otherwise, go through the step-by-step method | |
289 return $self unless(defined($self->parent)); | |
290 # return $self if($self->node_id eq $self->parent->node_id); | |
291 return $self->parent->root; | |
292 } | |
293 | |
294 sub subroot { | |
295 deprecate('subroot() should not be used and will be removed in release 70. | |
296 If you are using it, please contact the dev mailing-list dev@ensembl.org'); | |
297 my $self = shift; | |
298 | |
299 return undef unless($self->parent); | |
300 return $self unless(defined($self->parent->parent)); | |
301 return $self->parent->subroot; | |
302 } | |
303 | |
304 | |
305 =head2 children | |
306 | |
307 Overview : returns a list of NestedSet nodes directly under this parent node | |
308 Example : my @children = @{$object->children()}; | |
309 Returntype : array reference of Bio::EnsEMBL::Compara::NestedSet objects (could be empty) | |
310 Exceptions : none | |
311 Caller : general | |
312 Algorithm : new algorithm for fetching children: | |
313 for each link connected to this NestedsSet node, a child is defined if | |
314 old: the link is not my parent_link | |
315 new: the link's neighbors' parent_link is the link | |
316 This allows one (with a carefully coded algorithm) to overlay a tree on top | |
317 of a fully connected graph and use the parent/children methods of NestedSet | |
318 to walk the 'tree' substructure of the graph. | |
319 Trees that are really just trees are still trees. | |
320 | |
321 =cut | |
322 | |
323 sub children { | |
324 my $self = shift; | |
325 $self->load_children_if_needed; | |
326 my @kids; | |
327 foreach my $link (@{$self->links}) { | |
328 next unless(defined($link)); | |
329 my $neighbor = $link->get_neighbor($self); | |
330 my $parent_link = $neighbor->parent_link; | |
331 next unless($parent_link); | |
332 next unless($parent_link eq $link); | |
333 push @kids, $neighbor; | |
334 } | |
335 return \@kids; | |
336 } | |
337 | |
338 sub each_child { | |
339 my $self = shift; | |
340 | |
341 # Return an iterator over the children (most effective when children list is LONG) | |
342 my $count = -1; | |
343 $self->load_children_if_needed; | |
344 my @links = @{$self->links}; | |
345 | |
346 return sub { | |
347 while ($count < scalar(@links)) { | |
348 $count++; | |
349 my $link = $links[$count]; | |
350 next unless(defined $link); | |
351 | |
352 my $neighbor = $link->get_neighbor($self); | |
353 next unless($neighbor->parent_link); | |
354 next unless($neighbor->parent_link->equals($link)); | |
355 return $neighbor; | |
356 } | |
357 return undef; | |
358 }; | |
359 } | |
360 | |
361 =head2 sorted_children | |
362 | |
363 Overview : returns a sorted list of NestedSet nodes directly under this parent node | |
364 sort so that internal nodes<leaves and then on distance | |
365 Example : my @kids = @{$object->ordered_children()}; | |
366 Returntype : array reference of Bio::EnsEMBL::Compara::NestedSet objects (could be empty) | |
367 Exceptions : none | |
368 Caller : general | |
369 | |
370 =cut | |
371 | |
372 sub sorted_children { | |
373 my $self = shift; | |
374 | |
375 my @sortedkids = | |
376 sort { $a->is_leaf <=> $b->is_leaf | |
377 || | |
378 $a->get_child_count <=> $b->get_child_count | |
379 || | |
380 $a->distance_to_parent <=> $b->distance_to_parent | |
381 } @{$self->children;}; | |
382 return \@sortedkids; | |
383 } | |
384 | |
385 | |
386 =head2 get_all_nodes | |
387 | |
388 Arg 1 : hashref $node_hash [used for recursivity, do not use it!] | |
389 Example : my $all_nodes = $root->get_all_nodes(); | |
390 Description : Returns this and all underlying sub nodes | |
391 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects | |
392 Exceptions : none | |
393 Caller : general | |
394 Status : Stable | |
395 | |
396 =cut | |
397 | |
398 sub get_all_nodes { | |
399 my $self = shift; | |
400 my $node_hash = shift; | |
401 | |
402 my $toplevel = 0; | |
403 unless($node_hash) { | |
404 $node_hash = {}; | |
405 $toplevel =1; | |
406 } | |
407 | |
408 $node_hash->{$self->obj_id} = $self; | |
409 foreach my $child (@{$self->children}) { | |
410 $child->get_all_nodes($node_hash); | |
411 } | |
412 | |
413 if ($toplevel) { | |
414 return [values(%$node_hash)]; | |
415 } | |
416 return undef; | |
417 } | |
418 | |
419 | |
420 =head2 get_all_nodes_by_tag_value | |
421 | |
422 Arg 1 : tag_name | |
423 Arg 2 : tag_value (optional) | |
424 Example : my $all_nodes = $root->get_all_nodes_by_tagvalue('taxon_name'=>'Mamalia'); | |
425 Description : Returns all underlying nodes that have a tag of the given name, and optionally a value of the given value. | |
426 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects | |
427 Exceptions : none | |
428 Caller : general | |
429 Status : | |
430 | |
431 =cut | |
432 | |
433 sub get_all_nodes_by_tag_value { | |
434 my $self = shift; | |
435 my $tag = shift || die( "Need a tag name" ); | |
436 my $value = shift; | |
437 my @found; | |
438 foreach my $node( @{$self->get_all_nodes} ){ | |
439 my $tval = $node->get_tagvalue($tag); | |
440 if( defined $tval and $value ? $tval eq $value : 1 ){ | |
441 push @found, $node; | |
442 } | |
443 } | |
444 return [@found]; | |
445 } | |
446 | |
447 | |
448 =head2 get_all_subnodes | |
449 | |
450 Arg 1 : hashref $node_hash [used for recursivity, do not use it!] | |
451 Example : my $all_nodes = $root->get_all_nodes(); | |
452 Description : Returns all underlying sub nodes | |
453 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects | |
454 Exceptions : none | |
455 Caller : general | |
456 Status : Stable | |
457 | |
458 =cut | |
459 | |
460 sub get_all_subnodes { | |
461 my $self = shift; | |
462 my $node_hash = shift; | |
463 | |
464 my $toplevel = 0; | |
465 unless($node_hash) { | |
466 $node_hash = {}; | |
467 $toplevel =1; | |
468 } | |
469 | |
470 foreach my $child (@{$self->children}) { | |
471 $node_hash->{$child->obj_id} = $child; | |
472 $child->get_all_subnodes($node_hash); | |
473 } | |
474 return values(%$node_hash) if($toplevel); | |
475 return undef; | |
476 } | |
477 | |
478 =head2 get_all_ancestors | |
479 | |
480 Arg 1 : | |
481 Example : my @ancestors = @{$node->get_all_ancestors}; | |
482 Description : Returns all ancestor nodes for a given node | |
483 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects | |
484 Exceptions : none | |
485 Caller : general | |
486 Status : | |
487 | |
488 =cut | |
489 | |
490 sub get_all_ancestors { | |
491 my $self = shift; | |
492 my $this = $self; | |
493 my @ancestors; | |
494 while( $this = $this->parent ){ | |
495 push @ancestors, $this; | |
496 } | |
497 return [@ancestors] | |
498 } | |
499 | |
500 =head2 get_all_adjacent_subtrees | |
501 | |
502 Arg 1 : | |
503 Example : my @subtrees = @{$node->get_all_adjacent_subtrees}; | |
504 Description : Returns subtree 'root' nodes where the subtree is adjacent | |
505 to this node. Used e.g. by the web code for the 'collapse | |
506 other nodes' action | |
507 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects | |
508 Exceptions : none | |
509 Caller : EnsEMBL::Web::Component::Gene::ComparaTree | |
510 Status : | |
511 | |
512 =cut | |
513 | |
514 sub get_all_adjacent_subtrees { | |
515 my $self = shift; | |
516 my $node_id = $self->node_id; | |
517 my @node_path_to_root = ($self, @{$self->get_all_ancestors} ); | |
518 my %path_node_ids = map{ $_->node_id => 1 } @node_path_to_root; | |
519 | |
520 my $this = $self->root; # Start at the root node | |
521 | |
522 my @adjacent_subtrees; | |
523 while( $this ){ | |
524 last if $this->node_id == $node_id; # Stop on reaching current node | |
525 my $next; | |
526 foreach my $child (@{$this->children}){ | |
527 next if $child->is_leaf; # Leaves cannot be subtrees | |
528 if( $path_node_ids{$child->node_id} ){ # Ancestor node | |
529 $next = $child; | |
530 } else { | |
531 push @adjacent_subtrees, $child; | |
532 } | |
533 } | |
534 $this = $next || undef; | |
535 } | |
536 | |
537 return [@adjacent_subtrees] | |
538 } | |
539 | |
540 | |
541 =head2 num_leaves | |
542 | |
543 Example : my $num_leaves = $node->num_leaves | |
544 Description : Returns the number of leaves underlying the node | |
545 ReturnType : integer | |
546 Exceptions : none | |
547 Caller : general | |
548 Status : At risk (relies on left and right indexes) | |
549 | |
550 =cut | |
551 #' | |
552 sub num_leaves { | |
553 my $self = shift; | |
554 | |
555 my $left = $self->left_index; | |
556 my $right = $self->right_index; | |
557 | |
558 return unless( $left && $right ); | |
559 | |
560 my $num = $right - $left + 1; | |
561 my $num_leaves = ( ($num/2) + 1 ) / 2; | |
562 | |
563 return $num_leaves; | |
564 } | |
565 | |
566 | |
567 sub get_child_count { | |
568 my $self = shift; | |
569 $self->load_children_if_needed; | |
570 return scalar @{$self->children}; | |
571 # my $count = $self->link_count; | |
572 # $count-- if($self->has_parent); | |
573 # return $count; | |
574 } | |
575 | |
576 sub load_children_if_needed { | |
577 my $self = shift; | |
578 if(!defined($self->{'_children_loaded'}) and $self->adaptor) { | |
579 #define _children_id_hash thereby signally that I've tried to load my children | |
580 $self->{'_children_loaded'} = 1; | |
581 #print("load_children_if_needed : "); $self->print_node; | |
582 $self->adaptor->fetch_all_children_for_node($self); | |
583 } | |
584 } | |
585 | |
586 sub no_autoload_children { | |
587 my $self = shift; | |
588 | |
589 return if($self->{'_children_loaded'}); | |
590 $self->{'_children_loaded'} = 1; | |
591 } | |
592 | |
593 | |
594 =head2 distance_to_parent | |
595 | |
596 Arg [1] : (opt.) <int or double> distance | |
597 Example : my $dist = $object->distance_to_parent(); | |
598 Example : $object->distance_to_parent(1.618); | |
599 Description: Getter/Setter for the distance between this child and its parent | |
600 Returntype : integer node_id | |
601 Exceptions : none | |
602 Caller : general | |
603 | |
604 =cut | |
605 | |
606 sub distance_to_parent { | |
607 my $self = shift; | |
608 my $dist = shift; | |
609 | |
610 if($self->{'_parent_link'}) { | |
611 if(defined($dist)) { $self->{'_parent_link'}->distance_between($dist); } | |
612 else { $dist = $self->{'_parent_link'}->distance_between; } | |
613 } else { | |
614 if(defined($dist)) { $self->_distance($dist); } | |
615 else { $dist = $self->_distance; } | |
616 } | |
617 return $dist; | |
618 } | |
619 | |
620 sub _distance { | |
621 my $self = shift; | |
622 $self->{'_distance_to_parent'} = shift if(@_); | |
623 $self->{'_distance_to_parent'} = 0.0 unless(defined($self->{'_distance_to_parent'})); | |
624 return $self->{'_distance_to_parent'}; | |
625 } | |
626 | |
627 sub distance_to_root { | |
628 my $self = shift; | |
629 my $dist = $self->distance_to_parent; | |
630 $dist += $self->parent->distance_to_root if($self->parent); | |
631 return $dist; | |
632 } | |
633 | |
634 | |
635 =head2 distance_to_ancestor | |
636 | |
637 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $ancestor | |
638 Example : my $distance = $this_node->distance_to_ancestor($ancestor); | |
639 Description : Calculates the distance in the tree between this node and | |
640 its ancestor $ancestor | |
641 Returntype : float | |
642 Exceptions : throws if $ancestor is not an ancestor of this node. | |
643 Caller : general | |
644 Status : Stable | |
645 | |
646 =cut | |
647 | |
648 sub distance_to_ancestor { | |
649 my $self = shift; | |
650 my $ancestor = shift; | |
651 | |
652 if ($ancestor->node_id eq $self->node_id) { | |
653 return 0; | |
654 } | |
655 unless (defined $self->parent) { | |
656 throw("Ancestor not found\n"); | |
657 } | |
658 return $self->distance_to_parent + $self->parent->distance_to_ancestor($ancestor); | |
659 } | |
660 | |
661 | |
662 =head2 distance_to_node | |
663 | |
664 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $node | |
665 Example : my $distance = $this_node->distance_to_node($other_node); | |
666 Description : Calculates the distance in the tree between these | |
667 two nodes. | |
668 Returntype : float | |
669 Exceptions : returns undef if no ancestor can be found, no distances are | |
670 defined in the tree, etc. | |
671 Caller : general | |
672 Status : Stable | |
673 | |
674 =cut | |
675 | |
676 sub distance_to_node { | |
677 my $self = shift; | |
678 my $node = shift; | |
679 | |
680 my $ancestor = $self->find_first_shared_ancestor($node); | |
681 if (!$ancestor) { | |
682 return undef; | |
683 } | |
684 my $distance = $self->distance_to_ancestor($ancestor); | |
685 $distance += $node->distance_to_ancestor($ancestor); | |
686 | |
687 return $distance; | |
688 } | |
689 | |
690 | |
691 # Returns a TreeI-compliant object based on this NestedSet. | |
692 sub get_TreeI { | |
693 my $self = shift; | |
694 my $newick = $self->newick_format(); | |
695 | |
696 open(my $fake_fh, "+<", \$newick); | |
697 my $treein = new Bio::TreeIO(-fh => $fake_fh, -format => 'newick'); | |
698 my $treeI = $treein->next_tree; | |
699 $treein->close; | |
700 | |
701 return $treeI; | |
702 } | |
703 | |
704 sub new_from_newick { | |
705 my $class = shift; | |
706 my $file = shift; | |
707 my $treein = new Bio::TreeIO(-file => $file, -format => 'newick'); | |
708 my $treeI = $treein->next_tree; | |
709 $treein->close; | |
710 | |
711 return $class->new_from_TreeI($treeI); | |
712 } | |
713 | |
714 sub new_from_TreeI { | |
715 my $class = shift; | |
716 my $treeI = shift; | |
717 | |
718 my $rootI = $treeI->get_root_node; | |
719 my $node = new $class; | |
720 | |
721 # Kick off the recursive, parallel node adding. | |
722 _add_nodeI_to_node($node,$rootI); | |
723 | |
724 return $node; | |
725 } | |
726 | |
727 # Recursive helper for new_from_TreeI. | |
728 sub _add_nodeI_to_node { | |
729 my $node = shift; # Our node object (Compara) | |
730 my $nodeI = shift; # Our nodeI object (BioPerl) | |
731 | |
732 foreach my $c ($nodeI->each_Descendent) { | |
733 my $child = ref($node)->new; | |
734 | |
735 my $name = $c->id || ''; | |
736 $name =~ s/^\s+//; | |
737 $name =~ s/\s+$//; | |
738 | |
739 # Set name. | |
740 $child->name($name); | |
741 | |
742 # Set branch length. | |
743 $node->add_child($child,$c->branch_length); | |
744 | |
745 # Recurse. | |
746 _add_nodeI_to_node($child,$c); | |
747 } | |
748 } | |
749 | |
750 =head2 print_tree | |
751 | |
752 Arg [1] : int $scale | |
753 Example : $this_node->print_tree(100); | |
754 Description : Prints this tree in ASCII format. The scale is used to define | |
755 the width of the tree in the output | |
756 Returntype : undef | |
757 Exceptions : | |
758 Caller : general | |
759 Status : At risk (as the output might change) | |
760 | |
761 =cut | |
762 | |
763 sub print_tree { | |
764 my $self = shift; | |
765 my $scale = shift; | |
766 | |
767 $scale = 100 unless($scale); | |
768 $self->_internal_print_tree(undef, 0, $scale); | |
769 } | |
770 | |
771 sub string_tree { | |
772 my $self = shift; | |
773 my $scale = shift; | |
774 $scale ||= 100; | |
775 my $buffer = ''; | |
776 $self->_internal_string_tree(undef, 0, $scale, \$buffer); | |
777 return $buffer; | |
778 } | |
779 | |
780 sub _internal_string_tree { | |
781 my $self = shift; | |
782 my $indent = shift; | |
783 my $lastone = shift; | |
784 my $scale = shift; | |
785 my $buffer = shift; | |
786 | |
787 if(defined($indent)) { | |
788 $$buffer .= $indent; | |
789 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { $$buffer .= '-'; } | |
790 } | |
791 | |
792 $$buffer .= $self->string_node($indent); | |
793 | |
794 if(defined($indent)) { | |
795 if($lastone) { | |
796 chop($indent); | |
797 $indent .= " "; | |
798 } | |
799 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { $indent .= ' '; } | |
800 } | |
801 $indent = '' unless(defined($indent)); | |
802 $indent .= "|"; | |
803 | |
804 my $children = $self->sorted_children; | |
805 my $count=0; | |
806 $lastone = 0; | |
807 foreach my $child_node (@$children) { | |
808 $count++; | |
809 $lastone = 1 if($count == scalar(@$children)); | |
810 $child_node->_internal_string_tree($indent,$lastone,$scale,$buffer); | |
811 } | |
812 } | |
813 | |
814 sub _internal_print_tree { | |
815 my $self = shift; | |
816 my $indent = shift; | |
817 my $lastone = shift; | |
818 my $scale = shift; | |
819 | |
820 if(defined($indent)) { | |
821 print($indent); | |
822 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { print('-'); } | |
823 } | |
824 | |
825 $self->print_node($indent); | |
826 | |
827 if(defined($indent)) { | |
828 if($lastone) { | |
829 chop($indent); | |
830 $indent .= " "; | |
831 } | |
832 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { $indent .= ' '; } | |
833 } | |
834 $indent = '' unless(defined($indent)); | |
835 $indent .= "|"; | |
836 | |
837 my $children = $self->sorted_children; | |
838 my $count=0; | |
839 $lastone = 0; | |
840 foreach my $child_node (@$children) { | |
841 $count++; | |
842 $lastone = 1 if($count == scalar(@$children)); | |
843 $child_node->_internal_print_tree($indent,$lastone,$scale); | |
844 } | |
845 } | |
846 | |
847 | |
848 sub print_node { | |
849 my $self = shift; | |
850 print $self->string_node; | |
851 } | |
852 | |
853 sub string_node { | |
854 my $self = shift; | |
855 my $str = '('; | |
856 | |
857 my $isdup = 0; | |
858 $isdup = 1 if ($self->get_tagvalue('Duplication', 0) > 0 and not $self->get_tagvalue('dubious_duplication', 0)); | |
859 $isdup = 1 if $self->get_tagvalue('node_type', '') eq 'duplication'; | |
860 | |
861 my $isdub = ($self->get_tagvalue('node_type', '') eq 'dubious'); | |
862 | |
863 if ($isdup) { | |
864 my $taxon_name = $self->get_tagvalue('taxon_name', ''); | |
865 if ($taxon_name =~ /\S+\ \S+/) { | |
866 $str .= "Dup "; | |
867 } else { | |
868 $str .= "DUP "; | |
869 } | |
870 my $sis = $self->get_tagvalue('duplication_confidence_score', 0) * 100; | |
871 $str .= sprintf('SIS=%d ', $sis); | |
872 } elsif ($isdub) { | |
873 $str .= "DD "; | |
874 $str .= 'SIS=0 '; | |
875 } | |
876 if($self->has_tag("bootstrap")) { my $bootstrap_value = $self->get_tagvalue("bootstrap"); $str .= "B=$bootstrap_value "; } | |
877 if($self->has_tag("taxon_name")) { my $taxon_name_value = $self->get_tagvalue("taxon_name"); $str .="T=$taxon_name_value "; } | |
878 $str .= sprintf("%s %d,%d)", $self->node_id, $self->left_index, $self->right_index); | |
879 $str .= sprintf("%s\n", $self->name || ''); | |
880 return $str; | |
881 } | |
882 | |
883 | |
884 =head2 newick_format | |
885 | |
886 Arg [1] : string $format_mode | |
887 Example : $this_node->newick_format("full"); | |
888 Description : Prints this tree in Newick format. Several modes are | |
889 available: full, display_label_composite, simple, species, | |
890 species_short_name, ncbi_taxon, ncbi_name, njtree and phylip | |
891 Returntype : string | |
892 Exceptions : | |
893 Caller : general | |
894 Status : Stable | |
895 | |
896 =cut | |
897 | |
898 my %ryo_modes = ( | |
899 'member_id' => '%{^-m}:%{d}', | |
900 'member_id_taxon_id' => '%{-m}%{"_"-x}:%{d}', | |
901 'display_label_composite' => '%{-l"_"}%{n}%{"_"-s}:%{d}', | |
902 'full_common' => '%{n}%{" "-c.^}%{"."-g}%{"_"-t"_MYA"}:%{d}', | |
903 'gene_stable_id_composite' => '%{-i"_"}%{n}%{"_"-s}:%{d}', | |
904 'gene_stable_id' => '%{-i}:%{d}', | |
905 'ncbi_taxon' => '%{o}', | |
906 'ncbi_name' => '%{n}', | |
907 'simple' => '%{^-n}:%{d}', | |
908 'full' => '%{n}:%{d}', | |
909 'species' => '%{^-S|p}', | |
910 'species_short_name' => '%{^-s|p}', | |
911 'otu_id' => '%{-s"|"}%{-l"|"}%{n}:%{d}', | |
912 'int_node_id' => '%{-n}%{o-}:%{d}', | |
913 'full_web' => '%{n-}%{-n|p}%{"_"-s"_"}%{":"d}', | |
914 'phylip' => '%21{n,}:%{d}', | |
915 'njtree' => '%{o}%{-T(is_incomplete)|E"*"}%{-T(is_incomplete,0,*)}', | |
916 ); | |
917 | |
918 my $nhx0 = '%{n-_|T(taxon_name)}:%{d}'; | |
919 my $nhx1 = ':D=%{-E"N"}%{T(node_type,duplication,Y)-}%{T(node_type,dubious,Y)-}%{T(node_type,gene_split,Y)-}%{T(node_type,speciation,N)}%{":B="T(bootstrap)}'; | |
920 my $nhx2 = ':T=%{-x}%{T(taxon_id)-}'; | |
921 | |
922 my %nhx_ryo_modes_1 = ( | |
923 'member_id_taxon_id' => '%{-m}%{o-}_%{-x}%{T(taxon_id)-}:%{d}', | |
924 'protein_id' => '%{-n}'.$nhx0, | |
925 'transcript_id' => '%{-r}'.$nhx0, | |
926 'gene_id' => '%{-i}'.$nhx0, | |
927 'full' => $nhx0, | |
928 'full_web' => $nhx0, | |
929 'display_label' => '%{-L|i}%{"_"-s}'.$nhx0, | |
930 'display_label_composite' => '%{-L"_"}%{-i}%{"_"-s}'.$nhx0, | |
931 'treebest_ortho' => '%{-m}%{"_"-x}'.$nhx0, | |
932 'simple' => $ryo_modes{'simple'}, | |
933 'phylip' => $ryo_modes{'phylip'}, | |
934 ); | |
935 | |
936 my %nhx_ryo_modes_2 = ( | |
937 'member_id_taxon_id' => $nhx1.$nhx2, | |
938 'protein_id' => $nhx1.'%{":G="-i}'.$nhx2, | |
939 'transcript_id' => $nhx1.'%{":G="-i}'.$nhx2, | |
940 'gene_id' => $nhx1.'%{":G="-r}'.$nhx2, | |
941 'full' => $nhx1.$nhx2, | |
942 'full_web' => $nhx1.$nhx2, | |
943 'display_label' => $nhx1.$nhx2, | |
944 'display_label_composite' => $nhx1.$nhx2, | |
945 'treebest_ortho' => $nhx1.$nhx2.':S=%{-x}%{T(taxon_id)-}', | |
946 ); | |
947 | |
948 | |
949 sub newick_format { | |
950 my $self = shift; | |
951 my $format_mode = shift; | |
952 | |
953 my $ryo_string; | |
954 | |
955 if (not defined $format_mode) { | |
956 $ryo_string = $ryo_modes{'full'}; | |
957 | |
958 } elsif ($format_mode eq "ryo") { | |
959 $ryo_string = shift @_; | |
960 | |
961 } elsif (defined $ryo_modes{$format_mode}) { | |
962 $ryo_string = $ryo_modes{$format_mode}; | |
963 | |
964 } else { | |
965 throw("Unrecognized format '$format_mode'. Please use 'ryo' to introduce a roll-your-own format string\n"); | |
966 } | |
967 return $self->_internal_newick_format_ryo($ryo_string); | |
968 } | |
969 | |
970 sub nhx_format { | |
971 my ($self, $format_mode) = @_; | |
972 my $ryo_string1; | |
973 my $ryo_string2; | |
974 | |
975 if (not defined $format_mode) { | |
976 $ryo_string1 = $nhx_ryo_modes_1{'protein_id'}; | |
977 $ryo_string2 = $nhx_ryo_modes_2{'protein_id'}; | |
978 | |
979 } elsif ($format_mode eq "ryo") { | |
980 $ryo_string1 = shift @_; | |
981 $ryo_string2 = shift @_; | |
982 | |
983 } elsif (defined $nhx_ryo_modes_1{$format_mode}) { | |
984 $ryo_string1 = $nhx_ryo_modes_1{$format_mode}; | |
985 $ryo_string2 = $nhx_ryo_modes_2{$format_mode}; | |
986 | |
987 } else { | |
988 throw("Unrecognized format '$format_mode'. Please use 'ryo' to introduce a roll-your-own format string\n"); | |
989 } | |
990 my $fmt = $ryo_string1; | |
991 $fmt = $ryo_string1.'[&&NHX'.$ryo_string2.']' if defined $ryo_string2; | |
992 return $self->_internal_newick_format_ryo($fmt); | |
993 } | |
994 | |
995 sub _internal_newick_format_ryo { | |
996 my ($self, $ryo_string) = @_; | |
997 my $newick_str; | |
998 eval { | |
999 use Bio::EnsEMBL::Compara::FormatTree; | |
1000 my $ryo_formatter = Bio::EnsEMBL::Compara::FormatTree->new($ryo_string); | |
1001 $newick_str = $ryo_formatter->format_newick($self); | |
1002 }; | |
1003 if ($@) { | |
1004 throw("Something bad happened while trying to stringify the tree: $@\n"); | |
1005 } | |
1006 return "$newick_str;"; | |
1007 } | |
1008 | |
1009 =head2 newick_simple_format | |
1010 | |
1011 DEPRECATED. Use newick_format("simple") instead | |
1012 | |
1013 =cut | |
1014 | |
1015 sub newick_simple_format { | |
1016 my $self = shift; | |
1017 deprecate('Use newick_format("simple") instead.'); | |
1018 return $self->newick_format('simple'); | |
1019 } | |
1020 | |
1021 | |
1022 ################################## | |
1023 # | |
1024 # Set theory methods | |
1025 # | |
1026 ################################## | |
1027 | |
1028 #sub equals { | |
1029 # my $self = shift; | |
1030 # my $other = shift; | |
1031 # throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$other]") | |
1032 # unless($other->isa('Bio::EnsEMBL::Compara::NestedSet')); | |
1033 # return 1 if($self->node_id eq $other->node_id); | |
1034 # foreach my $child (@{$self->children}) { | |
1035 # return 0 unless($other->has_child($child)); | |
1036 # } | |
1037 # return 1; | |
1038 #} | |
1039 | |
1040 sub has_child { | |
1041 my $self = shift; | |
1042 my $child = shift; | |
1043 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$child]") | |
1044 unless($child->isa('Bio::EnsEMBL::Compara::NestedSet')); | |
1045 $self->load_children_if_needed; | |
1046 my $link = $self->link_for_neighbor($child); | |
1047 return 0 unless($link); | |
1048 return 0 if($self->{'_parent_link'} and ($self->{'_parent_link'}->equals($link))); | |
1049 return 1; | |
1050 } | |
1051 | |
1052 sub is_member_of { | |
1053 my $A = shift; | |
1054 my $B = shift; | |
1055 return 1 if($B->has_child($A)); | |
1056 return 0; | |
1057 } | |
1058 | |
1059 sub is_subset_of { | |
1060 my $A = shift; | |
1061 my $B = shift; | |
1062 foreach my $child (@{$A->children}) { | |
1063 return 0 unless($B->has_child($child)); | |
1064 } | |
1065 return 1; | |
1066 } | |
1067 | |
1068 sub is_leaf { | |
1069 my $self = shift; | |
1070 return 1 unless($self->get_child_count); | |
1071 return 0; | |
1072 } | |
1073 | |
1074 sub merge_children { | |
1075 my $self = shift; | |
1076 my $nset = shift; | |
1077 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$nset]") | |
1078 unless($nset->isa('Bio::EnsEMBL::Compara::NestedSet')); | |
1079 foreach my $child_node (@{$nset->children}) { | |
1080 $self->add_child($child_node, $child_node->distance_to_parent); | |
1081 } | |
1082 return $self; | |
1083 } | |
1084 | |
1085 sub merge_node_via_shared_ancestor { | |
1086 my $self = shift; | |
1087 my $node = shift; | |
1088 | |
1089 my $node_dup = $self->find_node_by_node_id($node->node_id); | |
1090 if($node_dup) { | |
1091 #warn("trying to merge in a node with already exists\n"); | |
1092 return $node_dup; | |
1093 } | |
1094 return undef unless($node->parent); | |
1095 | |
1096 my $ancestor = $self->find_node_by_node_id($node->parent->node_id); | |
1097 if($ancestor) { | |
1098 $ancestor->add_child($node); | |
1099 #print("common ancestor at : "); $ancestor->print_node; | |
1100 return $ancestor; | |
1101 } | |
1102 return $self->merge_node_via_shared_ancestor($node->parent); | |
1103 } | |
1104 | |
1105 | |
1106 sub extract_subtree_from_leaves { | |
1107 my $self = shift; | |
1108 my $copy = $self->copy; | |
1109 my $node_ids = shift; # Array ref of node_ids. | |
1110 my @keepers = @{$node_ids}; | |
1111 my @all = @{$copy->get_all_nodes}; | |
1112 | |
1113 # Add all ancestors of kept nodes to the keep list. | |
1114 my @all_keepers = (); | |
1115 foreach my $keeper (@keepers) { | |
1116 my $node = $copy->find_node_by_node_id($keeper); | |
1117 push @all_keepers, $keeper; | |
1118 | |
1119 my $parent = $node->parent; | |
1120 while (defined $parent) { | |
1121 push @all_keepers, $parent->node_id; | |
1122 $parent = $parent->parent; | |
1123 } | |
1124 } | |
1125 | |
1126 my @remove_me = (); | |
1127 foreach my $node (@all) { | |
1128 push @remove_me, $node unless (grep {$node->node_id == $_} @all_keepers); | |
1129 } | |
1130 $copy->remove_nodes(\@remove_me); | |
1131 return $copy; | |
1132 } | |
1133 | |
1134 | |
1135 ################################## | |
1136 # | |
1137 # nested_set manipulations | |
1138 # | |
1139 ################################## | |
1140 | |
1141 | |
1142 =head2 flatten_tree | |
1143 | |
1144 Overview : Removes all internal nodes and attaches leaves to the tree root, creating | |
1145 a "flattened" star tree structure. | |
1146 Example : $node->flatten_tree(); | |
1147 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet | |
1148 Exceptions : none | |
1149 Caller : general | |
1150 | |
1151 =cut | |
1152 | |
1153 sub flatten_tree { | |
1154 my $self = shift; | |
1155 | |
1156 my $leaves = $self->get_all_leaves; | |
1157 foreach my $leaf (@{$leaves}) { | |
1158 $leaf->disavow_parent; | |
1159 } | |
1160 | |
1161 $self->release_children; | |
1162 foreach my $leaf (@{$leaves}) { | |
1163 $self->add_child($leaf, 0.0); | |
1164 } | |
1165 | |
1166 return $self; | |
1167 } | |
1168 | |
1169 =head2 re_root | |
1170 | |
1171 Overview : rearranges the tree structure so that the root is moved to | |
1172 beetween this node and its parent. If the old root was more than | |
1173 bifurcated (2 children) a new node is created where it was to hold | |
1174 the multiple children that arises from the re-rooting. | |
1175 The old root is returned. | |
1176 Example : $node->re_root(); | |
1177 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet | |
1178 Exceptions : none | |
1179 Caller : general | |
1180 | |
1181 =cut | |
1182 | |
1183 sub re_root { | |
1184 my $self = shift; | |
1185 | |
1186 return $self unless($self->parent); #I'm root so just return self | |
1187 | |
1188 my $root = $self->root; | |
1189 my $tmp_root = new Bio::EnsEMBL::Compara::NestedSet; | |
1190 $tmp_root->merge_children($root); | |
1191 | |
1192 my $parent = $self->parent; | |
1193 my $dist = $self->distance_to_parent; | |
1194 $self->disavow_parent; | |
1195 | |
1196 my $old_root = $parent->_invert_tree_above; | |
1197 $old_root->minimize_node; | |
1198 | |
1199 $root->add_child($parent, $dist / 2.0); | |
1200 $root->add_child($self, $dist / 2.0); | |
1201 | |
1202 return $root; | |
1203 } | |
1204 | |
1205 | |
1206 sub _invert_tree_above { | |
1207 my $self = shift; | |
1208 return $self unless($self->parent); | |
1209 | |
1210 my $old_root = $self->parent->_invert_tree_above; | |
1211 #now my parent has been inverted so it is the new root | |
1212 | |
1213 #flip the direction of the link between myself and my parent | |
1214 $self->parent->_set_parent_link($self->{'_parent_link'}); | |
1215 $self->_set_parent_link(undef); | |
1216 | |
1217 #now I'm the new root and the old root might need to be modified | |
1218 return $old_root; | |
1219 } | |
1220 | |
1221 | |
1222 sub build_leftright_indexing { | |
1223 my $self = shift; | |
1224 my $counter = shift; | |
1225 | |
1226 $counter = 1 unless($counter); | |
1227 | |
1228 $self->left_index($counter++); | |
1229 foreach my $child_node (@{$self->sorted_children}) { | |
1230 $counter = $child_node->build_leftright_indexing($counter); | |
1231 } | |
1232 $self->right_index($counter++); | |
1233 return $counter; | |
1234 } | |
1235 | |
1236 | |
1237 =head2 remove_nodes | |
1238 | |
1239 Arg [1] : arrayref Bio::EnsEMBL::Compara::NestedSet $nodes | |
1240 Example : my $ret_tree = $tree->remove_nodes($nodes); | |
1241 Description : Returns the tree with removed nodes in list. Nodes should be in the tree. | |
1242 Returntype : Bio::EnsEMBL::Compara::NestedSet object | |
1243 Exceptions : | |
1244 Caller : general | |
1245 Status : At risk (behaviour on exceptions could change) | |
1246 | |
1247 =cut | |
1248 | |
1249 sub remove_nodes { | |
1250 my $self = shift; | |
1251 my $nodes = shift; | |
1252 | |
1253 foreach my $node (@$nodes) { | |
1254 if ($node->is_leaf) { | |
1255 $node->disavow_parent; | |
1256 $self = $self->minimize_tree; | |
1257 } else { | |
1258 my $node_children = $node->children; | |
1259 foreach my $child (@$node_children) { | |
1260 $node->parent->add_child($child); | |
1261 } | |
1262 $node->disavow_parent; | |
1263 } | |
1264 # Delete dangling one-child trees (help memory manager) | |
1265 if ($self->get_child_count == 1) { | |
1266 my $child = $self->children->[0]; | |
1267 $child->parent->merge_children($child); | |
1268 $child->disavow_parent; | |
1269 return undef; | |
1270 } | |
1271 # Could be zero if all asked to delete, so return undef instead of | |
1272 # fake one-node tree. | |
1273 if ($self->get_child_count < 2) { | |
1274 return undef; | |
1275 } | |
1276 } | |
1277 return $self; | |
1278 } | |
1279 | |
1280 | |
1281 =head2 delete_lineage | |
1282 | |
1283 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $node | |
1284 Example : $tree->delete_lineage($node); | |
1285 Description : Removes $node from tree. Nodes should be in the tree. | |
1286 Returntype : | |
1287 Exceptions : | |
1288 Caller : general | |
1289 Status : At risk (behaviour on exceptions could change) | |
1290 | |
1291 =cut | |
1292 | |
1293 sub delete_lineage { | |
1294 my $self = shift; | |
1295 my $del_me = shift; | |
1296 | |
1297 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$self]") | |
1298 unless ($self->isa('Bio::EnsEMBL::Compara::NestedSet')); | |
1299 | |
1300 my $parent = $del_me->parent; | |
1301 while ($parent) { | |
1302 my $num_children = scalar @{$parent->children}; | |
1303 if ($num_children > 1) { | |
1304 $self->remove_nodes([$del_me]); | |
1305 return $self; | |
1306 } elsif ($num_children == 1) { | |
1307 $self->remove_nodes([$del_me]); | |
1308 $del_me = $parent; | |
1309 $parent = $del_me->parent; | |
1310 } | |
1311 } | |
1312 return $self; | |
1313 } | |
1314 | |
1315 =head2 minimize_tree | |
1316 | |
1317 Arg [1] : -none- | |
1318 Example : $leaf->disavow_parent(); | |
1319 $tree = $tree->minimize_tree(); | |
1320 Description : Returns the tree after removing internal nodes that do not | |
1321 represent an multi- or bi-furcation anymore. This is typically | |
1322 required after disavowing a node. Please ensure you use the | |
1323 object returned by the method and not the original object | |
1324 anymore! | |
1325 Returntype : Bio::EnsEMBL::Compara::NestedSet object | |
1326 Exceptions : | |
1327 Caller : general | |
1328 Status : Stable | |
1329 | |
1330 =cut | |
1331 | |
1332 sub minimize_tree { | |
1333 my $self = shift; | |
1334 return $self if($self->is_leaf); | |
1335 | |
1336 foreach my $child (@{$self->children}) { | |
1337 $child->minimize_tree; | |
1338 } | |
1339 return $self->minimize_node; | |
1340 } | |
1341 | |
1342 | |
1343 sub minimize_node { | |
1344 my $self = shift; | |
1345 | |
1346 return $self unless($self->get_child_count() == 1); | |
1347 | |
1348 my $child = $self->children->[0]; | |
1349 my $dist = $child->distance_to_parent + $self->distance_to_parent; | |
1350 if ($self->parent) { | |
1351 $self->parent->add_child($child, $dist); | |
1352 $self->disavow_parent; | |
1353 } else { | |
1354 $child->disavow_parent; | |
1355 } | |
1356 return $child | |
1357 } | |
1358 | |
1359 | |
1360 sub scale { | |
1361 my $self = shift; | |
1362 my $scale = shift; | |
1363 | |
1364 foreach my $node (@{$self->get_all_nodes}) { | |
1365 my $bl = $node->distance_to_parent; | |
1366 $bl = 0 unless (defined $bl); | |
1367 $node->distance_to_parent($bl*$scale); | |
1368 } | |
1369 return $self; | |
1370 } | |
1371 | |
1372 | |
1373 sub scale_max_to { | |
1374 my $self = shift; | |
1375 my $new_max = shift; | |
1376 | |
1377 my $max_dist = $self->max_distance; | |
1378 my $scale_factor = $new_max / $max_dist; | |
1379 return $self->scale($scale_factor); | |
1380 } | |
1381 | |
1382 | |
1383 | |
1384 ################################## | |
1385 # | |
1386 # search methods | |
1387 # | |
1388 ################################## | |
1389 | |
1390 sub find_node_by_name { | |
1391 my $self = shift; | |
1392 my $name = shift; | |
1393 | |
1394 return $self if((defined $self->name) && $name eq $self->name); | |
1395 | |
1396 my $children = $self->children; | |
1397 foreach my $child_node (@$children) { | |
1398 my $found = $child_node->find_node_by_name($name); | |
1399 return $found if(defined($found)); | |
1400 } | |
1401 | |
1402 return undef; | |
1403 } | |
1404 | |
1405 sub find_node_by_node_id { | |
1406 my $self = shift; | |
1407 my $node_id = shift; | |
1408 | |
1409 return $self if($node_id eq $self->node_id); | |
1410 | |
1411 my $children = $self->children; | |
1412 foreach my $child_node (@$children) { | |
1413 my $found = $child_node->find_node_by_node_id($node_id); | |
1414 return $found if(defined($found)); | |
1415 } | |
1416 | |
1417 return undef; | |
1418 } | |
1419 | |
1420 sub find_leaf_by_name { | |
1421 my $self = shift; | |
1422 my $name = shift; | |
1423 | |
1424 return $self if((defined $self->name) and ($name eq $self->name)); | |
1425 | |
1426 my $leaves = $self->get_all_leaves; | |
1427 foreach my $leaf (@$leaves) { | |
1428 return $leaf if((defined $leaf->name) and ($name eq $leaf->name)); | |
1429 } | |
1430 | |
1431 return undef; | |
1432 } | |
1433 | |
1434 sub find_leaf_by_node_id { | |
1435 my $self = shift; | |
1436 my $node_id = shift; | |
1437 | |
1438 return $self if($node_id eq $self->node_id); | |
1439 | |
1440 my $leaves = $self->get_all_leaves; | |
1441 foreach my $leaf (@$leaves) { | |
1442 return $leaf if($node_id eq $leaf->node_id); | |
1443 } | |
1444 | |
1445 return undef; | |
1446 } | |
1447 | |
1448 | |
1449 =head2 get_all_sorted_leaves | |
1450 | |
1451 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $top_leaf | |
1452 Arg [...] : (optional) Bio::EnsEMBL::Compara::NestedSet $secondary_priority_leaf | |
1453 Example : my $sorted_leaves = $object->get_all_sorted_leaves($human_leaf); | |
1454 Example : my $sorted_leaves = $object->get_all_sorted_leaves($human_leaf, $mouse_leaf); | |
1455 Description : Sorts the tree such as $top_leaf is the first leave and returns | |
1456 all the other leaves in the order defined by the tree. | |
1457 It is possible to define as many secondary top leaves as you require | |
1458 to sort other branches of the tree. The priority to sort the trees | |
1459 is defined by the order in which you specify the leaves. | |
1460 Returntype : listref of Bio::EnsEMBL::Compara::NestedSet (all sorted leaves) | |
1461 Exceptions : none | |
1462 Caller : general | |
1463 Status : Stable | |
1464 | |
1465 =cut | |
1466 | |
1467 sub get_all_sorted_leaves { | |
1468 my ($self, @priority_leaves) = @_; | |
1469 | |
1470 if (!@priority_leaves) { | |
1471 return $self->get_all_leaves; | |
1472 } | |
1473 | |
1474 # Assign priority scores for all parent nodes of the priority leaves | |
1475 my $score_by_node; | |
1476 my $score = 0; | |
1477 # Loop through all the priority leaves, starting from the last one (lowest score) | |
1478 while (my $priority_leaf = pop @priority_leaves) { | |
1479 $score++; # Increases the score, next priority leaves (earlier in the argument list) will overwrite the score if needed | |
1480 my $this_node = $priority_leaf; | |
1481 # Loop through all the parent node up to the root of the tree | |
1482 do { | |
1483 $score_by_node->{$this_node} = $score; | |
1484 $this_node = $this_node->parent; | |
1485 } while ($this_node); | |
1486 } | |
1487 | |
1488 my $sorted_leaves = $self->_recursive_get_all_sorted_leaves($score_by_node); | |
1489 | |
1490 return $sorted_leaves; | |
1491 } | |
1492 | |
1493 =head2 _recursive_get_all_sorted_leaves | |
1494 | |
1495 Arg [1] : hashref $score_by_node | |
1496 Example : my $sorted_leaves = $object->_recursive_get_all_sorted_leaves($score_by_node); | |
1497 Description : Recursive code for the get_all_sorted_leaves() method | |
1498 Returntype : listref of Bio::EnsEMBL::Compara::NestedSet (sorted leaves) | |
1499 Exceptions : none | |
1500 Caller : private | |
1501 Status : Stable | |
1502 | |
1503 =cut | |
1504 | |
1505 sub _recursive_get_all_sorted_leaves { | |
1506 my $self = shift; | |
1507 my $score_by_node = shift; | |
1508 | |
1509 my $sorted_leaves = []; | |
1510 my $children = $self->children; | |
1511 | |
1512 if (@$children == 0) { | |
1513 $sorted_leaves = [$self]; | |
1514 } else { | |
1515 $children = [sort { | |
1516 ($score_by_node->{$b} || $score_by_node->{$a}) ? (($score_by_node->{$b} || 0)<=>($score_by_node->{$a} || 0)) : ($a->node_id <=> $b->node_id) | |
1517 } @$children]; | |
1518 for (my $i = 0; $i < @$children; $i++) { | |
1519 push(@$sorted_leaves, @{$children->[$i]->_recursive_get_all_sorted_leaves($score_by_node)}); | |
1520 } | |
1521 } | |
1522 | |
1523 return $sorted_leaves; | |
1524 } | |
1525 | |
1526 =head2 get_all_leaves | |
1527 | |
1528 Title : get_all_leaves | |
1529 Usage : my @leaves = @{$tree->get_all_leaves}; | |
1530 Function: searching from the given starting node, searches and creates list | |
1531 of all leaves in this subtree and returns by reference | |
1532 Example : | |
1533 Returns : reference to list of NestedSet objects (all leaves) | |
1534 Args : none | |
1535 | |
1536 =cut | |
1537 | |
1538 sub get_all_leaves { | |
1539 my $self = shift; | |
1540 | |
1541 my $leaves = {}; | |
1542 $self->_recursive_get_all_leaves($leaves); | |
1543 my @leaf_list = sort {$a->node_id <=> $b->node_id} values(%{$leaves}); | |
1544 return \@leaf_list; | |
1545 } | |
1546 | |
1547 sub _recursive_get_all_leaves { | |
1548 my $self = shift; | |
1549 my $leaves = shift; | |
1550 | |
1551 $leaves->{$self->obj_id} = $self if($self->is_leaf); | |
1552 | |
1553 foreach my $child (@{$self->children}) { | |
1554 no warnings 'recursion'; | |
1555 $child->_recursive_get_all_leaves($leaves); | |
1556 } | |
1557 } | |
1558 | |
1559 | |
1560 sub get_all_leaves_indexed { | |
1561 my $self = shift; | |
1562 deprecate("Use Bio::EnsEMBL::Compara::DBSQL::GeneTreeNodeAdaptor->fetch_all_leaves_indexed() method instead"); | |
1563 | |
1564 my @leaf_list = @{$self->adaptor->fetch_all_leaves_indexed($self)}; | |
1565 | |
1566 return \@leaf_list; | |
1567 } | |
1568 | |
1569 =head2 max_distance | |
1570 | |
1571 Title : max_distance | |
1572 Args : none | |
1573 Usage : $tree_node->max_distance; | |
1574 Function: searching from the given starting node, calculates the maximum distance to a leaf | |
1575 Returns : int | |
1576 | |
1577 =cut | |
1578 | |
1579 sub max_distance { | |
1580 my $self = shift; | |
1581 | |
1582 my $max_distance = 0; | |
1583 | |
1584 foreach my $child (@{$self->children}) { | |
1585 my $distance = $child->max_distance; | |
1586 $max_distance = $distance if($distance>$max_distance); | |
1587 } | |
1588 | |
1589 return ($self->distance_to_parent + $max_distance); | |
1590 } | |
1591 | |
1592 | |
1593 =head2 max_depth | |
1594 | |
1595 Title : max_depth | |
1596 Args : none | |
1597 Usage : $tree_node->max_depth; | |
1598 Function: searching from the given starting node, calculates the maximum depth to a leaf | |
1599 Returns : int | |
1600 | |
1601 =cut | |
1602 | |
1603 sub max_depth { | |
1604 my $self = shift; | |
1605 | |
1606 my $max_depth = 0; | |
1607 | |
1608 foreach my $child (@{$self->children}) { | |
1609 my $depth = $child->max_depth + 1; | |
1610 $max_depth=$depth if($depth>$max_depth); | |
1611 } | |
1612 return $max_depth; | |
1613 } | |
1614 | |
1615 | |
1616 =head2 find_first_shared_ancestor | |
1617 | |
1618 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $node | |
1619 Example : my $ancestor = $this_node->find_first_shared_ancestor($other_node); | |
1620 Description : Gets the first common ancestor between this node and the other one. | |
1621 Returntype : Bio::EnsEMBL::Compara::NestedSet object | |
1622 Exceptions : | |
1623 Caller : general | |
1624 Status : Stable | |
1625 | |
1626 =cut | |
1627 | |
1628 sub find_first_shared_ancestor { | |
1629 my $self = shift; | |
1630 my $node = shift; | |
1631 | |
1632 return $self if($self->equals($node)); | |
1633 return $node if($self->has_ancestor($node)); | |
1634 return $self->find_first_shared_ancestor($node->parent); | |
1635 } | |
1636 | |
1637 | |
1638 sub find_first_shared_ancestor_from_leaves { | |
1639 my $self = shift; | |
1640 my $leaf_list = shift; | |
1641 | |
1642 my @leaves = @{$leaf_list}; | |
1643 | |
1644 my $ancestor = shift @leaves; | |
1645 while (scalar @leaves > 0) { | |
1646 my $node = shift @leaves; | |
1647 $ancestor = $ancestor->find_first_shared_ancestor($node); | |
1648 } | |
1649 return $ancestor; | |
1650 } | |
1651 | |
1652 | |
1653 ################################## | |
1654 # | |
1655 # developer/adaptor API methods | |
1656 # | |
1657 ################################## | |
1658 | |
1659 | |
1660 # used for building tree from a DB fetch, want to restrict users to create trees | |
1661 # by only -add_child method | |
1662 sub _set_parent_link { | |
1663 my ($self, $link) = @_; | |
1664 | |
1665 $self->{'_parent_id'} = 0; | |
1666 $self->{'_parent_link'} = $link; | |
1667 $self->{'_parent_id'} = $link->get_neighbor($self)->node_id if($link); | |
1668 return $self; | |
1669 } | |
1670 | |
1671 | |
1672 # used for building tree from a DB fetch until all the objects are in memory | |
1673 sub _parent_id { | |
1674 my $self = shift; | |
1675 $self->{'_parent_id'} = shift if(@_); | |
1676 return $self->{'_parent_id'}; | |
1677 } | |
1678 | |
1679 # used for building tree from a DB fetch until all the objects are in memory | |
1680 sub _root_id { | |
1681 my $self = shift; | |
1682 $self->{'_root_id'} = shift if(@_); | |
1683 return $self->{'_root_id'}; | |
1684 } | |
1685 | |
1686 1; | |
1687 |