0
|
1 =head1 NAME
|
|
2
|
|
3 NestedSet - DESCRIPTION of Object
|
|
4
|
|
5 =head1 SYNOPSIS
|
|
6
|
|
7 =head1 DESCRIPTION
|
|
8
|
|
9 Abstract superclass to encapsulate the process of storing and manipulating a
|
|
10 nested-set representation tree. Also implements a 'reference count' system
|
|
11 based on the ObjectiveC retain/release design.
|
|
12 Designed to be used as the Root class for all Compara 'proxy' classes
|
|
13 (Member, GenomeDB, DnaFrag, NCBITaxon) to allow them to be made into sets and trees.
|
|
14
|
|
15 =head1 CONTACT
|
|
16
|
|
17 Contact Albert Vilella on implementation detail: avilella@ebi.ac.uk
|
|
18 Contact Jessica Severin on implementation/design detail: jessica@ebi.ac.uk
|
|
19 Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
|
|
20
|
|
21 =head1 APPENDIX
|
|
22
|
|
23 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
24
|
|
25 =cut
|
|
26
|
|
27
|
|
28
|
|
29 package Bio::EnsEMBL::Compara::NestedSet;
|
|
30
|
|
31 use strict;
|
|
32 use warnings;
|
|
33 use Bio::EnsEMBL::Utils::Exception;
|
|
34 use Bio::EnsEMBL::Utils::Argument;
|
|
35
|
|
36 use Bio::EnsEMBL::Utils::Exception qw(deprecate throw);
|
|
37
|
|
38 use Bio::TreeIO;
|
|
39 use Bio::EnsEMBL::Compara::Graph::Node;
|
|
40 use Bio::EnsEMBL::Compara::Member;
|
|
41 our @ISA = qw(Bio::EnsEMBL::Compara::Graph::Node);
|
|
42
|
|
43 #################################################
|
|
44 # Factory methods
|
|
45 #################################################
|
|
46
|
|
47 sub init {
|
|
48 my $self = shift;
|
|
49 $self->SUPER::init;
|
|
50 return $self;
|
|
51 }
|
|
52
|
|
53 sub dealloc {
|
|
54 my $self = shift;
|
|
55
|
|
56 #printf("DEALLOC NestedSet refcount:%d ", $self->refcount); $self->print_node;
|
|
57 #$self->release_children;
|
|
58 return $self->SUPER::dealloc;
|
|
59 }
|
|
60
|
|
61 =head2 copy
|
|
62
|
|
63 Overview : creates copy of tree starting at this node going down
|
|
64 Example : my $clone = $self->copy;
|
|
65 Returntype : Bio::EnsEMBL::Compara::NestedSet
|
|
66 Exceptions : none
|
|
67 Caller : general
|
|
68
|
|
69 =cut
|
|
70
|
|
71 sub copy {
|
|
72 my $self = shift;
|
|
73
|
|
74 my $mycopy = $self->SUPER::copy;
|
|
75 bless $mycopy, ref $self;
|
|
76
|
|
77 $mycopy->distance_to_parent($self->distance_to_parent);
|
|
78 $mycopy->left_index($self->left_index);
|
|
79 $mycopy->right_index($self->right_index);
|
|
80
|
|
81 foreach my $child (@{$self->children}) {
|
|
82 $mycopy->add_child($child->copy);
|
|
83 }
|
|
84 return $mycopy;
|
|
85 }
|
|
86
|
|
87
|
|
88 =head2 release_tree
|
|
89
|
|
90 Overview : deletes and frees the memory used by this tree
|
|
91 and all the underlying nodes.
|
|
92 Example : $self->release_tree;
|
|
93 Returntype : undef
|
|
94 Exceptions : none
|
|
95 Caller : general
|
|
96
|
|
97 =cut
|
|
98
|
|
99 sub release_tree {
|
|
100 my $self = shift;
|
|
101
|
|
102 my $child_count = $self->get_child_count;
|
|
103 $self->disavow_parent;
|
|
104 $self->cascade_unlink if($child_count);
|
|
105 return undef;
|
|
106 }
|
|
107
|
|
108 #################################################
|
|
109 # Object variable methods
|
|
110 #################################################
|
|
111
|
|
112 sub left_index {
|
|
113 my $self = shift;
|
|
114 $self->{'_left_index'} = shift if(@_);
|
|
115 $self->{'_left_index'} = 0 unless(defined($self->{'_left_index'}));
|
|
116 return $self->{'_left_index'};
|
|
117 }
|
|
118
|
|
119 sub right_index {
|
|
120 my $self = shift;
|
|
121 $self->{'_right_index'} = shift if(@_);
|
|
122 $self->{'_right_index'} = 0 unless(defined($self->{'_right_index'}));
|
|
123 return $self->{'_right_index'};
|
|
124 }
|
|
125
|
|
126
|
|
127 #######################################
|
|
128 # Set manipulation methods
|
|
129 #######################################
|
|
130
|
|
131 =head2 add_child
|
|
132
|
|
133 Overview : attaches child nestedset node to this nested set
|
|
134 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $child
|
|
135 Arg [2] : (opt.) distance between this node and child
|
|
136 Example : $self->add_child($child);
|
|
137 Returntype : undef
|
|
138 Exceptions : if child is undef or not a NestedSet subclass
|
|
139 Caller : general
|
|
140
|
|
141 =cut
|
|
142
|
|
143 sub add_child {
|
|
144 my $self = shift;
|
|
145 my $child = shift;
|
|
146 my $dist = shift;
|
|
147
|
|
148 throw("child not defined")
|
|
149 unless(defined($child));
|
|
150 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$child]")
|
|
151 unless($child->isa('Bio::EnsEMBL::Compara::NestedSet'));
|
|
152
|
|
153 #printf("add_child: parent(%s) <-> child(%s)\n", $self->node_id, $child->node_id);
|
|
154
|
|
155 unless(defined($dist)) { $dist = $child->_distance; }
|
|
156
|
|
157 $child->disavow_parent;
|
|
158 #create_link_to_node is a safe method which checks if connection exists
|
|
159 my $link = $self->create_link_to_node($child);
|
|
160 $child->_set_parent_link($link);
|
|
161 $self->{'_children_loaded'} = 1;
|
|
162 $link->distance_between($dist);
|
|
163 return $link;
|
|
164 }
|
|
165
|
|
166
|
|
167 =head2 disavow_parent
|
|
168
|
|
169 Overview : unlink and release self from its parent
|
|
170 might cause self to delete if refcount reaches Zero.
|
|
171 Example : $self->disavow_parent
|
|
172 Returntype : undef
|
|
173 Caller : general
|
|
174
|
|
175 =cut
|
|
176
|
|
177 sub disavow_parent {
|
|
178 my $self = shift;
|
|
179
|
|
180 if($self->{'_parent_link'}) {
|
|
181 my $link = $self->{'_parent_link'};
|
|
182 #print("DISAVOW parent : "); $parent->print_node;
|
|
183 #print(" child : "); $self->print_node;
|
|
184 $link->dealloc;
|
|
185 }
|
|
186 $self->_set_parent_link(undef);
|
|
187 return undef;
|
|
188 }
|
|
189
|
|
190
|
|
191 =head2 release_children
|
|
192
|
|
193 Overview : recursive releases all children
|
|
194 will cause potential deletion of children if refcount reaches Zero.
|
|
195 Example : $self->release_children
|
|
196 Returntype : $self
|
|
197 Exceptions : none
|
|
198 Caller : general
|
|
199
|
|
200 =cut
|
|
201
|
|
202 sub release_children {
|
|
203 my $self = shift;
|
|
204
|
|
205 # by calling with parent, this preserved the link to the parent
|
|
206 # and thus doesn't unlink self
|
|
207 foreach my $child (@{$self->children}) {
|
|
208 $child->disavow_parent;
|
|
209 $child->release_children;
|
|
210 }
|
|
211 #$self->cascade_unlink($self->{'_parent_node'});
|
|
212 return $self;
|
|
213 }
|
|
214
|
|
215
|
|
216 =head2 parent
|
|
217
|
|
218 Overview : returns the parent NestedSet object for this node
|
|
219 Example : my $my_parent = $object->parent();
|
|
220 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet
|
|
221 Exceptions : none
|
|
222 Caller : general
|
|
223
|
|
224 =cut
|
|
225
|
|
226 sub parent {
|
|
227 my $self = shift;
|
|
228 return $self->adaptor->fetch_parent_for_node($self) if defined $self->adaptor;
|
|
229 return $self->{'_parent_link'}->get_neighbor($self) if defined $self->{'_parent_link'};
|
|
230 return undef;
|
|
231 }
|
|
232
|
|
233
|
|
234 sub parent_link {
|
|
235 my $self = shift;
|
|
236 return $self->{'_parent_link'};
|
|
237 }
|
|
238
|
|
239 sub has_parent {
|
|
240 my $self = shift;
|
|
241 return 1 if($self->{'_parent_link'} or $self->_parent_id);
|
|
242 return 0;
|
|
243 }
|
|
244
|
|
245
|
|
246 sub has_ancestor {
|
|
247 my $self = shift;
|
|
248 my $ancestor = shift;
|
|
249 throw "[$ancestor] must be a Bio::EnsEMBL::Compara::NestedSet object"
|
|
250 unless ($ancestor and $ancestor->isa("Bio::EnsEMBL::Compara::NestedSet"));
|
|
251 my $node = $self->parent;
|
|
252 while($node) {
|
|
253 return 1 if($node->equals($ancestor));
|
|
254 $node = $node->parent;
|
|
255 }
|
|
256 return 0;
|
|
257 }
|
|
258
|
|
259
|
|
260 =head2 root
|
|
261
|
|
262 Overview : returns the root NestedSet object for this node
|
|
263 returns $self if node has no parent (this is the root)
|
|
264 Example : my $root = $object->root();
|
|
265 Description: Returns the root of the tree for this node
|
|
266 with links to all the intermediate nodes. Sister nodes
|
|
267 are not included in the result.
|
|
268 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet
|
|
269 Exceptions : none
|
|
270 Caller : general
|
|
271
|
|
272 =cut
|
|
273
|
|
274 sub root {
|
|
275 my $self = shift;
|
|
276
|
|
277 # Only if we don't have it cached
|
|
278 # Only if we have left and right and it's not a leaf
|
|
279 # Only if it's for release clusterset (1 genetrees - 0 genomic align trees)
|
|
280 if (!defined($self->{'_parent_link'}) and $self->adaptor
|
|
281 and ($self->right_index-$self->left_index)>1
|
|
282 and (defined $self->{'_parent_id'})
|
|
283 and (1==$self->{'_parent_id'})
|
|
284 ) {
|
|
285 return $self->adaptor->fetch_root_by_node($self);
|
|
286 }
|
|
287
|
|
288 # Otherwise, go through the step-by-step method
|
|
289 return $self unless(defined($self->parent));
|
|
290 # return $self if($self->node_id eq $self->parent->node_id);
|
|
291 return $self->parent->root;
|
|
292 }
|
|
293
|
|
294 sub subroot {
|
|
295 deprecate('subroot() should not be used and will be removed in release 70.
|
|
296 If you are using it, please contact the dev mailing-list dev@ensembl.org');
|
|
297 my $self = shift;
|
|
298
|
|
299 return undef unless($self->parent);
|
|
300 return $self unless(defined($self->parent->parent));
|
|
301 return $self->parent->subroot;
|
|
302 }
|
|
303
|
|
304
|
|
305 =head2 children
|
|
306
|
|
307 Overview : returns a list of NestedSet nodes directly under this parent node
|
|
308 Example : my @children = @{$object->children()};
|
|
309 Returntype : array reference of Bio::EnsEMBL::Compara::NestedSet objects (could be empty)
|
|
310 Exceptions : none
|
|
311 Caller : general
|
|
312 Algorithm : new algorithm for fetching children:
|
|
313 for each link connected to this NestedsSet node, a child is defined if
|
|
314 old: the link is not my parent_link
|
|
315 new: the link's neighbors' parent_link is the link
|
|
316 This allows one (with a carefully coded algorithm) to overlay a tree on top
|
|
317 of a fully connected graph and use the parent/children methods of NestedSet
|
|
318 to walk the 'tree' substructure of the graph.
|
|
319 Trees that are really just trees are still trees.
|
|
320
|
|
321 =cut
|
|
322
|
|
323 sub children {
|
|
324 my $self = shift;
|
|
325 $self->load_children_if_needed;
|
|
326 my @kids;
|
|
327 foreach my $link (@{$self->links}) {
|
|
328 next unless(defined($link));
|
|
329 my $neighbor = $link->get_neighbor($self);
|
|
330 my $parent_link = $neighbor->parent_link;
|
|
331 next unless($parent_link);
|
|
332 next unless($parent_link eq $link);
|
|
333 push @kids, $neighbor;
|
|
334 }
|
|
335 return \@kids;
|
|
336 }
|
|
337
|
|
338 sub each_child {
|
|
339 my $self = shift;
|
|
340
|
|
341 # Return an iterator over the children (most effective when children list is LONG)
|
|
342 my $count = -1;
|
|
343 $self->load_children_if_needed;
|
|
344 my @links = @{$self->links};
|
|
345
|
|
346 return sub {
|
|
347 while ($count < scalar(@links)) {
|
|
348 $count++;
|
|
349 my $link = $links[$count];
|
|
350 next unless(defined $link);
|
|
351
|
|
352 my $neighbor = $link->get_neighbor($self);
|
|
353 next unless($neighbor->parent_link);
|
|
354 next unless($neighbor->parent_link->equals($link));
|
|
355 return $neighbor;
|
|
356 }
|
|
357 return undef;
|
|
358 };
|
|
359 }
|
|
360
|
|
361 =head2 sorted_children
|
|
362
|
|
363 Overview : returns a sorted list of NestedSet nodes directly under this parent node
|
|
364 sort so that internal nodes<leaves and then on distance
|
|
365 Example : my @kids = @{$object->ordered_children()};
|
|
366 Returntype : array reference of Bio::EnsEMBL::Compara::NestedSet objects (could be empty)
|
|
367 Exceptions : none
|
|
368 Caller : general
|
|
369
|
|
370 =cut
|
|
371
|
|
372 sub sorted_children {
|
|
373 my $self = shift;
|
|
374
|
|
375 my @sortedkids =
|
|
376 sort { $a->is_leaf <=> $b->is_leaf
|
|
377 ||
|
|
378 $a->get_child_count <=> $b->get_child_count
|
|
379 ||
|
|
380 $a->distance_to_parent <=> $b->distance_to_parent
|
|
381 } @{$self->children;};
|
|
382 return \@sortedkids;
|
|
383 }
|
|
384
|
|
385
|
|
386 =head2 get_all_nodes
|
|
387
|
|
388 Arg 1 : hashref $node_hash [used for recursivity, do not use it!]
|
|
389 Example : my $all_nodes = $root->get_all_nodes();
|
|
390 Description : Returns this and all underlying sub nodes
|
|
391 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects
|
|
392 Exceptions : none
|
|
393 Caller : general
|
|
394 Status : Stable
|
|
395
|
|
396 =cut
|
|
397
|
|
398 sub get_all_nodes {
|
|
399 my $self = shift;
|
|
400 my $node_hash = shift;
|
|
401
|
|
402 my $toplevel = 0;
|
|
403 unless($node_hash) {
|
|
404 $node_hash = {};
|
|
405 $toplevel =1;
|
|
406 }
|
|
407
|
|
408 $node_hash->{$self->obj_id} = $self;
|
|
409 foreach my $child (@{$self->children}) {
|
|
410 $child->get_all_nodes($node_hash);
|
|
411 }
|
|
412
|
|
413 if ($toplevel) {
|
|
414 return [values(%$node_hash)];
|
|
415 }
|
|
416 return undef;
|
|
417 }
|
|
418
|
|
419
|
|
420 =head2 get_all_nodes_by_tag_value
|
|
421
|
|
422 Arg 1 : tag_name
|
|
423 Arg 2 : tag_value (optional)
|
|
424 Example : my $all_nodes = $root->get_all_nodes_by_tagvalue('taxon_name'=>'Mamalia');
|
|
425 Description : Returns all underlying nodes that have a tag of the given name, and optionally a value of the given value.
|
|
426 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects
|
|
427 Exceptions : none
|
|
428 Caller : general
|
|
429 Status :
|
|
430
|
|
431 =cut
|
|
432
|
|
433 sub get_all_nodes_by_tag_value {
|
|
434 my $self = shift;
|
|
435 my $tag = shift || die( "Need a tag name" );
|
|
436 my $value = shift;
|
|
437 my @found;
|
|
438 foreach my $node( @{$self->get_all_nodes} ){
|
|
439 my $tval = $node->get_tagvalue($tag);
|
|
440 if( defined $tval and $value ? $tval eq $value : 1 ){
|
|
441 push @found, $node;
|
|
442 }
|
|
443 }
|
|
444 return [@found];
|
|
445 }
|
|
446
|
|
447
|
|
448 =head2 get_all_subnodes
|
|
449
|
|
450 Arg 1 : hashref $node_hash [used for recursivity, do not use it!]
|
|
451 Example : my $all_nodes = $root->get_all_nodes();
|
|
452 Description : Returns all underlying sub nodes
|
|
453 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects
|
|
454 Exceptions : none
|
|
455 Caller : general
|
|
456 Status : Stable
|
|
457
|
|
458 =cut
|
|
459
|
|
460 sub get_all_subnodes {
|
|
461 my $self = shift;
|
|
462 my $node_hash = shift;
|
|
463
|
|
464 my $toplevel = 0;
|
|
465 unless($node_hash) {
|
|
466 $node_hash = {};
|
|
467 $toplevel =1;
|
|
468 }
|
|
469
|
|
470 foreach my $child (@{$self->children}) {
|
|
471 $node_hash->{$child->obj_id} = $child;
|
|
472 $child->get_all_subnodes($node_hash);
|
|
473 }
|
|
474 return values(%$node_hash) if($toplevel);
|
|
475 return undef;
|
|
476 }
|
|
477
|
|
478 =head2 get_all_ancestors
|
|
479
|
|
480 Arg 1 :
|
|
481 Example : my @ancestors = @{$node->get_all_ancestors};
|
|
482 Description : Returns all ancestor nodes for a given node
|
|
483 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects
|
|
484 Exceptions : none
|
|
485 Caller : general
|
|
486 Status :
|
|
487
|
|
488 =cut
|
|
489
|
|
490 sub get_all_ancestors {
|
|
491 my $self = shift;
|
|
492 my $this = $self;
|
|
493 my @ancestors;
|
|
494 while( $this = $this->parent ){
|
|
495 push @ancestors, $this;
|
|
496 }
|
|
497 return [@ancestors]
|
|
498 }
|
|
499
|
|
500 =head2 get_all_adjacent_subtrees
|
|
501
|
|
502 Arg 1 :
|
|
503 Example : my @subtrees = @{$node->get_all_adjacent_subtrees};
|
|
504 Description : Returns subtree 'root' nodes where the subtree is adjacent
|
|
505 to this node. Used e.g. by the web code for the 'collapse
|
|
506 other nodes' action
|
|
507 ReturnType : listref of Bio::EnsEMBL::Compara::NestedSet objects
|
|
508 Exceptions : none
|
|
509 Caller : EnsEMBL::Web::Component::Gene::ComparaTree
|
|
510 Status :
|
|
511
|
|
512 =cut
|
|
513
|
|
514 sub get_all_adjacent_subtrees {
|
|
515 my $self = shift;
|
|
516 my $node_id = $self->node_id;
|
|
517 my @node_path_to_root = ($self, @{$self->get_all_ancestors} );
|
|
518 my %path_node_ids = map{ $_->node_id => 1 } @node_path_to_root;
|
|
519
|
|
520 my $this = $self->root; # Start at the root node
|
|
521
|
|
522 my @adjacent_subtrees;
|
|
523 while( $this ){
|
|
524 last if $this->node_id == $node_id; # Stop on reaching current node
|
|
525 my $next;
|
|
526 foreach my $child (@{$this->children}){
|
|
527 next if $child->is_leaf; # Leaves cannot be subtrees
|
|
528 if( $path_node_ids{$child->node_id} ){ # Ancestor node
|
|
529 $next = $child;
|
|
530 } else {
|
|
531 push @adjacent_subtrees, $child;
|
|
532 }
|
|
533 }
|
|
534 $this = $next || undef;
|
|
535 }
|
|
536
|
|
537 return [@adjacent_subtrees]
|
|
538 }
|
|
539
|
|
540
|
|
541 =head2 num_leaves
|
|
542
|
|
543 Example : my $num_leaves = $node->num_leaves
|
|
544 Description : Returns the number of leaves underlying the node
|
|
545 ReturnType : integer
|
|
546 Exceptions : none
|
|
547 Caller : general
|
|
548 Status : At risk (relies on left and right indexes)
|
|
549
|
|
550 =cut
|
|
551 #'
|
|
552 sub num_leaves {
|
|
553 my $self = shift;
|
|
554
|
|
555 my $left = $self->left_index;
|
|
556 my $right = $self->right_index;
|
|
557
|
|
558 return unless( $left && $right );
|
|
559
|
|
560 my $num = $right - $left + 1;
|
|
561 my $num_leaves = ( ($num/2) + 1 ) / 2;
|
|
562
|
|
563 return $num_leaves;
|
|
564 }
|
|
565
|
|
566
|
|
567 sub get_child_count {
|
|
568 my $self = shift;
|
|
569 $self->load_children_if_needed;
|
|
570 return scalar @{$self->children};
|
|
571 # my $count = $self->link_count;
|
|
572 # $count-- if($self->has_parent);
|
|
573 # return $count;
|
|
574 }
|
|
575
|
|
576 sub load_children_if_needed {
|
|
577 my $self = shift;
|
|
578 if(!defined($self->{'_children_loaded'}) and $self->adaptor) {
|
|
579 #define _children_id_hash thereby signally that I've tried to load my children
|
|
580 $self->{'_children_loaded'} = 1;
|
|
581 #print("load_children_if_needed : "); $self->print_node;
|
|
582 $self->adaptor->fetch_all_children_for_node($self);
|
|
583 }
|
|
584 }
|
|
585
|
|
586 sub no_autoload_children {
|
|
587 my $self = shift;
|
|
588
|
|
589 return if($self->{'_children_loaded'});
|
|
590 $self->{'_children_loaded'} = 1;
|
|
591 }
|
|
592
|
|
593
|
|
594 =head2 distance_to_parent
|
|
595
|
|
596 Arg [1] : (opt.) <int or double> distance
|
|
597 Example : my $dist = $object->distance_to_parent();
|
|
598 Example : $object->distance_to_parent(1.618);
|
|
599 Description: Getter/Setter for the distance between this child and its parent
|
|
600 Returntype : integer node_id
|
|
601 Exceptions : none
|
|
602 Caller : general
|
|
603
|
|
604 =cut
|
|
605
|
|
606 sub distance_to_parent {
|
|
607 my $self = shift;
|
|
608 my $dist = shift;
|
|
609
|
|
610 if($self->{'_parent_link'}) {
|
|
611 if(defined($dist)) { $self->{'_parent_link'}->distance_between($dist); }
|
|
612 else { $dist = $self->{'_parent_link'}->distance_between; }
|
|
613 } else {
|
|
614 if(defined($dist)) { $self->_distance($dist); }
|
|
615 else { $dist = $self->_distance; }
|
|
616 }
|
|
617 return $dist;
|
|
618 }
|
|
619
|
|
620 sub _distance {
|
|
621 my $self = shift;
|
|
622 $self->{'_distance_to_parent'} = shift if(@_);
|
|
623 $self->{'_distance_to_parent'} = 0.0 unless(defined($self->{'_distance_to_parent'}));
|
|
624 return $self->{'_distance_to_parent'};
|
|
625 }
|
|
626
|
|
627 sub distance_to_root {
|
|
628 my $self = shift;
|
|
629 my $dist = $self->distance_to_parent;
|
|
630 $dist += $self->parent->distance_to_root if($self->parent);
|
|
631 return $dist;
|
|
632 }
|
|
633
|
|
634
|
|
635 =head2 distance_to_ancestor
|
|
636
|
|
637 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $ancestor
|
|
638 Example : my $distance = $this_node->distance_to_ancestor($ancestor);
|
|
639 Description : Calculates the distance in the tree between this node and
|
|
640 its ancestor $ancestor
|
|
641 Returntype : float
|
|
642 Exceptions : throws if $ancestor is not an ancestor of this node.
|
|
643 Caller : general
|
|
644 Status : Stable
|
|
645
|
|
646 =cut
|
|
647
|
|
648 sub distance_to_ancestor {
|
|
649 my $self = shift;
|
|
650 my $ancestor = shift;
|
|
651
|
|
652 if ($ancestor->node_id eq $self->node_id) {
|
|
653 return 0;
|
|
654 }
|
|
655 unless (defined $self->parent) {
|
|
656 throw("Ancestor not found\n");
|
|
657 }
|
|
658 return $self->distance_to_parent + $self->parent->distance_to_ancestor($ancestor);
|
|
659 }
|
|
660
|
|
661
|
|
662 =head2 distance_to_node
|
|
663
|
|
664 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $node
|
|
665 Example : my $distance = $this_node->distance_to_node($other_node);
|
|
666 Description : Calculates the distance in the tree between these
|
|
667 two nodes.
|
|
668 Returntype : float
|
|
669 Exceptions : returns undef if no ancestor can be found, no distances are
|
|
670 defined in the tree, etc.
|
|
671 Caller : general
|
|
672 Status : Stable
|
|
673
|
|
674 =cut
|
|
675
|
|
676 sub distance_to_node {
|
|
677 my $self = shift;
|
|
678 my $node = shift;
|
|
679
|
|
680 my $ancestor = $self->find_first_shared_ancestor($node);
|
|
681 if (!$ancestor) {
|
|
682 return undef;
|
|
683 }
|
|
684 my $distance = $self->distance_to_ancestor($ancestor);
|
|
685 $distance += $node->distance_to_ancestor($ancestor);
|
|
686
|
|
687 return $distance;
|
|
688 }
|
|
689
|
|
690
|
|
691 # Returns a TreeI-compliant object based on this NestedSet.
|
|
692 sub get_TreeI {
|
|
693 my $self = shift;
|
|
694 my $newick = $self->newick_format();
|
|
695
|
|
696 open(my $fake_fh, "+<", \$newick);
|
|
697 my $treein = new Bio::TreeIO(-fh => $fake_fh, -format => 'newick');
|
|
698 my $treeI = $treein->next_tree;
|
|
699 $treein->close;
|
|
700
|
|
701 return $treeI;
|
|
702 }
|
|
703
|
|
704 sub new_from_newick {
|
|
705 my $class = shift;
|
|
706 my $file = shift;
|
|
707 my $treein = new Bio::TreeIO(-file => $file, -format => 'newick');
|
|
708 my $treeI = $treein->next_tree;
|
|
709 $treein->close;
|
|
710
|
|
711 return $class->new_from_TreeI($treeI);
|
|
712 }
|
|
713
|
|
714 sub new_from_TreeI {
|
|
715 my $class = shift;
|
|
716 my $treeI = shift;
|
|
717
|
|
718 my $rootI = $treeI->get_root_node;
|
|
719 my $node = new $class;
|
|
720
|
|
721 # Kick off the recursive, parallel node adding.
|
|
722 _add_nodeI_to_node($node,$rootI);
|
|
723
|
|
724 return $node;
|
|
725 }
|
|
726
|
|
727 # Recursive helper for new_from_TreeI.
|
|
728 sub _add_nodeI_to_node {
|
|
729 my $node = shift; # Our node object (Compara)
|
|
730 my $nodeI = shift; # Our nodeI object (BioPerl)
|
|
731
|
|
732 foreach my $c ($nodeI->each_Descendent) {
|
|
733 my $child = ref($node)->new;
|
|
734
|
|
735 my $name = $c->id || '';
|
|
736 $name =~ s/^\s+//;
|
|
737 $name =~ s/\s+$//;
|
|
738
|
|
739 # Set name.
|
|
740 $child->name($name);
|
|
741
|
|
742 # Set branch length.
|
|
743 $node->add_child($child,$c->branch_length);
|
|
744
|
|
745 # Recurse.
|
|
746 _add_nodeI_to_node($child,$c);
|
|
747 }
|
|
748 }
|
|
749
|
|
750 =head2 print_tree
|
|
751
|
|
752 Arg [1] : int $scale
|
|
753 Example : $this_node->print_tree(100);
|
|
754 Description : Prints this tree in ASCII format. The scale is used to define
|
|
755 the width of the tree in the output
|
|
756 Returntype : undef
|
|
757 Exceptions :
|
|
758 Caller : general
|
|
759 Status : At risk (as the output might change)
|
|
760
|
|
761 =cut
|
|
762
|
|
763 sub print_tree {
|
|
764 my $self = shift;
|
|
765 my $scale = shift;
|
|
766
|
|
767 $scale = 100 unless($scale);
|
|
768 $self->_internal_print_tree(undef, 0, $scale);
|
|
769 }
|
|
770
|
|
771 sub string_tree {
|
|
772 my $self = shift;
|
|
773 my $scale = shift;
|
|
774 $scale ||= 100;
|
|
775 my $buffer = '';
|
|
776 $self->_internal_string_tree(undef, 0, $scale, \$buffer);
|
|
777 return $buffer;
|
|
778 }
|
|
779
|
|
780 sub _internal_string_tree {
|
|
781 my $self = shift;
|
|
782 my $indent = shift;
|
|
783 my $lastone = shift;
|
|
784 my $scale = shift;
|
|
785 my $buffer = shift;
|
|
786
|
|
787 if(defined($indent)) {
|
|
788 $$buffer .= $indent;
|
|
789 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { $$buffer .= '-'; }
|
|
790 }
|
|
791
|
|
792 $$buffer .= $self->string_node($indent);
|
|
793
|
|
794 if(defined($indent)) {
|
|
795 if($lastone) {
|
|
796 chop($indent);
|
|
797 $indent .= " ";
|
|
798 }
|
|
799 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { $indent .= ' '; }
|
|
800 }
|
|
801 $indent = '' unless(defined($indent));
|
|
802 $indent .= "|";
|
|
803
|
|
804 my $children = $self->sorted_children;
|
|
805 my $count=0;
|
|
806 $lastone = 0;
|
|
807 foreach my $child_node (@$children) {
|
|
808 $count++;
|
|
809 $lastone = 1 if($count == scalar(@$children));
|
|
810 $child_node->_internal_string_tree($indent,$lastone,$scale,$buffer);
|
|
811 }
|
|
812 }
|
|
813
|
|
814 sub _internal_print_tree {
|
|
815 my $self = shift;
|
|
816 my $indent = shift;
|
|
817 my $lastone = shift;
|
|
818 my $scale = shift;
|
|
819
|
|
820 if(defined($indent)) {
|
|
821 print($indent);
|
|
822 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { print('-'); }
|
|
823 }
|
|
824
|
|
825 $self->print_node($indent);
|
|
826
|
|
827 if(defined($indent)) {
|
|
828 if($lastone) {
|
|
829 chop($indent);
|
|
830 $indent .= " ";
|
|
831 }
|
|
832 for(my $i=0; $i<$self->distance_to_parent()*$scale; $i++) { $indent .= ' '; }
|
|
833 }
|
|
834 $indent = '' unless(defined($indent));
|
|
835 $indent .= "|";
|
|
836
|
|
837 my $children = $self->sorted_children;
|
|
838 my $count=0;
|
|
839 $lastone = 0;
|
|
840 foreach my $child_node (@$children) {
|
|
841 $count++;
|
|
842 $lastone = 1 if($count == scalar(@$children));
|
|
843 $child_node->_internal_print_tree($indent,$lastone,$scale);
|
|
844 }
|
|
845 }
|
|
846
|
|
847
|
|
848 sub print_node {
|
|
849 my $self = shift;
|
|
850 print $self->string_node;
|
|
851 }
|
|
852
|
|
853 sub string_node {
|
|
854 my $self = shift;
|
|
855 my $str = '(';
|
|
856
|
|
857 my $isdup = 0;
|
|
858 $isdup = 1 if ($self->get_tagvalue('Duplication', 0) > 0 and not $self->get_tagvalue('dubious_duplication', 0));
|
|
859 $isdup = 1 if $self->get_tagvalue('node_type', '') eq 'duplication';
|
|
860
|
|
861 my $isdub = ($self->get_tagvalue('node_type', '') eq 'dubious');
|
|
862
|
|
863 if ($isdup) {
|
|
864 my $taxon_name = $self->get_tagvalue('taxon_name', '');
|
|
865 if ($taxon_name =~ /\S+\ \S+/) {
|
|
866 $str .= "Dup ";
|
|
867 } else {
|
|
868 $str .= "DUP ";
|
|
869 }
|
|
870 my $sis = $self->get_tagvalue('duplication_confidence_score', 0) * 100;
|
|
871 $str .= sprintf('SIS=%d ', $sis);
|
|
872 } elsif ($isdub) {
|
|
873 $str .= "DD ";
|
|
874 $str .= 'SIS=0 ';
|
|
875 }
|
|
876 if($self->has_tag("bootstrap")) { my $bootstrap_value = $self->get_tagvalue("bootstrap"); $str .= "B=$bootstrap_value "; }
|
|
877 if($self->has_tag("taxon_name")) { my $taxon_name_value = $self->get_tagvalue("taxon_name"); $str .="T=$taxon_name_value "; }
|
|
878 $str .= sprintf("%s %d,%d)", $self->node_id, $self->left_index, $self->right_index);
|
|
879 $str .= sprintf("%s\n", $self->name || '');
|
|
880 return $str;
|
|
881 }
|
|
882
|
|
883
|
|
884 =head2 newick_format
|
|
885
|
|
886 Arg [1] : string $format_mode
|
|
887 Example : $this_node->newick_format("full");
|
|
888 Description : Prints this tree in Newick format. Several modes are
|
|
889 available: full, display_label_composite, simple, species,
|
|
890 species_short_name, ncbi_taxon, ncbi_name, njtree and phylip
|
|
891 Returntype : string
|
|
892 Exceptions :
|
|
893 Caller : general
|
|
894 Status : Stable
|
|
895
|
|
896 =cut
|
|
897
|
|
898 my %ryo_modes = (
|
|
899 'member_id' => '%{^-m}:%{d}',
|
|
900 'member_id_taxon_id' => '%{-m}%{"_"-x}:%{d}',
|
|
901 'display_label_composite' => '%{-l"_"}%{n}%{"_"-s}:%{d}',
|
|
902 'full_common' => '%{n}%{" "-c.^}%{"."-g}%{"_"-t"_MYA"}:%{d}',
|
|
903 'gene_stable_id_composite' => '%{-i"_"}%{n}%{"_"-s}:%{d}',
|
|
904 'gene_stable_id' => '%{-i}:%{d}',
|
|
905 'ncbi_taxon' => '%{o}',
|
|
906 'ncbi_name' => '%{n}',
|
|
907 'simple' => '%{^-n}:%{d}',
|
|
908 'full' => '%{n}:%{d}',
|
|
909 'species' => '%{^-S|p}',
|
|
910 'species_short_name' => '%{^-s|p}',
|
|
911 'otu_id' => '%{-s"|"}%{-l"|"}%{n}:%{d}',
|
|
912 'int_node_id' => '%{-n}%{o-}:%{d}',
|
|
913 'full_web' => '%{n-}%{-n|p}%{"_"-s"_"}%{":"d}',
|
|
914 'phylip' => '%21{n,}:%{d}',
|
|
915 'njtree' => '%{o}%{-T(is_incomplete)|E"*"}%{-T(is_incomplete,0,*)}',
|
|
916 );
|
|
917
|
|
918 my $nhx0 = '%{n-_|T(taxon_name)}:%{d}';
|
|
919 my $nhx1 = ':D=%{-E"N"}%{T(node_type,duplication,Y)-}%{T(node_type,dubious,Y)-}%{T(node_type,gene_split,Y)-}%{T(node_type,speciation,N)}%{":B="T(bootstrap)}';
|
|
920 my $nhx2 = ':T=%{-x}%{T(taxon_id)-}';
|
|
921
|
|
922 my %nhx_ryo_modes_1 = (
|
|
923 'member_id_taxon_id' => '%{-m}%{o-}_%{-x}%{T(taxon_id)-}:%{d}',
|
|
924 'protein_id' => '%{-n}'.$nhx0,
|
|
925 'transcript_id' => '%{-r}'.$nhx0,
|
|
926 'gene_id' => '%{-i}'.$nhx0,
|
|
927 'full' => $nhx0,
|
|
928 'full_web' => $nhx0,
|
|
929 'display_label' => '%{-L|i}%{"_"-s}'.$nhx0,
|
|
930 'display_label_composite' => '%{-L"_"}%{-i}%{"_"-s}'.$nhx0,
|
|
931 'treebest_ortho' => '%{-m}%{"_"-x}'.$nhx0,
|
|
932 'simple' => $ryo_modes{'simple'},
|
|
933 'phylip' => $ryo_modes{'phylip'},
|
|
934 );
|
|
935
|
|
936 my %nhx_ryo_modes_2 = (
|
|
937 'member_id_taxon_id' => $nhx1.$nhx2,
|
|
938 'protein_id' => $nhx1.'%{":G="-i}'.$nhx2,
|
|
939 'transcript_id' => $nhx1.'%{":G="-i}'.$nhx2,
|
|
940 'gene_id' => $nhx1.'%{":G="-r}'.$nhx2,
|
|
941 'full' => $nhx1.$nhx2,
|
|
942 'full_web' => $nhx1.$nhx2,
|
|
943 'display_label' => $nhx1.$nhx2,
|
|
944 'display_label_composite' => $nhx1.$nhx2,
|
|
945 'treebest_ortho' => $nhx1.$nhx2.':S=%{-x}%{T(taxon_id)-}',
|
|
946 );
|
|
947
|
|
948
|
|
949 sub newick_format {
|
|
950 my $self = shift;
|
|
951 my $format_mode = shift;
|
|
952
|
|
953 my $ryo_string;
|
|
954
|
|
955 if (not defined $format_mode) {
|
|
956 $ryo_string = $ryo_modes{'full'};
|
|
957
|
|
958 } elsif ($format_mode eq "ryo") {
|
|
959 $ryo_string = shift @_;
|
|
960
|
|
961 } elsif (defined $ryo_modes{$format_mode}) {
|
|
962 $ryo_string = $ryo_modes{$format_mode};
|
|
963
|
|
964 } else {
|
|
965 throw("Unrecognized format '$format_mode'. Please use 'ryo' to introduce a roll-your-own format string\n");
|
|
966 }
|
|
967 return $self->_internal_newick_format_ryo($ryo_string);
|
|
968 }
|
|
969
|
|
970 sub nhx_format {
|
|
971 my ($self, $format_mode) = @_;
|
|
972 my $ryo_string1;
|
|
973 my $ryo_string2;
|
|
974
|
|
975 if (not defined $format_mode) {
|
|
976 $ryo_string1 = $nhx_ryo_modes_1{'protein_id'};
|
|
977 $ryo_string2 = $nhx_ryo_modes_2{'protein_id'};
|
|
978
|
|
979 } elsif ($format_mode eq "ryo") {
|
|
980 $ryo_string1 = shift @_;
|
|
981 $ryo_string2 = shift @_;
|
|
982
|
|
983 } elsif (defined $nhx_ryo_modes_1{$format_mode}) {
|
|
984 $ryo_string1 = $nhx_ryo_modes_1{$format_mode};
|
|
985 $ryo_string2 = $nhx_ryo_modes_2{$format_mode};
|
|
986
|
|
987 } else {
|
|
988 throw("Unrecognized format '$format_mode'. Please use 'ryo' to introduce a roll-your-own format string\n");
|
|
989 }
|
|
990 my $fmt = $ryo_string1;
|
|
991 $fmt = $ryo_string1.'[&&NHX'.$ryo_string2.']' if defined $ryo_string2;
|
|
992 return $self->_internal_newick_format_ryo($fmt);
|
|
993 }
|
|
994
|
|
995 sub _internal_newick_format_ryo {
|
|
996 my ($self, $ryo_string) = @_;
|
|
997 my $newick_str;
|
|
998 eval {
|
|
999 use Bio::EnsEMBL::Compara::FormatTree;
|
|
1000 my $ryo_formatter = Bio::EnsEMBL::Compara::FormatTree->new($ryo_string);
|
|
1001 $newick_str = $ryo_formatter->format_newick($self);
|
|
1002 };
|
|
1003 if ($@) {
|
|
1004 throw("Something bad happened while trying to stringify the tree: $@\n");
|
|
1005 }
|
|
1006 return "$newick_str;";
|
|
1007 }
|
|
1008
|
|
1009 =head2 newick_simple_format
|
|
1010
|
|
1011 DEPRECATED. Use newick_format("simple") instead
|
|
1012
|
|
1013 =cut
|
|
1014
|
|
1015 sub newick_simple_format {
|
|
1016 my $self = shift;
|
|
1017 deprecate('Use newick_format("simple") instead.');
|
|
1018 return $self->newick_format('simple');
|
|
1019 }
|
|
1020
|
|
1021
|
|
1022 ##################################
|
|
1023 #
|
|
1024 # Set theory methods
|
|
1025 #
|
|
1026 ##################################
|
|
1027
|
|
1028 #sub equals {
|
|
1029 # my $self = shift;
|
|
1030 # my $other = shift;
|
|
1031 # throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$other]")
|
|
1032 # unless($other->isa('Bio::EnsEMBL::Compara::NestedSet'));
|
|
1033 # return 1 if($self->node_id eq $other->node_id);
|
|
1034 # foreach my $child (@{$self->children}) {
|
|
1035 # return 0 unless($other->has_child($child));
|
|
1036 # }
|
|
1037 # return 1;
|
|
1038 #}
|
|
1039
|
|
1040 sub has_child {
|
|
1041 my $self = shift;
|
|
1042 my $child = shift;
|
|
1043 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$child]")
|
|
1044 unless($child->isa('Bio::EnsEMBL::Compara::NestedSet'));
|
|
1045 $self->load_children_if_needed;
|
|
1046 my $link = $self->link_for_neighbor($child);
|
|
1047 return 0 unless($link);
|
|
1048 return 0 if($self->{'_parent_link'} and ($self->{'_parent_link'}->equals($link)));
|
|
1049 return 1;
|
|
1050 }
|
|
1051
|
|
1052 sub is_member_of {
|
|
1053 my $A = shift;
|
|
1054 my $B = shift;
|
|
1055 return 1 if($B->has_child($A));
|
|
1056 return 0;
|
|
1057 }
|
|
1058
|
|
1059 sub is_subset_of {
|
|
1060 my $A = shift;
|
|
1061 my $B = shift;
|
|
1062 foreach my $child (@{$A->children}) {
|
|
1063 return 0 unless($B->has_child($child));
|
|
1064 }
|
|
1065 return 1;
|
|
1066 }
|
|
1067
|
|
1068 sub is_leaf {
|
|
1069 my $self = shift;
|
|
1070 return 1 unless($self->get_child_count);
|
|
1071 return 0;
|
|
1072 }
|
|
1073
|
|
1074 sub merge_children {
|
|
1075 my $self = shift;
|
|
1076 my $nset = shift;
|
|
1077 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$nset]")
|
|
1078 unless($nset->isa('Bio::EnsEMBL::Compara::NestedSet'));
|
|
1079 foreach my $child_node (@{$nset->children}) {
|
|
1080 $self->add_child($child_node, $child_node->distance_to_parent);
|
|
1081 }
|
|
1082 return $self;
|
|
1083 }
|
|
1084
|
|
1085 sub merge_node_via_shared_ancestor {
|
|
1086 my $self = shift;
|
|
1087 my $node = shift;
|
|
1088
|
|
1089 my $node_dup = $self->find_node_by_node_id($node->node_id);
|
|
1090 if($node_dup) {
|
|
1091 #warn("trying to merge in a node with already exists\n");
|
|
1092 return $node_dup;
|
|
1093 }
|
|
1094 return undef unless($node->parent);
|
|
1095
|
|
1096 my $ancestor = $self->find_node_by_node_id($node->parent->node_id);
|
|
1097 if($ancestor) {
|
|
1098 $ancestor->add_child($node);
|
|
1099 #print("common ancestor at : "); $ancestor->print_node;
|
|
1100 return $ancestor;
|
|
1101 }
|
|
1102 return $self->merge_node_via_shared_ancestor($node->parent);
|
|
1103 }
|
|
1104
|
|
1105
|
|
1106 sub extract_subtree_from_leaves {
|
|
1107 my $self = shift;
|
|
1108 my $copy = $self->copy;
|
|
1109 my $node_ids = shift; # Array ref of node_ids.
|
|
1110 my @keepers = @{$node_ids};
|
|
1111 my @all = @{$copy->get_all_nodes};
|
|
1112
|
|
1113 # Add all ancestors of kept nodes to the keep list.
|
|
1114 my @all_keepers = ();
|
|
1115 foreach my $keeper (@keepers) {
|
|
1116 my $node = $copy->find_node_by_node_id($keeper);
|
|
1117 push @all_keepers, $keeper;
|
|
1118
|
|
1119 my $parent = $node->parent;
|
|
1120 while (defined $parent) {
|
|
1121 push @all_keepers, $parent->node_id;
|
|
1122 $parent = $parent->parent;
|
|
1123 }
|
|
1124 }
|
|
1125
|
|
1126 my @remove_me = ();
|
|
1127 foreach my $node (@all) {
|
|
1128 push @remove_me, $node unless (grep {$node->node_id == $_} @all_keepers);
|
|
1129 }
|
|
1130 $copy->remove_nodes(\@remove_me);
|
|
1131 return $copy;
|
|
1132 }
|
|
1133
|
|
1134
|
|
1135 ##################################
|
|
1136 #
|
|
1137 # nested_set manipulations
|
|
1138 #
|
|
1139 ##################################
|
|
1140
|
|
1141
|
|
1142 =head2 flatten_tree
|
|
1143
|
|
1144 Overview : Removes all internal nodes and attaches leaves to the tree root, creating
|
|
1145 a "flattened" star tree structure.
|
|
1146 Example : $node->flatten_tree();
|
|
1147 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet
|
|
1148 Exceptions : none
|
|
1149 Caller : general
|
|
1150
|
|
1151 =cut
|
|
1152
|
|
1153 sub flatten_tree {
|
|
1154 my $self = shift;
|
|
1155
|
|
1156 my $leaves = $self->get_all_leaves;
|
|
1157 foreach my $leaf (@{$leaves}) {
|
|
1158 $leaf->disavow_parent;
|
|
1159 }
|
|
1160
|
|
1161 $self->release_children;
|
|
1162 foreach my $leaf (@{$leaves}) {
|
|
1163 $self->add_child($leaf, 0.0);
|
|
1164 }
|
|
1165
|
|
1166 return $self;
|
|
1167 }
|
|
1168
|
|
1169 =head2 re_root
|
|
1170
|
|
1171 Overview : rearranges the tree structure so that the root is moved to
|
|
1172 beetween this node and its parent. If the old root was more than
|
|
1173 bifurcated (2 children) a new node is created where it was to hold
|
|
1174 the multiple children that arises from the re-rooting.
|
|
1175 The old root is returned.
|
|
1176 Example : $node->re_root();
|
|
1177 Returntype : undef or Bio::EnsEMBL::Compara::NestedSet
|
|
1178 Exceptions : none
|
|
1179 Caller : general
|
|
1180
|
|
1181 =cut
|
|
1182
|
|
1183 sub re_root {
|
|
1184 my $self = shift;
|
|
1185
|
|
1186 return $self unless($self->parent); #I'm root so just return self
|
|
1187
|
|
1188 my $root = $self->root;
|
|
1189 my $tmp_root = new Bio::EnsEMBL::Compara::NestedSet;
|
|
1190 $tmp_root->merge_children($root);
|
|
1191
|
|
1192 my $parent = $self->parent;
|
|
1193 my $dist = $self->distance_to_parent;
|
|
1194 $self->disavow_parent;
|
|
1195
|
|
1196 my $old_root = $parent->_invert_tree_above;
|
|
1197 $old_root->minimize_node;
|
|
1198
|
|
1199 $root->add_child($parent, $dist / 2.0);
|
|
1200 $root->add_child($self, $dist / 2.0);
|
|
1201
|
|
1202 return $root;
|
|
1203 }
|
|
1204
|
|
1205
|
|
1206 sub _invert_tree_above {
|
|
1207 my $self = shift;
|
|
1208 return $self unless($self->parent);
|
|
1209
|
|
1210 my $old_root = $self->parent->_invert_tree_above;
|
|
1211 #now my parent has been inverted so it is the new root
|
|
1212
|
|
1213 #flip the direction of the link between myself and my parent
|
|
1214 $self->parent->_set_parent_link($self->{'_parent_link'});
|
|
1215 $self->_set_parent_link(undef);
|
|
1216
|
|
1217 #now I'm the new root and the old root might need to be modified
|
|
1218 return $old_root;
|
|
1219 }
|
|
1220
|
|
1221
|
|
1222 sub build_leftright_indexing {
|
|
1223 my $self = shift;
|
|
1224 my $counter = shift;
|
|
1225
|
|
1226 $counter = 1 unless($counter);
|
|
1227
|
|
1228 $self->left_index($counter++);
|
|
1229 foreach my $child_node (@{$self->sorted_children}) {
|
|
1230 $counter = $child_node->build_leftright_indexing($counter);
|
|
1231 }
|
|
1232 $self->right_index($counter++);
|
|
1233 return $counter;
|
|
1234 }
|
|
1235
|
|
1236
|
|
1237 =head2 remove_nodes
|
|
1238
|
|
1239 Arg [1] : arrayref Bio::EnsEMBL::Compara::NestedSet $nodes
|
|
1240 Example : my $ret_tree = $tree->remove_nodes($nodes);
|
|
1241 Description : Returns the tree with removed nodes in list. Nodes should be in the tree.
|
|
1242 Returntype : Bio::EnsEMBL::Compara::NestedSet object
|
|
1243 Exceptions :
|
|
1244 Caller : general
|
|
1245 Status : At risk (behaviour on exceptions could change)
|
|
1246
|
|
1247 =cut
|
|
1248
|
|
1249 sub remove_nodes {
|
|
1250 my $self = shift;
|
|
1251 my $nodes = shift;
|
|
1252
|
|
1253 foreach my $node (@$nodes) {
|
|
1254 if ($node->is_leaf) {
|
|
1255 $node->disavow_parent;
|
|
1256 $self = $self->minimize_tree;
|
|
1257 } else {
|
|
1258 my $node_children = $node->children;
|
|
1259 foreach my $child (@$node_children) {
|
|
1260 $node->parent->add_child($child);
|
|
1261 }
|
|
1262 $node->disavow_parent;
|
|
1263 }
|
|
1264 # Delete dangling one-child trees (help memory manager)
|
|
1265 if ($self->get_child_count == 1) {
|
|
1266 my $child = $self->children->[0];
|
|
1267 $child->parent->merge_children($child);
|
|
1268 $child->disavow_parent;
|
|
1269 return undef;
|
|
1270 }
|
|
1271 # Could be zero if all asked to delete, so return undef instead of
|
|
1272 # fake one-node tree.
|
|
1273 if ($self->get_child_count < 2) {
|
|
1274 return undef;
|
|
1275 }
|
|
1276 }
|
|
1277 return $self;
|
|
1278 }
|
|
1279
|
|
1280
|
|
1281 =head2 delete_lineage
|
|
1282
|
|
1283 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $node
|
|
1284 Example : $tree->delete_lineage($node);
|
|
1285 Description : Removes $node from tree. Nodes should be in the tree.
|
|
1286 Returntype :
|
|
1287 Exceptions :
|
|
1288 Caller : general
|
|
1289 Status : At risk (behaviour on exceptions could change)
|
|
1290
|
|
1291 =cut
|
|
1292
|
|
1293 sub delete_lineage {
|
|
1294 my $self = shift;
|
|
1295 my $del_me = shift;
|
|
1296
|
|
1297 throw("arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a [$self]")
|
|
1298 unless ($self->isa('Bio::EnsEMBL::Compara::NestedSet'));
|
|
1299
|
|
1300 my $parent = $del_me->parent;
|
|
1301 while ($parent) {
|
|
1302 my $num_children = scalar @{$parent->children};
|
|
1303 if ($num_children > 1) {
|
|
1304 $self->remove_nodes([$del_me]);
|
|
1305 return $self;
|
|
1306 } elsif ($num_children == 1) {
|
|
1307 $self->remove_nodes([$del_me]);
|
|
1308 $del_me = $parent;
|
|
1309 $parent = $del_me->parent;
|
|
1310 }
|
|
1311 }
|
|
1312 return $self;
|
|
1313 }
|
|
1314
|
|
1315 =head2 minimize_tree
|
|
1316
|
|
1317 Arg [1] : -none-
|
|
1318 Example : $leaf->disavow_parent();
|
|
1319 $tree = $tree->minimize_tree();
|
|
1320 Description : Returns the tree after removing internal nodes that do not
|
|
1321 represent an multi- or bi-furcation anymore. This is typically
|
|
1322 required after disavowing a node. Please ensure you use the
|
|
1323 object returned by the method and not the original object
|
|
1324 anymore!
|
|
1325 Returntype : Bio::EnsEMBL::Compara::NestedSet object
|
|
1326 Exceptions :
|
|
1327 Caller : general
|
|
1328 Status : Stable
|
|
1329
|
|
1330 =cut
|
|
1331
|
|
1332 sub minimize_tree {
|
|
1333 my $self = shift;
|
|
1334 return $self if($self->is_leaf);
|
|
1335
|
|
1336 foreach my $child (@{$self->children}) {
|
|
1337 $child->minimize_tree;
|
|
1338 }
|
|
1339 return $self->minimize_node;
|
|
1340 }
|
|
1341
|
|
1342
|
|
1343 sub minimize_node {
|
|
1344 my $self = shift;
|
|
1345
|
|
1346 return $self unless($self->get_child_count() == 1);
|
|
1347
|
|
1348 my $child = $self->children->[0];
|
|
1349 my $dist = $child->distance_to_parent + $self->distance_to_parent;
|
|
1350 if ($self->parent) {
|
|
1351 $self->parent->add_child($child, $dist);
|
|
1352 $self->disavow_parent;
|
|
1353 } else {
|
|
1354 $child->disavow_parent;
|
|
1355 }
|
|
1356 return $child
|
|
1357 }
|
|
1358
|
|
1359
|
|
1360 sub scale {
|
|
1361 my $self = shift;
|
|
1362 my $scale = shift;
|
|
1363
|
|
1364 foreach my $node (@{$self->get_all_nodes}) {
|
|
1365 my $bl = $node->distance_to_parent;
|
|
1366 $bl = 0 unless (defined $bl);
|
|
1367 $node->distance_to_parent($bl*$scale);
|
|
1368 }
|
|
1369 return $self;
|
|
1370 }
|
|
1371
|
|
1372
|
|
1373 sub scale_max_to {
|
|
1374 my $self = shift;
|
|
1375 my $new_max = shift;
|
|
1376
|
|
1377 my $max_dist = $self->max_distance;
|
|
1378 my $scale_factor = $new_max / $max_dist;
|
|
1379 return $self->scale($scale_factor);
|
|
1380 }
|
|
1381
|
|
1382
|
|
1383
|
|
1384 ##################################
|
|
1385 #
|
|
1386 # search methods
|
|
1387 #
|
|
1388 ##################################
|
|
1389
|
|
1390 sub find_node_by_name {
|
|
1391 my $self = shift;
|
|
1392 my $name = shift;
|
|
1393
|
|
1394 return $self if((defined $self->name) && $name eq $self->name);
|
|
1395
|
|
1396 my $children = $self->children;
|
|
1397 foreach my $child_node (@$children) {
|
|
1398 my $found = $child_node->find_node_by_name($name);
|
|
1399 return $found if(defined($found));
|
|
1400 }
|
|
1401
|
|
1402 return undef;
|
|
1403 }
|
|
1404
|
|
1405 sub find_node_by_node_id {
|
|
1406 my $self = shift;
|
|
1407 my $node_id = shift;
|
|
1408
|
|
1409 return $self if($node_id eq $self->node_id);
|
|
1410
|
|
1411 my $children = $self->children;
|
|
1412 foreach my $child_node (@$children) {
|
|
1413 my $found = $child_node->find_node_by_node_id($node_id);
|
|
1414 return $found if(defined($found));
|
|
1415 }
|
|
1416
|
|
1417 return undef;
|
|
1418 }
|
|
1419
|
|
1420 sub find_leaf_by_name {
|
|
1421 my $self = shift;
|
|
1422 my $name = shift;
|
|
1423
|
|
1424 return $self if((defined $self->name) and ($name eq $self->name));
|
|
1425
|
|
1426 my $leaves = $self->get_all_leaves;
|
|
1427 foreach my $leaf (@$leaves) {
|
|
1428 return $leaf if((defined $leaf->name) and ($name eq $leaf->name));
|
|
1429 }
|
|
1430
|
|
1431 return undef;
|
|
1432 }
|
|
1433
|
|
1434 sub find_leaf_by_node_id {
|
|
1435 my $self = shift;
|
|
1436 my $node_id = shift;
|
|
1437
|
|
1438 return $self if($node_id eq $self->node_id);
|
|
1439
|
|
1440 my $leaves = $self->get_all_leaves;
|
|
1441 foreach my $leaf (@$leaves) {
|
|
1442 return $leaf if($node_id eq $leaf->node_id);
|
|
1443 }
|
|
1444
|
|
1445 return undef;
|
|
1446 }
|
|
1447
|
|
1448
|
|
1449 =head2 get_all_sorted_leaves
|
|
1450
|
|
1451 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $top_leaf
|
|
1452 Arg [...] : (optional) Bio::EnsEMBL::Compara::NestedSet $secondary_priority_leaf
|
|
1453 Example : my $sorted_leaves = $object->get_all_sorted_leaves($human_leaf);
|
|
1454 Example : my $sorted_leaves = $object->get_all_sorted_leaves($human_leaf, $mouse_leaf);
|
|
1455 Description : Sorts the tree such as $top_leaf is the first leave and returns
|
|
1456 all the other leaves in the order defined by the tree.
|
|
1457 It is possible to define as many secondary top leaves as you require
|
|
1458 to sort other branches of the tree. The priority to sort the trees
|
|
1459 is defined by the order in which you specify the leaves.
|
|
1460 Returntype : listref of Bio::EnsEMBL::Compara::NestedSet (all sorted leaves)
|
|
1461 Exceptions : none
|
|
1462 Caller : general
|
|
1463 Status : Stable
|
|
1464
|
|
1465 =cut
|
|
1466
|
|
1467 sub get_all_sorted_leaves {
|
|
1468 my ($self, @priority_leaves) = @_;
|
|
1469
|
|
1470 if (!@priority_leaves) {
|
|
1471 return $self->get_all_leaves;
|
|
1472 }
|
|
1473
|
|
1474 # Assign priority scores for all parent nodes of the priority leaves
|
|
1475 my $score_by_node;
|
|
1476 my $score = 0;
|
|
1477 # Loop through all the priority leaves, starting from the last one (lowest score)
|
|
1478 while (my $priority_leaf = pop @priority_leaves) {
|
|
1479 $score++; # Increases the score, next priority leaves (earlier in the argument list) will overwrite the score if needed
|
|
1480 my $this_node = $priority_leaf;
|
|
1481 # Loop through all the parent node up to the root of the tree
|
|
1482 do {
|
|
1483 $score_by_node->{$this_node} = $score;
|
|
1484 $this_node = $this_node->parent;
|
|
1485 } while ($this_node);
|
|
1486 }
|
|
1487
|
|
1488 my $sorted_leaves = $self->_recursive_get_all_sorted_leaves($score_by_node);
|
|
1489
|
|
1490 return $sorted_leaves;
|
|
1491 }
|
|
1492
|
|
1493 =head2 _recursive_get_all_sorted_leaves
|
|
1494
|
|
1495 Arg [1] : hashref $score_by_node
|
|
1496 Example : my $sorted_leaves = $object->_recursive_get_all_sorted_leaves($score_by_node);
|
|
1497 Description : Recursive code for the get_all_sorted_leaves() method
|
|
1498 Returntype : listref of Bio::EnsEMBL::Compara::NestedSet (sorted leaves)
|
|
1499 Exceptions : none
|
|
1500 Caller : private
|
|
1501 Status : Stable
|
|
1502
|
|
1503 =cut
|
|
1504
|
|
1505 sub _recursive_get_all_sorted_leaves {
|
|
1506 my $self = shift;
|
|
1507 my $score_by_node = shift;
|
|
1508
|
|
1509 my $sorted_leaves = [];
|
|
1510 my $children = $self->children;
|
|
1511
|
|
1512 if (@$children == 0) {
|
|
1513 $sorted_leaves = [$self];
|
|
1514 } else {
|
|
1515 $children = [sort {
|
|
1516 ($score_by_node->{$b} || $score_by_node->{$a}) ? (($score_by_node->{$b} || 0)<=>($score_by_node->{$a} || 0)) : ($a->node_id <=> $b->node_id)
|
|
1517 } @$children];
|
|
1518 for (my $i = 0; $i < @$children; $i++) {
|
|
1519 push(@$sorted_leaves, @{$children->[$i]->_recursive_get_all_sorted_leaves($score_by_node)});
|
|
1520 }
|
|
1521 }
|
|
1522
|
|
1523 return $sorted_leaves;
|
|
1524 }
|
|
1525
|
|
1526 =head2 get_all_leaves
|
|
1527
|
|
1528 Title : get_all_leaves
|
|
1529 Usage : my @leaves = @{$tree->get_all_leaves};
|
|
1530 Function: searching from the given starting node, searches and creates list
|
|
1531 of all leaves in this subtree and returns by reference
|
|
1532 Example :
|
|
1533 Returns : reference to list of NestedSet objects (all leaves)
|
|
1534 Args : none
|
|
1535
|
|
1536 =cut
|
|
1537
|
|
1538 sub get_all_leaves {
|
|
1539 my $self = shift;
|
|
1540
|
|
1541 my $leaves = {};
|
|
1542 $self->_recursive_get_all_leaves($leaves);
|
|
1543 my @leaf_list = sort {$a->node_id <=> $b->node_id} values(%{$leaves});
|
|
1544 return \@leaf_list;
|
|
1545 }
|
|
1546
|
|
1547 sub _recursive_get_all_leaves {
|
|
1548 my $self = shift;
|
|
1549 my $leaves = shift;
|
|
1550
|
|
1551 $leaves->{$self->obj_id} = $self if($self->is_leaf);
|
|
1552
|
|
1553 foreach my $child (@{$self->children}) {
|
|
1554 no warnings 'recursion';
|
|
1555 $child->_recursive_get_all_leaves($leaves);
|
|
1556 }
|
|
1557 }
|
|
1558
|
|
1559
|
|
1560 sub get_all_leaves_indexed {
|
|
1561 my $self = shift;
|
|
1562 deprecate("Use Bio::EnsEMBL::Compara::DBSQL::GeneTreeNodeAdaptor->fetch_all_leaves_indexed() method instead");
|
|
1563
|
|
1564 my @leaf_list = @{$self->adaptor->fetch_all_leaves_indexed($self)};
|
|
1565
|
|
1566 return \@leaf_list;
|
|
1567 }
|
|
1568
|
|
1569 =head2 max_distance
|
|
1570
|
|
1571 Title : max_distance
|
|
1572 Args : none
|
|
1573 Usage : $tree_node->max_distance;
|
|
1574 Function: searching from the given starting node, calculates the maximum distance to a leaf
|
|
1575 Returns : int
|
|
1576
|
|
1577 =cut
|
|
1578
|
|
1579 sub max_distance {
|
|
1580 my $self = shift;
|
|
1581
|
|
1582 my $max_distance = 0;
|
|
1583
|
|
1584 foreach my $child (@{$self->children}) {
|
|
1585 my $distance = $child->max_distance;
|
|
1586 $max_distance = $distance if($distance>$max_distance);
|
|
1587 }
|
|
1588
|
|
1589 return ($self->distance_to_parent + $max_distance);
|
|
1590 }
|
|
1591
|
|
1592
|
|
1593 =head2 max_depth
|
|
1594
|
|
1595 Title : max_depth
|
|
1596 Args : none
|
|
1597 Usage : $tree_node->max_depth;
|
|
1598 Function: searching from the given starting node, calculates the maximum depth to a leaf
|
|
1599 Returns : int
|
|
1600
|
|
1601 =cut
|
|
1602
|
|
1603 sub max_depth {
|
|
1604 my $self = shift;
|
|
1605
|
|
1606 my $max_depth = 0;
|
|
1607
|
|
1608 foreach my $child (@{$self->children}) {
|
|
1609 my $depth = $child->max_depth + 1;
|
|
1610 $max_depth=$depth if($depth>$max_depth);
|
|
1611 }
|
|
1612 return $max_depth;
|
|
1613 }
|
|
1614
|
|
1615
|
|
1616 =head2 find_first_shared_ancestor
|
|
1617
|
|
1618 Arg [1] : Bio::EnsEMBL::Compara::NestedSet $node
|
|
1619 Example : my $ancestor = $this_node->find_first_shared_ancestor($other_node);
|
|
1620 Description : Gets the first common ancestor between this node and the other one.
|
|
1621 Returntype : Bio::EnsEMBL::Compara::NestedSet object
|
|
1622 Exceptions :
|
|
1623 Caller : general
|
|
1624 Status : Stable
|
|
1625
|
|
1626 =cut
|
|
1627
|
|
1628 sub find_first_shared_ancestor {
|
|
1629 my $self = shift;
|
|
1630 my $node = shift;
|
|
1631
|
|
1632 return $self if($self->equals($node));
|
|
1633 return $node if($self->has_ancestor($node));
|
|
1634 return $self->find_first_shared_ancestor($node->parent);
|
|
1635 }
|
|
1636
|
|
1637
|
|
1638 sub find_first_shared_ancestor_from_leaves {
|
|
1639 my $self = shift;
|
|
1640 my $leaf_list = shift;
|
|
1641
|
|
1642 my @leaves = @{$leaf_list};
|
|
1643
|
|
1644 my $ancestor = shift @leaves;
|
|
1645 while (scalar @leaves > 0) {
|
|
1646 my $node = shift @leaves;
|
|
1647 $ancestor = $ancestor->find_first_shared_ancestor($node);
|
|
1648 }
|
|
1649 return $ancestor;
|
|
1650 }
|
|
1651
|
|
1652
|
|
1653 ##################################
|
|
1654 #
|
|
1655 # developer/adaptor API methods
|
|
1656 #
|
|
1657 ##################################
|
|
1658
|
|
1659
|
|
1660 # used for building tree from a DB fetch, want to restrict users to create trees
|
|
1661 # by only -add_child method
|
|
1662 sub _set_parent_link {
|
|
1663 my ($self, $link) = @_;
|
|
1664
|
|
1665 $self->{'_parent_id'} = 0;
|
|
1666 $self->{'_parent_link'} = $link;
|
|
1667 $self->{'_parent_id'} = $link->get_neighbor($self)->node_id if($link);
|
|
1668 return $self;
|
|
1669 }
|
|
1670
|
|
1671
|
|
1672 # used for building tree from a DB fetch until all the objects are in memory
|
|
1673 sub _parent_id {
|
|
1674 my $self = shift;
|
|
1675 $self->{'_parent_id'} = shift if(@_);
|
|
1676 return $self->{'_parent_id'};
|
|
1677 }
|
|
1678
|
|
1679 # used for building tree from a DB fetch until all the objects are in memory
|
|
1680 sub _root_id {
|
|
1681 my $self = shift;
|
|
1682 $self->{'_root_id'} = shift if(@_);
|
|
1683 return $self->{'_root_id'};
|
|
1684 }
|
|
1685
|
|
1686 1;
|
|
1687
|