0
|
1 # $Id: Node.pm,v 1.17.2.3 2003/09/14 19:00:35 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Tree::Node
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Tree::Node - A Simple Tree Node
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use Bio::Tree::Node;
|
|
20 my $nodeA = new Bio::Tree::Node();
|
|
21 my $nodeL = new Bio::Tree::Node();
|
|
22 my $nodeR = new Bio::Tree::Node();
|
|
23
|
|
24 my $node = new Bio::Tree::Node();
|
|
25 $node->add_Descendent($nodeL);
|
|
26 $node->add_Descendent($nodeR);
|
|
27
|
|
28 print "node is not a leaf \n" if( $node->is_leaf);
|
|
29
|
|
30 =head1 DESCRIPTION
|
|
31
|
|
32 Makes a Tree Node suitable for building a Tree.
|
|
33
|
|
34 =head1 FEEDBACK
|
|
35
|
|
36 =head2 Mailing Lists
|
|
37
|
|
38 User feedback is an integral part of the evolution of this and other
|
|
39 Bioperl modules. Send your comments and suggestions preferably to
|
|
40 the Bioperl mailing list. Your participation is much appreciated.
|
|
41
|
|
42 bioperl-l@bioperl.org - General discussion
|
|
43 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
44
|
|
45 =head2 Reporting Bugs
|
|
46
|
|
47 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
48 of the bugs and their resolution. Bug reports can be submitted via
|
|
49 the web:
|
|
50
|
|
51 http://bugzilla.bioperl.org/
|
|
52
|
|
53 =head1 AUTHOR - Jason Stajich
|
|
54
|
|
55 Email jason@bioperl.org
|
|
56
|
|
57 =head1 CONTRIBUTORS
|
|
58
|
|
59 Aaron Mackey amackey@virginia.edu
|
|
60
|
|
61 =head1 APPENDIX
|
|
62
|
|
63 The rest of the documentation details each of the object methods.
|
|
64 Internal methods are usually preceded with a _
|
|
65
|
|
66 =cut
|
|
67
|
|
68
|
|
69 # Let the code begin...
|
|
70
|
|
71 package Bio::Tree::Node;
|
|
72 use vars qw(@ISA $CREATIONORDER);
|
|
73 use strict;
|
|
74
|
|
75 use Bio::Root::Root;
|
|
76 use Bio::Tree::NodeI;
|
|
77
|
|
78 @ISA = qw(Bio::Root::Root Bio::Tree::NodeI);
|
|
79
|
|
80 BEGIN {
|
|
81 $CREATIONORDER = 0;
|
|
82 }
|
|
83
|
|
84
|
|
85 =head2 new
|
|
86
|
|
87 Title : new
|
|
88 Usage : my $obj = new Bio::Tree::Node();
|
|
89 Function: Builds a new Bio::Tree::Node object
|
|
90 Returns : Bio::Tree::Node
|
|
91 Args : -left => pointer to Left descendent (optional)
|
|
92 -right => pointer to Right descenent (optional)
|
|
93 -branch_length => branch length [integer] (optional)
|
|
94 -bootstrap => value bootstrap value (string)
|
|
95 -description => description of node
|
|
96 -id => human readable id for node
|
|
97
|
|
98 =cut
|
|
99
|
|
100 sub new {
|
|
101 my($class,@args) = @_;
|
|
102
|
|
103 my $self = $class->SUPER::new(@args);
|
|
104 my ($children, $branchlen,$id,
|
|
105 $bootstrap, $desc,$d) = $self->_rearrange([qw(DESCENDENTS
|
|
106 BRANCH_LENGTH
|
|
107 ID
|
|
108 BOOTSTRAP
|
|
109 DESC
|
|
110 DESCRIPTION
|
|
111 )],
|
|
112 @args);
|
|
113 $self->_register_for_cleanup(\&node_cleanup);
|
|
114 $self->{'_desc'} = {}; # for descendents
|
|
115 if( $d && $desc ) {
|
|
116 $self->warn("can only accept -desc or -description, not both, accepting -description");
|
|
117 $desc = $d;
|
|
118 } elsif( defined $d && ! defined $desc ) {
|
|
119 $desc = $d;
|
|
120 }
|
|
121 defined $desc && $self->description($desc);
|
|
122 defined $bootstrap && $self->bootstrap($bootstrap);
|
|
123 defined $id && $self->id($id);
|
|
124 defined $branchlen && $self->branch_length($branchlen);
|
|
125
|
|
126 if( defined $children ) {
|
|
127 if( ref($children) !~ /ARRAY/i ) {
|
|
128 $self->warn("Must specify a valid ARRAY reference to initialize a Node's Descendents");
|
|
129 }
|
|
130 foreach my $c ( @$children ) {
|
|
131 $self->add_Descendent($c);
|
|
132 }
|
|
133 }
|
|
134 $self->_creation_id($CREATIONORDER++);
|
|
135 return $self;
|
|
136 }
|
|
137
|
|
138 =head2 add_Descendent
|
|
139
|
|
140 Title : add_Descendent
|
|
141 Usage : $node->add_Descendant($node);
|
|
142 Function: Adds a descendent to a node
|
|
143 Returns : number of current descendents for this node
|
|
144 Args : Bio::Node::NodeI
|
|
145 boolean flag, true if you want to ignore the fact that you are
|
|
146 adding a second node with the same unique id (typically memory
|
|
147 location reference in this implementation). default is false and
|
|
148 will throw an error if you try and overwrite an existing node.
|
|
149
|
|
150 =cut
|
|
151
|
|
152 sub add_Descendent{
|
|
153 my ($self,$node,$ignoreoverwrite) = @_;
|
|
154 return -1 if( ! defined $node ) ;
|
|
155 if( ! $node->isa('Bio::Tree::NodeI') ) {
|
|
156 $self->warn("Trying to add a Descendent who is not a Bio::Tree::NodeI");
|
|
157 return -1;
|
|
158 }
|
|
159 # do we care about order?
|
|
160 $node->ancestor($self);
|
|
161 if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) {
|
|
162 $self->throw("Going to overwrite a node which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future");
|
|
163 }
|
|
164
|
|
165 $self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate??
|
|
166
|
|
167 $self->invalidate_height();
|
|
168
|
|
169 return scalar keys %{$self->{'_desc'}};
|
|
170 }
|
|
171
|
|
172
|
|
173 =head2 each_Descendent
|
|
174
|
|
175 Title : each_Descendent($sortby)
|
|
176 Usage : my @nodes = $node->each_Descendent;
|
|
177 Function: all the descendents for this Node (but not their descendents
|
|
178 i.e. not a recursive fetchall)
|
|
179 Returns : Array of Bio::Tree::NodeI objects
|
|
180 Args : $sortby [optional] "height", "creation" or coderef to be used
|
|
181 to sort the order of children nodes.
|
|
182
|
|
183 =cut
|
|
184
|
|
185 sub each_Descendent{
|
|
186 my ($self, $sortby) = @_;
|
|
187
|
|
188 # order can be based on branch length (and sub branchlength)
|
|
189
|
|
190 $sortby ||= 'height';
|
|
191
|
|
192 if (ref $sortby eq 'CODE') {
|
|
193 return sort $sortby values %{$self->{'_desc'}};
|
|
194 } else {
|
|
195 if ($sortby eq 'height') {
|
|
196 return map { $_->[0] }
|
|
197 sort { $a->[1] <=> $b->[1] ||
|
|
198 $a->[2] <=> $b->[2] }
|
|
199 map { [$_, $_->height, $_->internal_id ] }
|
|
200 values %{$self->{'_desc'}};
|
|
201 } else {
|
|
202 return map { $_->[0] }
|
|
203 sort { $a->[1] <=> $b->[1] }
|
|
204 map { [$_, $_->height ] }
|
|
205 values %{$self->{'_desc'}};
|
|
206 }
|
|
207 }
|
|
208 }
|
|
209
|
|
210 =head2 remove_Descendent
|
|
211
|
|
212 Title : remove_Descendent
|
|
213 Usage : $node->remove_Descedent($node_foo);
|
|
214 Function: Removes a specific node from being a Descendent of this node
|
|
215 Returns : nothing
|
|
216 Args : An array of Bio::Node::NodeI objects which have be previously
|
|
217 passed to the add_Descendent call of this object.
|
|
218
|
|
219 =cut
|
|
220
|
|
221 sub remove_Descendent{
|
|
222 my ($self,@nodes) = @_;
|
|
223 my $c= 0;
|
|
224 foreach my $n ( @nodes ) {
|
|
225 if( $self->{'_desc'}->{$n->internal_id} ) {
|
|
226 $n->ancestor(undef);
|
|
227 # should be redundant
|
|
228 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
|
|
229 delete $self->{'_desc'}->{$n->internal_id};
|
|
230 my $a1 = $self->ancestor;
|
|
231 # remove unecessary nodes if we have removed the part
|
|
232 # which branches.
|
|
233 # if( $a1 ) {
|
|
234 # my $bl = $self->branch_length || 0;
|
|
235 # my @d = $self->each_Descendent;
|
|
236 # if (scalar @d == 1) {
|
|
237 # $d[0]->branch_length($bl + ($d[0]->branch_length || 0));
|
|
238 # $a1->add_Descendent($d[0]);
|
|
239 # }
|
|
240 # $a1->remove_Descendent($self);
|
|
241 #}
|
|
242 $c++;
|
|
243 } else {
|
|
244 if( $self->verbose ) {
|
|
245 $self->debug(sprintf("no node %s (%s) listed as a descendent in this node %s (%s)\n",$n->id, $n,$self->id,$self));
|
|
246 $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n");
|
|
247 }
|
|
248 }
|
|
249 }
|
|
250 $c;
|
|
251 }
|
|
252
|
|
253
|
|
254 =head2 remove_all_Descendents
|
|
255
|
|
256 Title : remove_all_Descendents
|
|
257 Usage : $node->remove_All_Descendents()
|
|
258 Function: Cleanup the node's reference to descendents and reset
|
|
259 their ancestor pointers to undef, if you don't have a reference
|
|
260 to these objects after this call they will be cleaned up - so
|
|
261 a get_nodes from the Tree object would be a safe thing to do first
|
|
262 Returns : nothing
|
|
263 Args : none
|
|
264
|
|
265
|
|
266 =cut
|
|
267
|
|
268 sub remove_all_Descendents{
|
|
269 my ($self) = @_;
|
|
270 # this won't cleanup the nodes themselves if you also have
|
|
271 # a copy/pointer of them (I think)...
|
|
272 while( my ($node,$val) = each %{ $self->{'_desc'} } ) {
|
|
273 $val->ancestor(undef);
|
|
274 }
|
|
275 $self->{'_desc'} = {};
|
|
276 1;
|
|
277 }
|
|
278
|
|
279 =head2 get_all_Descendents
|
|
280
|
|
281 Title : get_all_Descendents
|
|
282 Usage : my @nodes = $node->get_all_Descendents;
|
|
283 Function: Recursively fetch all the nodes and their descendents
|
|
284 *NOTE* This is different from each_Descendent
|
|
285 Returns : Array or Bio::Tree::NodeI objects
|
|
286 Args : none
|
|
287
|
|
288 =cut
|
|
289
|
|
290 # implemented in the interface
|
|
291
|
|
292 =head2 ancestor
|
|
293
|
|
294 Title : ancestor
|
|
295 Usage : $obj->ancestor($newval)
|
|
296 Function: Set the Ancestor
|
|
297 Returns : value of ancestor
|
|
298 Args : newvalue (optional)
|
|
299
|
|
300 =cut
|
|
301
|
|
302 sub ancestor{
|
|
303 my $self = shift;
|
|
304 $self->{'_ancestor'} = shift @_ if @_;
|
|
305 return $self->{'_ancestor'};
|
|
306 }
|
|
307
|
|
308 =head2 branch_length
|
|
309
|
|
310 Title : branch_length
|
|
311 Usage : $obj->branch_length()
|
|
312 Function: Get/Set the branch length
|
|
313 Returns : value of branch_length
|
|
314 Args : newvalue (optional)
|
|
315
|
|
316
|
|
317 =cut
|
|
318
|
|
319 sub branch_length{
|
|
320 my $self = shift;
|
|
321 if( @_ ) {
|
|
322 my $bl = shift;
|
|
323 if( defined $bl &&
|
|
324 $bl =~ s/\[(\d+)\]// ) {
|
|
325 $self->bootstrap($1);
|
|
326 }
|
|
327 $self->{'_branch_length'} = $bl;
|
|
328 }
|
|
329 return $self->{'_branch_length'};
|
|
330 }
|
|
331
|
|
332
|
|
333 =head2 bootstrap
|
|
334
|
|
335 Title : bootstrap
|
|
336 Usage : $obj->bootstrap($newval)
|
|
337 Function: Get/Set the bootstrap value
|
|
338 Returns : value of bootstrap
|
|
339 Args : newvalue (optional)
|
|
340
|
|
341
|
|
342 =cut
|
|
343
|
|
344 sub bootstrap {
|
|
345 my $self = shift;
|
|
346 if( @_ ) {
|
|
347 if( $self->has_tag('B') ) {
|
|
348 $self->remove_tag('B');
|
|
349 }
|
|
350 $self->add_tag_value('B',shift);
|
|
351 }
|
|
352 return ($self->get_tag_values('B'))[0];
|
|
353 }
|
|
354
|
|
355 =head2 description
|
|
356
|
|
357 Title : description
|
|
358 Usage : $obj->description($newval)
|
|
359 Function: Get/Set the description string
|
|
360 Returns : value of description
|
|
361 Args : newvalue (optional)
|
|
362
|
|
363
|
|
364 =cut
|
|
365
|
|
366 sub description{
|
|
367 my $self = shift;
|
|
368 $self->{'_description'} = shift @_ if @_;
|
|
369 return $self->{'_description'};
|
|
370 }
|
|
371
|
|
372 =head2 id
|
|
373
|
|
374 Title : id
|
|
375 Usage : $obj->id($newval)
|
|
376 Function: The human readable identifier for the node
|
|
377 Returns : value of human readable id
|
|
378 Args : newvalue (optional)
|
|
379 Note : id cannot contain the chracters '();:'
|
|
380
|
|
381 "A name can be any string of printable characters except blanks,
|
|
382 colons, semicolons, parentheses, and square brackets. Because you may
|
|
383 want to include a blank in a name, it is assumed that an underscore
|
|
384 character ("_") stands for a blank; any of these in a name will be
|
|
385 converted to a blank when it is read in."
|
|
386
|
|
387 from L<http://evolution.genetics.washington.edu/phylip/newicktree.html>
|
|
388
|
|
389 =cut
|
|
390
|
|
391 sub id{
|
|
392 my ($self, $value) = @_;
|
|
393 if ($value) {
|
|
394 $self->warn("Illegal characters ();: and space in the id [$value], converting to _ ")
|
|
395 if $value =~ /\(\);:/ and $self->verbose >= 0;
|
|
396 $value =~ s/[\(\);:\s]/_/g;
|
|
397 $self->{'_id'} = $value;
|
|
398 }
|
|
399 return $self->{'_id'};
|
|
400 }
|
|
401
|
|
402 =head2 internal_id
|
|
403
|
|
404 Title : internal_id
|
|
405 Usage : my $internalid = $node->internal_id
|
|
406 Function: Returns the internal unique id for this Node
|
|
407 (a monotonically increasing number for this in-memory implementation
|
|
408 but could be a database determined unique id in other
|
|
409 implementations)
|
|
410 Returns : unique id
|
|
411 Args : none
|
|
412
|
|
413 =cut
|
|
414
|
|
415 sub internal_id{
|
|
416 return $_[0]->_creation_id;
|
|
417 }
|
|
418
|
|
419
|
|
420 =head2 _creation_id
|
|
421
|
|
422 Title : _creation_id
|
|
423 Usage : $obj->_creation_id($newval)
|
|
424 Function: a private method signifying the internal creation order
|
|
425 Returns : value of _creation_id
|
|
426 Args : newvalue (optional)
|
|
427
|
|
428
|
|
429 =cut
|
|
430
|
|
431 sub _creation_id{
|
|
432 my $self = shift @_;
|
|
433 $self->{'_creation_id'} = shift @_ if( @_);
|
|
434 return $self->{'_creation_id'} || 0;
|
|
435 }
|
|
436
|
|
437 =head2 Bio::Node::NodeI decorated interface implemented
|
|
438
|
|
439 The following methods are implemented by L<Bio::Node::NodeI> decorated
|
|
440 interface.
|
|
441
|
|
442 =head2 is_Leaf
|
|
443
|
|
444 Title : is_Leaf
|
|
445 Usage : if( $node->is_Leaf )
|
|
446 Function: Get Leaf status
|
|
447 Returns : boolean
|
|
448 Args : none
|
|
449
|
|
450 =cut
|
|
451
|
|
452 sub is_Leaf {
|
|
453 my ($self) = @_;
|
|
454 my $isleaf = ! (defined $self->{'_desc'} &&
|
|
455 (keys %{$self->{'_desc'}} > 0) );
|
|
456 return $isleaf;
|
|
457 }
|
|
458
|
|
459 =head2 to_string
|
|
460
|
|
461 Title : to_string
|
|
462 Usage : my $str = $node->to_string()
|
|
463 Function: For debugging, provide a node as a string
|
|
464 Returns : string
|
|
465 Args : none
|
|
466
|
|
467 =head2 height
|
|
468
|
|
469 Title : height
|
|
470 Usage : my $len = $node->height
|
|
471 Function: Returns the height of the tree starting at this
|
|
472 node. Height is the maximum branchlength.
|
|
473 Returns : The longest length (weighting branches with branch_length) to a leaf
|
|
474 Args : none
|
|
475
|
|
476 =cut
|
|
477
|
|
478 sub height {
|
|
479 my ($self) = @_;
|
|
480
|
|
481 return $self->{'_height'} if( defined $self->{'_height'} );
|
|
482
|
|
483 if( $self->is_Leaf ) {
|
|
484 if( !defined $self->branch_length ) {
|
|
485 $self->debug(sprintf("Trying to calculate height of a node when a Node (%s) has an undefined branch_length\n",$self->id || '?' ));
|
|
486 return 0;
|
|
487 }
|
|
488 return $self->branch_length;
|
|
489 }
|
|
490 my $max = 0;
|
|
491 foreach my $subnode ( $self->each_Descendent ) {
|
|
492 my $s = $subnode->height;
|
|
493 if( $s > $max ) { $max = $s; }
|
|
494 }
|
|
495 return ($self->{'_height'} = $max + ($self->branch_length || 1));
|
|
496 }
|
|
497
|
|
498
|
|
499 =head2 invalidate_height
|
|
500
|
|
501 Title : invalidate_height
|
|
502 Usage : private helper method
|
|
503 Function: Invalidate our cached value of the node's height in the tree
|
|
504 Returns : nothing
|
|
505 Args : none
|
|
506
|
|
507 =cut
|
|
508
|
|
509 #'
|
|
510
|
|
511 sub invalidate_height {
|
|
512 my ($self) = @_;
|
|
513
|
|
514 $self->{'_height'} = undef;
|
|
515 if( $self->ancestor ) {
|
|
516 $self->ancestor->invalidate_height;
|
|
517 }
|
|
518 }
|
|
519
|
|
520 =head2 add_tag_value
|
|
521
|
|
522 Title : add_tag_value
|
|
523 Usage : $node->add_tag_value($tag,$value)
|
|
524 Function: Adds a tag value to a node
|
|
525 Returns : number of values stored for this tag
|
|
526 Args : $tag - tag name
|
|
527 $value - value to store for the tag
|
|
528
|
|
529
|
|
530 =cut
|
|
531
|
|
532 sub add_tag_value{
|
|
533 my ($self,$tag,$value) = @_;
|
|
534 if( ! defined $tag || ! defined $value ) {
|
|
535 $self->warn("cannot call add_tag_value with an undefined value");
|
|
536 }
|
|
537 push @{$self->{'_tags'}->{$tag}}, $value;
|
|
538 return scalar @{$self->{'_tags'}->{$tag}};
|
|
539 }
|
|
540
|
|
541 =head2 remove_tag
|
|
542
|
|
543 Title : remove_tag
|
|
544 Usage : $node->remove_tag($tag)
|
|
545 Function: Remove the tag and all values for this tag
|
|
546 Returns : boolean representing success (0 if tag does not exist)
|
|
547 Args : $tag - tagname to remove
|
|
548
|
|
549
|
|
550 =cut
|
|
551
|
|
552 sub remove_tag {
|
|
553 my ($self,$tag) = @_;
|
|
554 if( exists $self->{'_tags'}->{$tag} ) {
|
|
555 $self->{'_tags'}->{$tag} = undef;
|
|
556 delete $self->{'_tags'}->{$tag};
|
|
557 return 1;
|
|
558 }
|
|
559 return 0;
|
|
560 }
|
|
561
|
|
562 =head2 remove_all_tags
|
|
563
|
|
564 Title : remove_all_tags
|
|
565 Usage : $node->remove_all_tags()
|
|
566 Function: Removes all tags
|
|
567 Returns : None
|
|
568 Args : None
|
|
569
|
|
570
|
|
571 =cut
|
|
572
|
|
573 sub remove_all_tags{
|
|
574 my ($self) = @_;
|
|
575 $self->{'_tags'} = {};
|
|
576 return;
|
|
577 }
|
|
578
|
|
579 =head2 get_all_tags
|
|
580
|
|
581 Title : get_all_tags
|
|
582 Usage : my @tags = $node->get_all_tags()
|
|
583 Function: Gets all the tag names for this Node
|
|
584 Returns : Array of tagnames
|
|
585 Args : None
|
|
586
|
|
587
|
|
588 =cut
|
|
589
|
|
590 sub get_all_tags{
|
|
591 my ($self) = @_;
|
|
592 return sort keys %{$self->{'_tags'} || {}};
|
|
593 }
|
|
594
|
|
595 =head2 get_tag_values
|
|
596
|
|
597 Title : get_tag_values
|
|
598 Usage : my @values = $node->get_tag_value($tag)
|
|
599 Function: Gets the values for given tag ($tag)
|
|
600 Returns : Array of values or empty list if tag does not exist
|
|
601 Args : $tag - tag name
|
|
602
|
|
603
|
|
604 =cut
|
|
605
|
|
606 sub get_tag_values{
|
|
607 my ($self,$tag) = @_;
|
|
608 return @{$self->{'_tags'}->{$tag} || []};
|
|
609 }
|
|
610
|
|
611 =head2 has_tag
|
|
612
|
|
613 Title : has_tag
|
|
614 Usage : $node->has_tag($tag)
|
|
615 Function: Boolean test if tag exists in the Node
|
|
616 Returns : Boolean
|
|
617 Args : $tag - tagname
|
|
618
|
|
619
|
|
620 =cut
|
|
621
|
|
622 sub has_tag {
|
|
623 my ($self,$tag) = @_;
|
|
624 return exists $self->{'_tags'}->{$tag};
|
|
625 }
|
|
626
|
|
627 sub node_cleanup {
|
|
628 my $self = shift;
|
|
629 if( defined $self->{'_desc'} &&
|
|
630 ref($self->{'_desc'}) =~ /ARRAY/i ) {
|
|
631 while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
|
|
632 $node->ancestor(undef); # insure no circular references
|
|
633 $node = undef;
|
|
634 }
|
|
635 }
|
|
636 $self->{'_desc'} = {};
|
|
637 }
|
|
638
|
|
639 1;
|