comparison variant_effect_predictor/Bio/Taxonomy/Taxon.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: Taxon.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $
2 #
3 # BioPerl module for Bio::Taxonomy::Taxon
4 #
5 # Cared for by Dan Kortschak but pilfered extensively from
6 # the Bio::Tree::Node code of Jason Stajich
7 #
8 # You may distribute this module under the same terms as perl itself
9
10 # POD documentation - main docs before the code
11
12 =head1 NAME
13
14 Bio::Taxonomy::Taxon - Generic Taxonomic Entity object
15
16 =head1 SYNOPSIS
17
18 use Bio::Taxonomy::Taxon;
19 my $taxonA = new Bio::Taxonomy::Taxon();
20 my $taxonL = new Bio::Taxonomy::Taxon();
21 my $taxonR = new Bio::Taxonomy::Taxon();
22
23 my $taxon = new Bio::Taxonomy::Taxon();
24 $taxon->add_Descendents($nodeL);
25 $taxon->add_Descendents($nodeR);
26
27 $species = $taxon->species;
28
29 =head1 DESCRIPTION
30
31 Makes a taxonomic unit suitable for use in a taxonomic tree
32
33 =head1 CONTACT
34
35 Dan Kortschak email B<kortschak@rsbs.anu.edu.au>
36
37 =head1 APPENDIX
38
39 The rest of the documentation details each of the object
40 methods. Internal methods are usually preceded with a _
41
42 =cut
43
44
45 # code begins...
46
47 package Bio::Taxonomy::Taxon;
48 use vars qw(@ISA $CREATIONORDER);
49 use strict;
50
51 # Object preamble - inherits from Bio::Root::Object, Bio::Tree::NodeI, Bio::Species and Bio::Taxonomy
52 use Bio::Root::Root;
53 use Bio::Tree::NodeI;
54 use Bio::Taxonomy;
55 use Bio::Species;
56
57 # import rank information from Bio::Taxonomy.pm
58 use vars qw(@RANK %RANK);
59
60 @ISA = qw(Bio::Root::Root Bio::Tree::NodeI);
61
62 BEGIN {
63 $CREATIONORDER = 0;
64 }
65
66 =head2 new
67
68 Title : new
69 Usage : my $obj = new Bio::Taxonomy::Taxon();
70 Function: Builds a new Bio::Taxonomy::Taxon object
71 Returns : Bio::Taxonomy::Taxon
72 Args : -descendents => array pointer to descendents (optional)
73 -branch_length => branch length [integer] (optional)
74 -taxon => taxon
75 -id => unique taxon id for node (from NCBI's list preferably)
76 -rank => the taxonomic level of the node (also from NCBI)
77
78 =cut
79
80 sub new {
81 my($class,@args) = @_;
82
83 my $self = $class->SUPER::new(@args);
84 my ($children,$branchlen,$id,$taxon,$rank,$desc) =
85
86 $self->_rearrange([qw(DESCENDENTS
87 BRANCH_LENGTH
88 ID
89 TAXON
90 RANK
91 DESC)], @args);
92
93 $self->{'_desc'} = {};
94 defined $desc && $self->description($desc);
95 defined $taxon && $self->taxon($taxon);
96 defined $id && $self->id($id);
97 defined $branchlen && $self->branch_length($branchlen);
98 defined $rank && $self->rank($rank);
99
100 if( defined $children ) {
101 if( ref($children) !~ /ARRAY/i ) {
102 $self->warn("Must specify a valid ARRAY reference to initialize a Taxon's Descendents");
103 }
104 foreach my $c ( @$children ) {
105 $self->add_Descendent($c);
106 }
107 }
108 $self->_creation_id($CREATIONORDER++);
109 return $self;
110 }
111
112 =head2 add_Descendent
113
114 Title : add_Descendent
115 Usage : $taxon->add_Descendant($taxon);
116 Function: Adds a descendent to a taxon
117 Returns : number of current descendents for this taxon
118 Args : Bio::Taxonomy::Taxon
119 boolean flag, true if you want to ignore the fact that you are
120 adding a second node with the same unique id (typically memory
121 location reference in this implementation). default is false and
122 will throw an error if you try and overwrite an existing node.
123
124
125 =cut
126
127 sub add_Descendent{
128
129 my ($self,$node,$ignoreoverwrite) = @_;
130
131 return -1 if( ! defined $node ) ;
132 if( ! $node->isa('Bio::Taxonomy::Taxon') ) {
133 $self->warn("Trying to add a Descendent who is not a Bio::Taxonomy::Taxon");
134 return -1;
135 }
136 # do we care about order?
137 $node->{'_ancestor'} = $self;
138 if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) {
139 $self->throw("Going to overwrite a taxon which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future");
140 }
141
142 $self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate??
143
144 $self->invalidate_height();
145
146 return scalar keys %{$self->{'_desc'}};
147 }
148
149
150 =head2 each_Descendent
151
152 Title : each_Descendent($sortby)
153 Usage : my @taxa = $taxon->each_Descendent;
154 Function: all the descendents for this taxon (but not their descendents
155 i.e. not a recursive fetchall)
156 Returns : Array of Bio::Taxonomy::Taxon objects
157 Args : $sortby [optional] "height", "creation" or coderef to be used
158 to sort the order of children taxa.
159
160
161 =cut
162
163 sub each_Descendent{
164 my ($self, $sortby) = @_;
165
166 # order can be based on branch length (and sub branchlength)
167
168 $sortby ||= 'height';
169
170 if (ref $sortby eq 'CODE') {
171 return sort $sortby values %{$self->{'_desc'}};
172 } else {
173 if ($sortby eq 'height') {
174 return map { $_->[0] }
175 sort { $a->[1] <=> $b->[1] ||
176 $a->[2] <=> $b->[2] }
177 map { [$_, $_->height, $_->internal_id ] }
178 values %{$self->{'_desc'}};
179 } else {
180 return map { $_->[0] }
181 sort { $a->[1] <=> $b->[1] }
182 map { [$_, $_->height ] }
183 values %{$self->{'_desc'}};
184 }
185 }
186 }
187
188 =head2 remove_Descendent
189
190 Title : remove_Descendent
191 Usage : $taxon->remove_Descedent($taxon_foo);
192 Function: Removes a specific taxon from being a Descendent of this taxon
193 Returns : nothing
194 Args : An array of Bio::taxonomy::Taxon objects which have be previously
195 passed to the add_Descendent call of this object.
196
197 =cut
198
199 sub remove_Descendent{
200 my ($self,@nodes) = @_;
201 foreach my $n ( @nodes ) {
202 if( $self->{'_desc'}->{$n->internal_id} ) {
203 $n->{'_ancestor'} = undef;
204 $self->{'_desc'}->{$n->internal_id}->{'_ancestor'} = undef;
205 delete $self->{'_desc'}->{$n->internal_id};
206
207 } else {
208 $self->debug(sprintf("no taxon %s (%s) listed as a descendent in this taxon %s (%s)\n",$n->id, $n,$self->id,$self));
209 $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n");
210 }
211 }
212 1;
213 }
214
215
216 =head2 remove_all_Descendents
217
218 Title : remove_all_Descendents
219 Usage : $taxon->remove_All_Descendents()
220 Function: Cleanup the taxon's reference to descendents and reset
221 their ancestor pointers to undef, if you don't have a reference
222 to these objects after this call they will be cleanedup - so
223 a get_nodes from the Tree object would be a safe thing to do first
224 Returns : nothing
225 Args : none
226
227
228 =cut
229
230 sub remove_all_Descendents{
231 my ($self) = @_;
232 # this won't cleanup the taxa themselves if you also have
233 # a copy/pointer of them (I think)...
234 while( my ($node,$val) = each %{ $self->{'_desc'} } ) {
235 $val->{'_ancestor'} = undef;
236 }
237 $self->{'_desc'} = {};
238 1;
239 }
240
241 =head2 get_Descendents
242
243 Title : get_Descendents
244 Usage : my @taxa = $taxon->get_Descendents;
245 Function: Recursively fetch all the taxa and their descendents
246 *NOTE* This is different from each_Descendent
247 Returns : Array or Bio::Taxonomy::Taxon objects
248 Args : none
249
250 =cut
251
252 # implemented in the interface
253
254 =head2 ancestor
255
256 Title : ancestor
257 Usage : $taxon->ancestor($newval)
258 Function: Set the Ancestor
259 Returns : value of ancestor
260 Args : newvalue (optional)
261
262 =cut
263
264 sub ancestor {
265 my ($self, $value) = @_;
266 if (defined $value) {
267 $self->{'_ancestor'} = $value;
268 }
269 return $self->{'_ancestor'};
270 }
271
272 =head2 branch_length
273
274 Title : branch_length
275 Usage : $obj->branch_length($newval)
276 Function:
277 Example :
278 Returns : value of branch_length
279 Args : newvalue (optional)
280
281
282 =cut
283
284 sub branch_length {
285 my ($self,$value) = @_;
286 if( defined $value) {
287 $self->{'branch_length'} = $value;
288 }
289 return $self->{'branch_length'};
290 }
291
292 =head2 description
293
294 Title : description
295 Usage : $obj->description($newval)
296 Function:
297 Example :
298 Returns : value of description
299 Args : newvalue (optional)
300
301
302 =cut
303
304 sub description {
305 my ($self,$value) = @_;
306 if( defined $value ) {
307 $self->{'_desc'} = $value;
308 }
309 return $self->{'_desc'};
310 }
311
312
313 =head2 rank
314
315 Title : rank
316 Usage : $obj->rank($newval)
317 Function: Set the taxonomic rank
318 Example :
319 Returns : taxonomic rank of taxon
320 Args : newvalue (optional)
321
322
323 =cut
324
325 sub rank {
326 my ($self,$value) = @_;
327 if (defined $value) {
328 my $ranks=join("|",@RANK);
329 if ($value=~/$ranks/) {
330 $self->{'_rank'} = $value;
331 } else {
332 $self->throw("Attempted to set unknown taxonomic rank: $value.\n");
333 }
334 }
335 return $self->{'_rank'};
336 }
337
338
339 =head2 taxon
340
341 Title : taxon
342 Usage : $obj->taxon($newtaxon)
343 Function: Set the name of the taxon
344 Example :
345 Returns : name of taxon
346 Args : newtaxon (optional)
347
348
349 =cut
350
351 # because internal taxa have names too...
352 sub taxon {
353 my ($self,$value) = @_;
354 if( defined $value ) {
355 $self->{'_taxon'} = $value;
356 }
357 return $self->{'_taxon'};
358 }
359
360
361 =head2 id
362
363 Title : id
364 Usage : $obj->id($newval)
365 Function:
366 Example :
367 Returns : value of id
368 Args : newvalue (optional)
369
370
371 =cut
372
373 sub id {
374 my ($self,$value) = @_;
375 if( defined $value ) {
376 $self->{'_id'} = $value;
377 }
378 return $self->{'_id'};
379 }
380
381
382
383 sub DESTROY {
384 my ($self) = @_;
385 # try to insure that everything is cleaned up
386 $self->SUPER::DESTROY();
387 if( defined $self->{'_desc'} &&
388 ref($self->{'_desc'}) =~ /ARRAY/i ) {
389 while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
390 $node->{'_ancestor'} = undef; # ensure no circular references
391 $node->DESTROY();
392 $node = undef;
393 }
394 $self->{'_desc'} = {};
395 }
396 }
397
398 =head2 internal_id
399
400 Title : internal_id
401 Usage : my $internalid = $taxon->internal_id
402 Function: Returns the internal unique id for this taxon
403 (a monotonically increasing number for this in-memory implementation
404 but could be a database determined unique id in other
405 implementations)
406 Returns : unique id
407 Args : none
408
409 =cut
410
411 sub internal_id {
412 return $_[0]->_creation_id;
413 }
414
415
416 =head2 _creation_id
417
418 Title : _creation_id
419 Usage : $obj->_creation_id($newval)
420 Function: a private method signifying the internal creation order
421 Returns : value of _creation_id
422 Args : newvalue (optional)
423
424
425 =cut
426
427 sub _creation_id {
428 my ($self,$value) = @_;
429 if( defined $value) {
430 $self->{'_creation_id'} = $value;
431 }
432 return $self->{'_creation_id'} || 0;
433 }
434
435
436 # The following methods are implemented by NodeI decorated interface
437
438 =head2 is_Leaf
439
440 Title : is_Leaf
441 Usage : if( $node->is_Leaf )
442 Function: Get Leaf status
443 Returns : boolean
444 Args : none
445
446 =cut
447
448 sub is_Leaf {
449 my ($self) = @_;
450 my $rc = 0;
451 $rc = 1 if( ! defined $self->{'_desc'} ||
452 keys %{$self->{'_desc'}} == 0);
453 return $rc;
454 }
455
456 =head2 to_string
457
458 Title : to_string
459 Usage : my $str = $taxon->to_string()
460 Function: For debugging, provide a taxon as a string
461 Returns : string
462 Args : none
463
464 =cut
465
466 =head2 height
467
468 Title : height
469 Usage : my $len = $taxon->height
470 Function: Returns the height of the tree starting at this
471 taxon. Height is the maximum branchlength.
472 Returns : The longest length (weighting branches with branch_length) to a leaf
473 Args : none
474
475 =cut
476
477 sub height {
478 my ($self) = @_;
479
480 return $self->{'_height'} if( defined $self->{'_height'} );
481
482 if( $self->is_Leaf ) {
483 if( !defined $self->branch_length ) {
484 $self->debug(sprintf("Trying to calculate height of a taxon when a taxon (%s) has an undefined branch_length",$self->id || '?' ));
485 return 0;
486 }
487 return $self->branch_length;
488 }
489 my $max = 0;
490 foreach my $subnode ( $self->each_Descendent ) {
491 my $s = $subnode->height;
492 if( $s > $max ) { $max = $s; }
493 }
494 return ($self->{'_height'} = $max + ($self->branch_length || 1));
495 }
496
497
498 =head2 invalidate_height
499
500 Title : invalidate_height
501 Usage : private helper method
502 Function: Invalidate our cached value of the taxon's height in the tree
503 Returns : nothing
504 Args : none
505
506 =cut
507
508
509 sub invalidate_height {
510 my ($self) = @_;
511
512 $self->{'_height'} = undef;
513 if( $self->ancestor ) {
514 $self->ancestor->invalidate_height;
515 }
516 }
517
518 =head2 classify
519
520 Title : classify
521 Usage : @obj->classify()
522 Function: a method to return the classification of a species
523 Returns : name of taxon and ancestor's taxon recursively
524 Args : boolean to specify whether we want all taxa not just ranked
525 levels
526
527
528 =cut
529
530 sub classify {
531 my ($self,$allnodes) = @_;
532
533 my @classification=($self->taxon);
534 my $node=$self;
535
536 while (defined $node->ancestor) {
537 push @classification, $node->ancestor->taxon if $allnodes==1;
538 $node=$node->ancestor;
539 }
540
541 return (@classification);
542 }
543
544
545 =head2 has_rank
546
547 Title : has_rank
548 Usage : $obj->has_rank($rank)
549 Function: a method to query ancestors' rank
550 Returns : boolean
551 Args : $rank
552
553
554 =cut
555
556 sub has_rank {
557 my ($self,$rank) = @_;
558
559 return $self if $self->rank eq $rank;
560
561 while (defined $self->ancestor) {
562 return $self if $self->ancestor->rank eq $rank;
563 $self=$self->ancestor;
564 }
565
566 return undef;
567 }
568
569
570 =head2 has_taxon
571
572 Title : has_taxon
573 Usage : $obj->has_taxon($taxon)
574 Function: a method to query ancestors' taxa
575 Returns : boolean
576 Args : Bio::Taxonomy::Taxon object
577
578
579 =cut
580
581 sub has_taxon {
582 my ($self,$taxon) = @_;
583
584 return $self if
585 ((defined $self->id && $self->id == $taxon->id) ||
586 ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank));
587
588 while (defined $self->ancestor) {
589 return $self if
590 ((defined $self->id && $self->id == $taxon->id) ||
591 ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank) &&
592 ($self->taxon ne 'no rank'));
593 $self=$self->ancestor;
594 }
595
596 return undef;
597 }
598
599
600 =head2 distance_to_root
601
602 Title : distance_to_root
603 Usage : $obj->distance_to_root
604 Function: a method to query ancestors' taxa
605 Returns : number of links to root
606 Args :
607
608
609 =cut
610
611 sub distance_to_root {
612 my ($self,$taxon) = @_;
613
614 my $count=0;
615
616 while (defined $self->ancestor) {
617 $count++;
618 $self=$self->ancestor;
619 }
620
621 return $count;
622 }
623
624
625 =head2 recent_common_ancestor
626
627 Title : recent_common_ancestor
628 Usage : $obj->recent_common_ancestor($taxon)
629 Function: a method to query find common ancestors
630 Returns : Bio::Taxonomy::Taxon of query or undef if no ancestor of rank
631 Args : Bio::Taxonomy::Taxon
632
633
634 =cut
635
636 sub recent_common_ancestor {
637 my ($self,$node) = @_;
638
639 while (defined $node->ancestor) {
640 my $common=$self->has_taxon($node);
641 return $common if defined $common;
642 $node=$node->ancestor;
643 }
644
645 return undef;
646 }
647
648 =head2 species
649
650 Title : species
651 Usage : $obj=$taxon->species;
652 Function: Returns a Bio::Species object reflecting the taxon's tree position
653 Returns : a Bio::Species object
654 Args : none
655
656 =cut
657
658 sub species {
659 my ($self) = @_;
660 my $species;
661
662 if ($self->has_rank('subspecies') && $self->ancestor->rank eq 'species') {
663 $species = Bio::Species->new(-classification => $self->ancestor->classify);
664 $species->genus($self->ancestor->ancestor->taxon);
665 $species->species($self->ancestor->taxon);
666 $species->sub_species($self->taxon);
667 } elsif ($self->has_rank('species')) {
668 $species = Bio::Species->new(-classification => $self->classify);
669 $species->genus($self->ancestor->taxon);
670 $species->species($self->taxon);
671 } else {
672 $self->throw("Trying to create a species from a taxonomic entity without species rank. Use classify instead of species.\n");
673 }
674 return $species;
675 }
676
677 1;