Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Taxonomy/Taxon.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: Taxon.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $ | |
2 # | |
3 # BioPerl module for Bio::Taxonomy::Taxon | |
4 # | |
5 # Cared for by Dan Kortschak but pilfered extensively from | |
6 # the Bio::Tree::Node code of Jason Stajich | |
7 # | |
8 # You may distribute this module under the same terms as perl itself | |
9 | |
10 # POD documentation - main docs before the code | |
11 | |
12 =head1 NAME | |
13 | |
14 Bio::Taxonomy::Taxon - Generic Taxonomic Entity object | |
15 | |
16 =head1 SYNOPSIS | |
17 | |
18 use Bio::Taxonomy::Taxon; | |
19 my $taxonA = new Bio::Taxonomy::Taxon(); | |
20 my $taxonL = new Bio::Taxonomy::Taxon(); | |
21 my $taxonR = new Bio::Taxonomy::Taxon(); | |
22 | |
23 my $taxon = new Bio::Taxonomy::Taxon(); | |
24 $taxon->add_Descendents($nodeL); | |
25 $taxon->add_Descendents($nodeR); | |
26 | |
27 $species = $taxon->species; | |
28 | |
29 =head1 DESCRIPTION | |
30 | |
31 Makes a taxonomic unit suitable for use in a taxonomic tree | |
32 | |
33 =head1 CONTACT | |
34 | |
35 Dan Kortschak email B<kortschak@rsbs.anu.edu.au> | |
36 | |
37 =head1 APPENDIX | |
38 | |
39 The rest of the documentation details each of the object | |
40 methods. Internal methods are usually preceded with a _ | |
41 | |
42 =cut | |
43 | |
44 | |
45 # code begins... | |
46 | |
47 package Bio::Taxonomy::Taxon; | |
48 use vars qw(@ISA $CREATIONORDER); | |
49 use strict; | |
50 | |
51 # Object preamble - inherits from Bio::Root::Object, Bio::Tree::NodeI, Bio::Species and Bio::Taxonomy | |
52 use Bio::Root::Root; | |
53 use Bio::Tree::NodeI; | |
54 use Bio::Taxonomy; | |
55 use Bio::Species; | |
56 | |
57 # import rank information from Bio::Taxonomy.pm | |
58 use vars qw(@RANK %RANK); | |
59 | |
60 @ISA = qw(Bio::Root::Root Bio::Tree::NodeI); | |
61 | |
62 BEGIN { | |
63 $CREATIONORDER = 0; | |
64 } | |
65 | |
66 =head2 new | |
67 | |
68 Title : new | |
69 Usage : my $obj = new Bio::Taxonomy::Taxon(); | |
70 Function: Builds a new Bio::Taxonomy::Taxon object | |
71 Returns : Bio::Taxonomy::Taxon | |
72 Args : -descendents => array pointer to descendents (optional) | |
73 -branch_length => branch length [integer] (optional) | |
74 -taxon => taxon | |
75 -id => unique taxon id for node (from NCBI's list preferably) | |
76 -rank => the taxonomic level of the node (also from NCBI) | |
77 | |
78 =cut | |
79 | |
80 sub new { | |
81 my($class,@args) = @_; | |
82 | |
83 my $self = $class->SUPER::new(@args); | |
84 my ($children,$branchlen,$id,$taxon,$rank,$desc) = | |
85 | |
86 $self->_rearrange([qw(DESCENDENTS | |
87 BRANCH_LENGTH | |
88 ID | |
89 TAXON | |
90 RANK | |
91 DESC)], @args); | |
92 | |
93 $self->{'_desc'} = {}; | |
94 defined $desc && $self->description($desc); | |
95 defined $taxon && $self->taxon($taxon); | |
96 defined $id && $self->id($id); | |
97 defined $branchlen && $self->branch_length($branchlen); | |
98 defined $rank && $self->rank($rank); | |
99 | |
100 if( defined $children ) { | |
101 if( ref($children) !~ /ARRAY/i ) { | |
102 $self->warn("Must specify a valid ARRAY reference to initialize a Taxon's Descendents"); | |
103 } | |
104 foreach my $c ( @$children ) { | |
105 $self->add_Descendent($c); | |
106 } | |
107 } | |
108 $self->_creation_id($CREATIONORDER++); | |
109 return $self; | |
110 } | |
111 | |
112 =head2 add_Descendent | |
113 | |
114 Title : add_Descendent | |
115 Usage : $taxon->add_Descendant($taxon); | |
116 Function: Adds a descendent to a taxon | |
117 Returns : number of current descendents for this taxon | |
118 Args : Bio::Taxonomy::Taxon | |
119 boolean flag, true if you want to ignore the fact that you are | |
120 adding a second node with the same unique id (typically memory | |
121 location reference in this implementation). default is false and | |
122 will throw an error if you try and overwrite an existing node. | |
123 | |
124 | |
125 =cut | |
126 | |
127 sub add_Descendent{ | |
128 | |
129 my ($self,$node,$ignoreoverwrite) = @_; | |
130 | |
131 return -1 if( ! defined $node ) ; | |
132 if( ! $node->isa('Bio::Taxonomy::Taxon') ) { | |
133 $self->warn("Trying to add a Descendent who is not a Bio::Taxonomy::Taxon"); | |
134 return -1; | |
135 } | |
136 # do we care about order? | |
137 $node->{'_ancestor'} = $self; | |
138 if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) { | |
139 $self->throw("Going to overwrite a taxon which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future"); | |
140 } | |
141 | |
142 $self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate?? | |
143 | |
144 $self->invalidate_height(); | |
145 | |
146 return scalar keys %{$self->{'_desc'}}; | |
147 } | |
148 | |
149 | |
150 =head2 each_Descendent | |
151 | |
152 Title : each_Descendent($sortby) | |
153 Usage : my @taxa = $taxon->each_Descendent; | |
154 Function: all the descendents for this taxon (but not their descendents | |
155 i.e. not a recursive fetchall) | |
156 Returns : Array of Bio::Taxonomy::Taxon objects | |
157 Args : $sortby [optional] "height", "creation" or coderef to be used | |
158 to sort the order of children taxa. | |
159 | |
160 | |
161 =cut | |
162 | |
163 sub each_Descendent{ | |
164 my ($self, $sortby) = @_; | |
165 | |
166 # order can be based on branch length (and sub branchlength) | |
167 | |
168 $sortby ||= 'height'; | |
169 | |
170 if (ref $sortby eq 'CODE') { | |
171 return sort $sortby values %{$self->{'_desc'}}; | |
172 } else { | |
173 if ($sortby eq 'height') { | |
174 return map { $_->[0] } | |
175 sort { $a->[1] <=> $b->[1] || | |
176 $a->[2] <=> $b->[2] } | |
177 map { [$_, $_->height, $_->internal_id ] } | |
178 values %{$self->{'_desc'}}; | |
179 } else { | |
180 return map { $_->[0] } | |
181 sort { $a->[1] <=> $b->[1] } | |
182 map { [$_, $_->height ] } | |
183 values %{$self->{'_desc'}}; | |
184 } | |
185 } | |
186 } | |
187 | |
188 =head2 remove_Descendent | |
189 | |
190 Title : remove_Descendent | |
191 Usage : $taxon->remove_Descedent($taxon_foo); | |
192 Function: Removes a specific taxon from being a Descendent of this taxon | |
193 Returns : nothing | |
194 Args : An array of Bio::taxonomy::Taxon objects which have be previously | |
195 passed to the add_Descendent call of this object. | |
196 | |
197 =cut | |
198 | |
199 sub remove_Descendent{ | |
200 my ($self,@nodes) = @_; | |
201 foreach my $n ( @nodes ) { | |
202 if( $self->{'_desc'}->{$n->internal_id} ) { | |
203 $n->{'_ancestor'} = undef; | |
204 $self->{'_desc'}->{$n->internal_id}->{'_ancestor'} = undef; | |
205 delete $self->{'_desc'}->{$n->internal_id}; | |
206 | |
207 } else { | |
208 $self->debug(sprintf("no taxon %s (%s) listed as a descendent in this taxon %s (%s)\n",$n->id, $n,$self->id,$self)); | |
209 $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n"); | |
210 } | |
211 } | |
212 1; | |
213 } | |
214 | |
215 | |
216 =head2 remove_all_Descendents | |
217 | |
218 Title : remove_all_Descendents | |
219 Usage : $taxon->remove_All_Descendents() | |
220 Function: Cleanup the taxon's reference to descendents and reset | |
221 their ancestor pointers to undef, if you don't have a reference | |
222 to these objects after this call they will be cleanedup - so | |
223 a get_nodes from the Tree object would be a safe thing to do first | |
224 Returns : nothing | |
225 Args : none | |
226 | |
227 | |
228 =cut | |
229 | |
230 sub remove_all_Descendents{ | |
231 my ($self) = @_; | |
232 # this won't cleanup the taxa themselves if you also have | |
233 # a copy/pointer of them (I think)... | |
234 while( my ($node,$val) = each %{ $self->{'_desc'} } ) { | |
235 $val->{'_ancestor'} = undef; | |
236 } | |
237 $self->{'_desc'} = {}; | |
238 1; | |
239 } | |
240 | |
241 =head2 get_Descendents | |
242 | |
243 Title : get_Descendents | |
244 Usage : my @taxa = $taxon->get_Descendents; | |
245 Function: Recursively fetch all the taxa and their descendents | |
246 *NOTE* This is different from each_Descendent | |
247 Returns : Array or Bio::Taxonomy::Taxon objects | |
248 Args : none | |
249 | |
250 =cut | |
251 | |
252 # implemented in the interface | |
253 | |
254 =head2 ancestor | |
255 | |
256 Title : ancestor | |
257 Usage : $taxon->ancestor($newval) | |
258 Function: Set the Ancestor | |
259 Returns : value of ancestor | |
260 Args : newvalue (optional) | |
261 | |
262 =cut | |
263 | |
264 sub ancestor { | |
265 my ($self, $value) = @_; | |
266 if (defined $value) { | |
267 $self->{'_ancestor'} = $value; | |
268 } | |
269 return $self->{'_ancestor'}; | |
270 } | |
271 | |
272 =head2 branch_length | |
273 | |
274 Title : branch_length | |
275 Usage : $obj->branch_length($newval) | |
276 Function: | |
277 Example : | |
278 Returns : value of branch_length | |
279 Args : newvalue (optional) | |
280 | |
281 | |
282 =cut | |
283 | |
284 sub branch_length { | |
285 my ($self,$value) = @_; | |
286 if( defined $value) { | |
287 $self->{'branch_length'} = $value; | |
288 } | |
289 return $self->{'branch_length'}; | |
290 } | |
291 | |
292 =head2 description | |
293 | |
294 Title : description | |
295 Usage : $obj->description($newval) | |
296 Function: | |
297 Example : | |
298 Returns : value of description | |
299 Args : newvalue (optional) | |
300 | |
301 | |
302 =cut | |
303 | |
304 sub description { | |
305 my ($self,$value) = @_; | |
306 if( defined $value ) { | |
307 $self->{'_desc'} = $value; | |
308 } | |
309 return $self->{'_desc'}; | |
310 } | |
311 | |
312 | |
313 =head2 rank | |
314 | |
315 Title : rank | |
316 Usage : $obj->rank($newval) | |
317 Function: Set the taxonomic rank | |
318 Example : | |
319 Returns : taxonomic rank of taxon | |
320 Args : newvalue (optional) | |
321 | |
322 | |
323 =cut | |
324 | |
325 sub rank { | |
326 my ($self,$value) = @_; | |
327 if (defined $value) { | |
328 my $ranks=join("|",@RANK); | |
329 if ($value=~/$ranks/) { | |
330 $self->{'_rank'} = $value; | |
331 } else { | |
332 $self->throw("Attempted to set unknown taxonomic rank: $value.\n"); | |
333 } | |
334 } | |
335 return $self->{'_rank'}; | |
336 } | |
337 | |
338 | |
339 =head2 taxon | |
340 | |
341 Title : taxon | |
342 Usage : $obj->taxon($newtaxon) | |
343 Function: Set the name of the taxon | |
344 Example : | |
345 Returns : name of taxon | |
346 Args : newtaxon (optional) | |
347 | |
348 | |
349 =cut | |
350 | |
351 # because internal taxa have names too... | |
352 sub taxon { | |
353 my ($self,$value) = @_; | |
354 if( defined $value ) { | |
355 $self->{'_taxon'} = $value; | |
356 } | |
357 return $self->{'_taxon'}; | |
358 } | |
359 | |
360 | |
361 =head2 id | |
362 | |
363 Title : id | |
364 Usage : $obj->id($newval) | |
365 Function: | |
366 Example : | |
367 Returns : value of id | |
368 Args : newvalue (optional) | |
369 | |
370 | |
371 =cut | |
372 | |
373 sub id { | |
374 my ($self,$value) = @_; | |
375 if( defined $value ) { | |
376 $self->{'_id'} = $value; | |
377 } | |
378 return $self->{'_id'}; | |
379 } | |
380 | |
381 | |
382 | |
383 sub DESTROY { | |
384 my ($self) = @_; | |
385 # try to insure that everything is cleaned up | |
386 $self->SUPER::DESTROY(); | |
387 if( defined $self->{'_desc'} && | |
388 ref($self->{'_desc'}) =~ /ARRAY/i ) { | |
389 while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) { | |
390 $node->{'_ancestor'} = undef; # ensure no circular references | |
391 $node->DESTROY(); | |
392 $node = undef; | |
393 } | |
394 $self->{'_desc'} = {}; | |
395 } | |
396 } | |
397 | |
398 =head2 internal_id | |
399 | |
400 Title : internal_id | |
401 Usage : my $internalid = $taxon->internal_id | |
402 Function: Returns the internal unique id for this taxon | |
403 (a monotonically increasing number for this in-memory implementation | |
404 but could be a database determined unique id in other | |
405 implementations) | |
406 Returns : unique id | |
407 Args : none | |
408 | |
409 =cut | |
410 | |
411 sub internal_id { | |
412 return $_[0]->_creation_id; | |
413 } | |
414 | |
415 | |
416 =head2 _creation_id | |
417 | |
418 Title : _creation_id | |
419 Usage : $obj->_creation_id($newval) | |
420 Function: a private method signifying the internal creation order | |
421 Returns : value of _creation_id | |
422 Args : newvalue (optional) | |
423 | |
424 | |
425 =cut | |
426 | |
427 sub _creation_id { | |
428 my ($self,$value) = @_; | |
429 if( defined $value) { | |
430 $self->{'_creation_id'} = $value; | |
431 } | |
432 return $self->{'_creation_id'} || 0; | |
433 } | |
434 | |
435 | |
436 # The following methods are implemented by NodeI decorated interface | |
437 | |
438 =head2 is_Leaf | |
439 | |
440 Title : is_Leaf | |
441 Usage : if( $node->is_Leaf ) | |
442 Function: Get Leaf status | |
443 Returns : boolean | |
444 Args : none | |
445 | |
446 =cut | |
447 | |
448 sub is_Leaf { | |
449 my ($self) = @_; | |
450 my $rc = 0; | |
451 $rc = 1 if( ! defined $self->{'_desc'} || | |
452 keys %{$self->{'_desc'}} == 0); | |
453 return $rc; | |
454 } | |
455 | |
456 =head2 to_string | |
457 | |
458 Title : to_string | |
459 Usage : my $str = $taxon->to_string() | |
460 Function: For debugging, provide a taxon as a string | |
461 Returns : string | |
462 Args : none | |
463 | |
464 =cut | |
465 | |
466 =head2 height | |
467 | |
468 Title : height | |
469 Usage : my $len = $taxon->height | |
470 Function: Returns the height of the tree starting at this | |
471 taxon. Height is the maximum branchlength. | |
472 Returns : The longest length (weighting branches with branch_length) to a leaf | |
473 Args : none | |
474 | |
475 =cut | |
476 | |
477 sub height { | |
478 my ($self) = @_; | |
479 | |
480 return $self->{'_height'} if( defined $self->{'_height'} ); | |
481 | |
482 if( $self->is_Leaf ) { | |
483 if( !defined $self->branch_length ) { | |
484 $self->debug(sprintf("Trying to calculate height of a taxon when a taxon (%s) has an undefined branch_length",$self->id || '?' )); | |
485 return 0; | |
486 } | |
487 return $self->branch_length; | |
488 } | |
489 my $max = 0; | |
490 foreach my $subnode ( $self->each_Descendent ) { | |
491 my $s = $subnode->height; | |
492 if( $s > $max ) { $max = $s; } | |
493 } | |
494 return ($self->{'_height'} = $max + ($self->branch_length || 1)); | |
495 } | |
496 | |
497 | |
498 =head2 invalidate_height | |
499 | |
500 Title : invalidate_height | |
501 Usage : private helper method | |
502 Function: Invalidate our cached value of the taxon's height in the tree | |
503 Returns : nothing | |
504 Args : none | |
505 | |
506 =cut | |
507 | |
508 | |
509 sub invalidate_height { | |
510 my ($self) = @_; | |
511 | |
512 $self->{'_height'} = undef; | |
513 if( $self->ancestor ) { | |
514 $self->ancestor->invalidate_height; | |
515 } | |
516 } | |
517 | |
518 =head2 classify | |
519 | |
520 Title : classify | |
521 Usage : @obj->classify() | |
522 Function: a method to return the classification of a species | |
523 Returns : name of taxon and ancestor's taxon recursively | |
524 Args : boolean to specify whether we want all taxa not just ranked | |
525 levels | |
526 | |
527 | |
528 =cut | |
529 | |
530 sub classify { | |
531 my ($self,$allnodes) = @_; | |
532 | |
533 my @classification=($self->taxon); | |
534 my $node=$self; | |
535 | |
536 while (defined $node->ancestor) { | |
537 push @classification, $node->ancestor->taxon if $allnodes==1; | |
538 $node=$node->ancestor; | |
539 } | |
540 | |
541 return (@classification); | |
542 } | |
543 | |
544 | |
545 =head2 has_rank | |
546 | |
547 Title : has_rank | |
548 Usage : $obj->has_rank($rank) | |
549 Function: a method to query ancestors' rank | |
550 Returns : boolean | |
551 Args : $rank | |
552 | |
553 | |
554 =cut | |
555 | |
556 sub has_rank { | |
557 my ($self,$rank) = @_; | |
558 | |
559 return $self if $self->rank eq $rank; | |
560 | |
561 while (defined $self->ancestor) { | |
562 return $self if $self->ancestor->rank eq $rank; | |
563 $self=$self->ancestor; | |
564 } | |
565 | |
566 return undef; | |
567 } | |
568 | |
569 | |
570 =head2 has_taxon | |
571 | |
572 Title : has_taxon | |
573 Usage : $obj->has_taxon($taxon) | |
574 Function: a method to query ancestors' taxa | |
575 Returns : boolean | |
576 Args : Bio::Taxonomy::Taxon object | |
577 | |
578 | |
579 =cut | |
580 | |
581 sub has_taxon { | |
582 my ($self,$taxon) = @_; | |
583 | |
584 return $self if | |
585 ((defined $self->id && $self->id == $taxon->id) || | |
586 ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank)); | |
587 | |
588 while (defined $self->ancestor) { | |
589 return $self if | |
590 ((defined $self->id && $self->id == $taxon->id) || | |
591 ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank) && | |
592 ($self->taxon ne 'no rank')); | |
593 $self=$self->ancestor; | |
594 } | |
595 | |
596 return undef; | |
597 } | |
598 | |
599 | |
600 =head2 distance_to_root | |
601 | |
602 Title : distance_to_root | |
603 Usage : $obj->distance_to_root | |
604 Function: a method to query ancestors' taxa | |
605 Returns : number of links to root | |
606 Args : | |
607 | |
608 | |
609 =cut | |
610 | |
611 sub distance_to_root { | |
612 my ($self,$taxon) = @_; | |
613 | |
614 my $count=0; | |
615 | |
616 while (defined $self->ancestor) { | |
617 $count++; | |
618 $self=$self->ancestor; | |
619 } | |
620 | |
621 return $count; | |
622 } | |
623 | |
624 | |
625 =head2 recent_common_ancestor | |
626 | |
627 Title : recent_common_ancestor | |
628 Usage : $obj->recent_common_ancestor($taxon) | |
629 Function: a method to query find common ancestors | |
630 Returns : Bio::Taxonomy::Taxon of query or undef if no ancestor of rank | |
631 Args : Bio::Taxonomy::Taxon | |
632 | |
633 | |
634 =cut | |
635 | |
636 sub recent_common_ancestor { | |
637 my ($self,$node) = @_; | |
638 | |
639 while (defined $node->ancestor) { | |
640 my $common=$self->has_taxon($node); | |
641 return $common if defined $common; | |
642 $node=$node->ancestor; | |
643 } | |
644 | |
645 return undef; | |
646 } | |
647 | |
648 =head2 species | |
649 | |
650 Title : species | |
651 Usage : $obj=$taxon->species; | |
652 Function: Returns a Bio::Species object reflecting the taxon's tree position | |
653 Returns : a Bio::Species object | |
654 Args : none | |
655 | |
656 =cut | |
657 | |
658 sub species { | |
659 my ($self) = @_; | |
660 my $species; | |
661 | |
662 if ($self->has_rank('subspecies') && $self->ancestor->rank eq 'species') { | |
663 $species = Bio::Species->new(-classification => $self->ancestor->classify); | |
664 $species->genus($self->ancestor->ancestor->taxon); | |
665 $species->species($self->ancestor->taxon); | |
666 $species->sub_species($self->taxon); | |
667 } elsif ($self->has_rank('species')) { | |
668 $species = Bio::Species->new(-classification => $self->classify); | |
669 $species->genus($self->ancestor->taxon); | |
670 $species->species($self->taxon); | |
671 } else { | |
672 $self->throw("Trying to create a species from a taxonomic entity without species rank. Use classify instead of species.\n"); | |
673 } | |
674 return $species; | |
675 } | |
676 | |
677 1; |