Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Taxonomy/Taxon.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Taxonomy/Taxon.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,677 @@ +# $Id: Taxon.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $ +# +# BioPerl module for Bio::Taxonomy::Taxon +# +# Cared for by Dan Kortschak but pilfered extensively from +# the Bio::Tree::Node code of Jason Stajich +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Taxonomy::Taxon - Generic Taxonomic Entity object + +=head1 SYNOPSIS + + use Bio::Taxonomy::Taxon; + my $taxonA = new Bio::Taxonomy::Taxon(); + my $taxonL = new Bio::Taxonomy::Taxon(); + my $taxonR = new Bio::Taxonomy::Taxon(); + + my $taxon = new Bio::Taxonomy::Taxon(); + $taxon->add_Descendents($nodeL); + $taxon->add_Descendents($nodeR); + + $species = $taxon->species; + +=head1 DESCRIPTION + +Makes a taxonomic unit suitable for use in a taxonomic tree + +=head1 CONTACT + +Dan Kortschak email B<kortschak@rsbs.anu.edu.au> + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a _ + +=cut + + +# code begins... + +package Bio::Taxonomy::Taxon; +use vars qw(@ISA $CREATIONORDER); +use strict; + +# Object preamble - inherits from Bio::Root::Object, Bio::Tree::NodeI, Bio::Species and Bio::Taxonomy +use Bio::Root::Root; +use Bio::Tree::NodeI; +use Bio::Taxonomy; +use Bio::Species; + +# import rank information from Bio::Taxonomy.pm +use vars qw(@RANK %RANK); + +@ISA = qw(Bio::Root::Root Bio::Tree::NodeI); + +BEGIN { + $CREATIONORDER = 0; +} + +=head2 new + + Title : new + Usage : my $obj = new Bio::Taxonomy::Taxon(); + Function: Builds a new Bio::Taxonomy::Taxon object + Returns : Bio::Taxonomy::Taxon + Args : -descendents => array pointer to descendents (optional) + -branch_length => branch length [integer] (optional) + -taxon => taxon + -id => unique taxon id for node (from NCBI's list preferably) + -rank => the taxonomic level of the node (also from NCBI) + +=cut + +sub new { + my($class,@args) = @_; + + my $self = $class->SUPER::new(@args); + my ($children,$branchlen,$id,$taxon,$rank,$desc) = + + $self->_rearrange([qw(DESCENDENTS + BRANCH_LENGTH + ID + TAXON + RANK + DESC)], @args); + + $self->{'_desc'} = {}; + defined $desc && $self->description($desc); + defined $taxon && $self->taxon($taxon); + defined $id && $self->id($id); + defined $branchlen && $self->branch_length($branchlen); + defined $rank && $self->rank($rank); + + if( defined $children ) { + if( ref($children) !~ /ARRAY/i ) { + $self->warn("Must specify a valid ARRAY reference to initialize a Taxon's Descendents"); + } + foreach my $c ( @$children ) { + $self->add_Descendent($c); + } + } + $self->_creation_id($CREATIONORDER++); + return $self; +} + +=head2 add_Descendent + + Title : add_Descendent + Usage : $taxon->add_Descendant($taxon); + Function: Adds a descendent to a taxon + Returns : number of current descendents for this taxon + Args : Bio::Taxonomy::Taxon + boolean flag, true if you want to ignore the fact that you are + adding a second node with the same unique id (typically memory + location reference in this implementation). default is false and + will throw an error if you try and overwrite an existing node. + + +=cut + +sub add_Descendent{ + + my ($self,$node,$ignoreoverwrite) = @_; + + return -1 if( ! defined $node ) ; + if( ! $node->isa('Bio::Taxonomy::Taxon') ) { + $self->warn("Trying to add a Descendent who is not a Bio::Taxonomy::Taxon"); + return -1; + } + # do we care about order? + $node->{'_ancestor'} = $self; + if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) { + $self->throw("Going to overwrite a taxon which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future"); + } + + $self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate?? + + $self->invalidate_height(); + + return scalar keys %{$self->{'_desc'}}; +} + + +=head2 each_Descendent + + Title : each_Descendent($sortby) + Usage : my @taxa = $taxon->each_Descendent; + Function: all the descendents for this taxon (but not their descendents + i.e. not a recursive fetchall) + Returns : Array of Bio::Taxonomy::Taxon objects + Args : $sortby [optional] "height", "creation" or coderef to be used + to sort the order of children taxa. + + +=cut + +sub each_Descendent{ + my ($self, $sortby) = @_; + + # order can be based on branch length (and sub branchlength) + + $sortby ||= 'height'; + + if (ref $sortby eq 'CODE') { + return sort $sortby values %{$self->{'_desc'}}; + } else { + if ($sortby eq 'height') { + return map { $_->[0] } + sort { $a->[1] <=> $b->[1] || + $a->[2] <=> $b->[2] } + map { [$_, $_->height, $_->internal_id ] } + values %{$self->{'_desc'}}; + } else { + return map { $_->[0] } + sort { $a->[1] <=> $b->[1] } + map { [$_, $_->height ] } + values %{$self->{'_desc'}}; + } + } +} + +=head2 remove_Descendent + + Title : remove_Descendent + Usage : $taxon->remove_Descedent($taxon_foo); + Function: Removes a specific taxon from being a Descendent of this taxon + Returns : nothing + Args : An array of Bio::taxonomy::Taxon objects which have be previously + passed to the add_Descendent call of this object. + +=cut + +sub remove_Descendent{ + my ($self,@nodes) = @_; + foreach my $n ( @nodes ) { + if( $self->{'_desc'}->{$n->internal_id} ) { + $n->{'_ancestor'} = undef; + $self->{'_desc'}->{$n->internal_id}->{'_ancestor'} = undef; + delete $self->{'_desc'}->{$n->internal_id}; + + } else { + $self->debug(sprintf("no taxon %s (%s) listed as a descendent in this taxon %s (%s)\n",$n->id, $n,$self->id,$self)); + $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n"); + } + } + 1; +} + + +=head2 remove_all_Descendents + + Title : remove_all_Descendents + Usage : $taxon->remove_All_Descendents() + Function: Cleanup the taxon's reference to descendents and reset + their ancestor pointers to undef, if you don't have a reference + to these objects after this call they will be cleanedup - so + a get_nodes from the Tree object would be a safe thing to do first + Returns : nothing + Args : none + + +=cut + +sub remove_all_Descendents{ + my ($self) = @_; + # this won't cleanup the taxa themselves if you also have + # a copy/pointer of them (I think)... + while( my ($node,$val) = each %{ $self->{'_desc'} } ) { + $val->{'_ancestor'} = undef; + } + $self->{'_desc'} = {}; + 1; +} + +=head2 get_Descendents + + Title : get_Descendents + Usage : my @taxa = $taxon->get_Descendents; + Function: Recursively fetch all the taxa and their descendents + *NOTE* This is different from each_Descendent + Returns : Array or Bio::Taxonomy::Taxon objects + Args : none + +=cut + +# implemented in the interface + +=head2 ancestor + + Title : ancestor + Usage : $taxon->ancestor($newval) + Function: Set the Ancestor + Returns : value of ancestor + Args : newvalue (optional) + +=cut + +sub ancestor { + my ($self, $value) = @_; + if (defined $value) { + $self->{'_ancestor'} = $value; + } + return $self->{'_ancestor'}; +} + +=head2 branch_length + + Title : branch_length + Usage : $obj->branch_length($newval) + Function: + Example : + Returns : value of branch_length + Args : newvalue (optional) + + +=cut + +sub branch_length { + my ($self,$value) = @_; + if( defined $value) { + $self->{'branch_length'} = $value; + } + return $self->{'branch_length'}; +} + +=head2 description + + Title : description + Usage : $obj->description($newval) + Function: + Example : + Returns : value of description + Args : newvalue (optional) + + +=cut + +sub description { + my ($self,$value) = @_; + if( defined $value ) { + $self->{'_desc'} = $value; + } + return $self->{'_desc'}; +} + + +=head2 rank + + Title : rank + Usage : $obj->rank($newval) + Function: Set the taxonomic rank + Example : + Returns : taxonomic rank of taxon + Args : newvalue (optional) + + +=cut + +sub rank { + my ($self,$value) = @_; + if (defined $value) { + my $ranks=join("|",@RANK); + if ($value=~/$ranks/) { + $self->{'_rank'} = $value; + } else { + $self->throw("Attempted to set unknown taxonomic rank: $value.\n"); + } + } + return $self->{'_rank'}; +} + + +=head2 taxon + + Title : taxon + Usage : $obj->taxon($newtaxon) + Function: Set the name of the taxon + Example : + Returns : name of taxon + Args : newtaxon (optional) + + +=cut + +# because internal taxa have names too... +sub taxon { + my ($self,$value) = @_; + if( defined $value ) { + $self->{'_taxon'} = $value; + } + return $self->{'_taxon'}; +} + + +=head2 id + + Title : id + Usage : $obj->id($newval) + Function: + Example : + Returns : value of id + Args : newvalue (optional) + + +=cut + +sub id { + my ($self,$value) = @_; + if( defined $value ) { + $self->{'_id'} = $value; + } + return $self->{'_id'}; +} + + + +sub DESTROY { + my ($self) = @_; + # try to insure that everything is cleaned up + $self->SUPER::DESTROY(); + if( defined $self->{'_desc'} && + ref($self->{'_desc'}) =~ /ARRAY/i ) { + while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) { + $node->{'_ancestor'} = undef; # ensure no circular references + $node->DESTROY(); + $node = undef; + } + $self->{'_desc'} = {}; + } +} + +=head2 internal_id + + Title : internal_id + Usage : my $internalid = $taxon->internal_id + Function: Returns the internal unique id for this taxon + (a monotonically increasing number for this in-memory implementation + but could be a database determined unique id in other + implementations) + Returns : unique id + Args : none + +=cut + +sub internal_id { + return $_[0]->_creation_id; +} + + +=head2 _creation_id + + Title : _creation_id + Usage : $obj->_creation_id($newval) + Function: a private method signifying the internal creation order + Returns : value of _creation_id + Args : newvalue (optional) + + +=cut + +sub _creation_id { + my ($self,$value) = @_; + if( defined $value) { + $self->{'_creation_id'} = $value; + } + return $self->{'_creation_id'} || 0; +} + + +# The following methods are implemented by NodeI decorated interface + +=head2 is_Leaf + + Title : is_Leaf + Usage : if( $node->is_Leaf ) + Function: Get Leaf status + Returns : boolean + Args : none + +=cut + +sub is_Leaf { + my ($self) = @_; + my $rc = 0; + $rc = 1 if( ! defined $self->{'_desc'} || + keys %{$self->{'_desc'}} == 0); + return $rc; +} + +=head2 to_string + + Title : to_string + Usage : my $str = $taxon->to_string() + Function: For debugging, provide a taxon as a string + Returns : string + Args : none + +=cut + +=head2 height + + Title : height + Usage : my $len = $taxon->height + Function: Returns the height of the tree starting at this + taxon. Height is the maximum branchlength. + Returns : The longest length (weighting branches with branch_length) to a leaf + Args : none + +=cut + +sub height { + my ($self) = @_; + + return $self->{'_height'} if( defined $self->{'_height'} ); + + if( $self->is_Leaf ) { + if( !defined $self->branch_length ) { + $self->debug(sprintf("Trying to calculate height of a taxon when a taxon (%s) has an undefined branch_length",$self->id || '?' )); + return 0; + } + return $self->branch_length; + } + my $max = 0; + foreach my $subnode ( $self->each_Descendent ) { + my $s = $subnode->height; + if( $s > $max ) { $max = $s; } + } + return ($self->{'_height'} = $max + ($self->branch_length || 1)); +} + + +=head2 invalidate_height + + Title : invalidate_height + Usage : private helper method + Function: Invalidate our cached value of the taxon's height in the tree + Returns : nothing + Args : none + +=cut + + +sub invalidate_height { + my ($self) = @_; + + $self->{'_height'} = undef; + if( $self->ancestor ) { + $self->ancestor->invalidate_height; + } +} + +=head2 classify + + Title : classify + Usage : @obj->classify() + Function: a method to return the classification of a species + Returns : name of taxon and ancestor's taxon recursively + Args : boolean to specify whether we want all taxa not just ranked +levels + + +=cut + +sub classify { + my ($self,$allnodes) = @_; + + my @classification=($self->taxon); + my $node=$self; + + while (defined $node->ancestor) { + push @classification, $node->ancestor->taxon if $allnodes==1; + $node=$node->ancestor; + } + + return (@classification); +} + + +=head2 has_rank + + Title : has_rank + Usage : $obj->has_rank($rank) + Function: a method to query ancestors' rank + Returns : boolean + Args : $rank + + +=cut + +sub has_rank { + my ($self,$rank) = @_; + + return $self if $self->rank eq $rank; + + while (defined $self->ancestor) { + return $self if $self->ancestor->rank eq $rank; + $self=$self->ancestor; + } + + return undef; +} + + +=head2 has_taxon + + Title : has_taxon + Usage : $obj->has_taxon($taxon) + Function: a method to query ancestors' taxa + Returns : boolean + Args : Bio::Taxonomy::Taxon object + + +=cut + +sub has_taxon { + my ($self,$taxon) = @_; + + return $self if + ((defined $self->id && $self->id == $taxon->id) || + ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank)); + + while (defined $self->ancestor) { + return $self if + ((defined $self->id && $self->id == $taxon->id) || + ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank) && + ($self->taxon ne 'no rank')); + $self=$self->ancestor; + } + + return undef; +} + + +=head2 distance_to_root + + Title : distance_to_root + Usage : $obj->distance_to_root + Function: a method to query ancestors' taxa + Returns : number of links to root + Args : + + +=cut + +sub distance_to_root { + my ($self,$taxon) = @_; + + my $count=0; + + while (defined $self->ancestor) { + $count++; + $self=$self->ancestor; + } + + return $count; +} + + +=head2 recent_common_ancestor + + Title : recent_common_ancestor + Usage : $obj->recent_common_ancestor($taxon) + Function: a method to query find common ancestors + Returns : Bio::Taxonomy::Taxon of query or undef if no ancestor of rank + Args : Bio::Taxonomy::Taxon + + +=cut + +sub recent_common_ancestor { + my ($self,$node) = @_; + + while (defined $node->ancestor) { + my $common=$self->has_taxon($node); + return $common if defined $common; + $node=$node->ancestor; + } + + return undef; +} + +=head2 species + + Title : species + Usage : $obj=$taxon->species; + Function: Returns a Bio::Species object reflecting the taxon's tree position + Returns : a Bio::Species object + Args : none + +=cut + +sub species { + my ($self) = @_; + my $species; + + if ($self->has_rank('subspecies') && $self->ancestor->rank eq 'species') { + $species = Bio::Species->new(-classification => $self->ancestor->classify); + $species->genus($self->ancestor->ancestor->taxon); + $species->species($self->ancestor->taxon); + $species->sub_species($self->taxon); + } elsif ($self->has_rank('species')) { + $species = Bio::Species->new(-classification => $self->classify); + $species->genus($self->ancestor->taxon); + $species->species($self->taxon); + } else { + $self->throw("Trying to create a species from a taxonomic entity without species rank. Use classify instead of species.\n"); + } + return $species; +} + +1;