Mercurial > repos > mahtabm > ensemb_rep_gvl
diff variant_effect_predictor/Bio/Taxonomy/Tree.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Taxonomy/Tree.pm Thu Apr 11 06:29:17 2013 -0400 @@ -0,0 +1,450 @@ +# $Id: Tree.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $ +# +# BioPerl module for Bio::Taxonomy::Tree +# +# Cared for by Dan Kortschak but pilfered extensively from Bio::Tree::Tree by Jason Stajich +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Taxonomy::Tree - An Organism Level Implementation of TreeI interface. + +=head1 SYNOPSIS + + # like from a TreeIO + my $treeio = new Bio::TreeIO(-format => 'newick', -file => 'treefile.dnd'); + my $tree = $treeio->next_tree; + my @nodes = $tree->get_nodes; + my $root = $tree->get_root_node; + my @leaves = $tree->get_leaves; + + +=head1 DESCRIPTION + +This object holds handles to Taxonomic Nodes which make up a tree. + +=head1 EXAMPLES + + use Bio::Species; + use Bio::Taxonomy::Tree; + + my $human=new Bio::Species; + my $chimp=new Bio::Species; + my $bonobo=new Bio::Species; + + $human->classification(qw( sapiens Homo Hominidae + Catarrhini Primates Eutheria + Mammalia Euteleostomi Vertebrata + Craniata Chordata + Metazoa Eukaryota )); + $chimp->classification(qw( troglodytes Pan Hominidae + Catarrhini Primates Eutheria + Mammalia Euteleostomi Vertebrata + Craniata Chordata + Metazoa Eukaryota )); + $bonobo->classification(qw( paniscus Pan Hominidae + Catarrhini Primates Eutheria + Mammalia Euteleostomi Vertebrata + Craniata Chordata + Metazoa Eukaryota )); + + # ranks passed to $taxonomy match ranks of species + my @ranks = ('superkingdom','kingdom','phylum','subphylum', + 'no rank 1','no rank 2','class','no rank 3','order', + 'suborder','family','genus','species'); + + my $taxonomy=new Bio::Taxonomy(-ranks => \@ranks, + -method => 'trust', + -order => -1); + + my @nodes; + + my $tree1=new Bio::Taxonomy::Tree; + my $tree2=new Bio::Taxonomy::Tree; + + push @nodes,$tree1->make_species_branch($human,$taxonomy); + push @nodes,$tree2->make_species_branch($chimp,$taxonomy); + + my ($homo_sapiens)=$tree1->get_leaves; + + $tree1->splice($tree2); + + push @nodes,$tree1->add_species($bonobo,$taxonomy); + + my @taxa; + foreach my $leaf ($tree1->get_leaves) { + push @taxa,$leaf->taxon; + } + print join(", ",@taxa)."\n"; + + @taxa=(); + $tree1->remove_branch($homo_sapiens); + foreach my $leaf ($tree1->get_leaves) { + push @taxa,$leaf->taxon; + } + print join(", ",@taxa)."\n"; + +=head1 FEEDBACK + +See AUTHOR + +=head1 AUTHOR - Dan Kortschak + +Email kortschak@rsbs.anu.edu.au + +=head1 CONTRIBUTORS + +Mainly Jason Stajich + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + + +# Code begins... + + +package Bio::Taxonomy::Tree; +use vars qw(@ISA); +use strict; + +# Object preamble - inherits from Bio::Root::Root + +use Bio::Root::Root; +use Bio::Tree::TreeFunctionsI; +use Bio::Tree::TreeI; +use Bio::Taxonomy::Taxon; + +# Import rank information from Bio::Taxonomy.pm +use vars qw(@RANK %RANK); + +@ISA = qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI); + +=head2 new + + Title : new + Usage : my $obj = new Bio::Taxonomy::Tree(); + Function: Builds a new Bio::Taxonomy::Tree object + Returns : Bio::Taxonomy::Tree + Args : + + +=cut + +sub new { + my($class,@args) = @_; + + my $self = $class->SUPER::new(@args); + $self->{'_rootnode'} = undef; + $self->{'_maxbranchlen'} = 0; + + my ($root)= $self->_rearrange([qw(ROOT)], @args); + if( $root ) { $self->set_root_node($root); } + return $self; +} + + +=head2 get_nodes + + Title : get_nodes + Usage : my @nodes = $tree->get_nodes() + Function: Return list of Bio::Taxonomy::Taxon objects + Returns : array of Bio::Taxonomy::Taxon objects + Args : (named values) hash with one value + order => 'b|breadth' first order or 'd|depth' first order + +=cut + +sub get_nodes{ + my ($self, @args) = @_; + + my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)],@args); + $order ||= 'depth'; + $sortby ||= 'height'; + + if ($order =~ m/^b|(breadth)$/oi) { + my $node = $self->get_root_node; + my @children = ($node); + for (@children) { + push @children, $_->each_Descendent($sortby); + } + return @children; + } + + if ($order =~ m/^d|(depth)$/oi) { + # this is depth-first search I believe + my $node = $self->get_root_node; + my @children = ($node,$node->get_Descendents($sortby)); + return @children; + } +} + +=head2 get_root_node + + Title : get_root_node + Usage : my $node = $tree->get_root_node(); + Function: Get the Top Node in the tree, in this implementation + Trees only have one top node. + Returns : Bio::Taxonomy::Taxon object + Args : none + +=cut + + +sub get_root_node{ + my ($self) = @_; + return $self->{'_rootnode'}; +} + +=head2 set_root_node + + Title : set_root_node + Usage : $tree->set_root_node($node) + Function: Set the Root Node for the Tree + Returns : Bio::Taxonomy::Taxon + Args : Bio::Taxonomy::Taxon + +=cut + + +sub set_root_node{ + my ($self,$value) = @_; + if( defined $value ) { + if( ! $value->isa('Bio::Taxonomy::Taxon') ) { + $self->warn("Trying to set the root node to $value which is not a Bio::Taxonomy::Taxon"); + return $self->get_root_node; + } + $self->{'_rootnode'} = $value; + } + return $self->get_root_node; +} + + +=head2 get_leaves + + Title : get_leaves + Usage : my @nodes = $tree->get_leaves() + Function: Return list of Bio::Taxonomy::Taxon objects + Returns : array of Bio::Taxonomy::Taxon objects + Args : + +=cut + + +sub get_leaves{ + my ($self) = @_; + + my $node = $self->get_root_node; + my @leaves; + my @children = ($node); + for (@children) { + push @children, $_->each_Descendent(); + } + for (@children) { + push @leaves, $_ if $_->is_Leaf; + } + return @leaves; +} + +=head2 make_species_branch + + Title : make_species_branch + Usage : @nodes = $tree->make_species_branch($species,$taxonomy) + Function: Return list of Bio::Taxonomy::Taxon objects based on a Bio::Species object + Returns : array of Bio::Taxonomy::Taxon objects + Args : Bio::Species and Bio::Taxonomy objects + +=cut + +# I'm not happy that make_species_branch and make_branch are seperate routines +# should be able to just make_branch and have it sort things out + +sub make_species_branch{ + my ($self,$species,$taxonomy) = @_; + + if (! $species->isa('Bio::Species') ) { + $self->throw("Trying to classify $species which is not a Bio::Species object"); + } + if (! $taxonomy->isa('Bio::Taxonomy') ) { + $self->throw("Trying to classify with $taxonomy which is not a Bio::Taxonomy object"); + } + + # this is done to make sure we aren't duplicating a path (let God sort them out) + if (defined $self->get_root_node) { + $self->get_root_node->remove_all_Descendents; + } + + my @nodes; + + # nb taxa in [i][0] and ranks in [i][1] + my @taxa=$taxonomy->classify($species); + + for (my $i = 0; $i < @taxa; $i++) { + $nodes[$i]=Bio::Taxonomy::Taxon->new(-taxon => $taxa[$i][0], + -rank => $taxa[$i][1]); + } + + for (my $i = 0; $i < @taxa-1; $i++) { + $nodes[$i]->add_Descendent($nodes[$i+1]); + } + + $self->set_root_node($nodes[0]); + + return @nodes; +} + + +=head2 make_branch + + Title : make_branch + Usage : $tree->make_branch($node) + Function: Make a linear Bio::Taxonomy::Tree object from a leafish node + Returns : + Args : Bio::Taxonomy::Taxon object + +=cut + + +sub make_branch{ + my ($self,$node) = @_; + + # this is done to make sure we aren't duplicating a path (let God sort them out) + # note that if you are using a linked set of node which include node + # already in the tree, this will break + $self->get_root_node->remove_all_Descendents; + + while (defined $node->ancestor) { + $self->set_root_node($node); + $node=$node->ancestor; + } +} + + +=head2 splice + + Title : splice + Usage : @nodes = $tree->splice($tree) + Function: Return a of Bio::Taxonomy::Tree object that is a fusion of two + Returns : array of Bio::Taxonomy::Taxon added to tree + Args : Bio::Taxonomy::Tree object + +=cut + + +sub splice{ + my ($self,$tree) = @_; + + my @nodes; + + my @newleaves = $tree->get_leaves; + foreach my $leaf (@newleaves) { + push @nodes,$self->add_branch($leaf); + } + + return @nodes; +} + +=head2 add_species + + Title : add_species + Usage : @nodes = $tree->add_species($species,$taxonomy) + Function: Return a of Bio::Taxonomy::Tree object with a new species added + Returns : array of Bio::Taxonomy::Taxon added to tree + Args : Bio::Species object + +=cut + + +sub add_species{ + my ($self,$species,$taxonomy) = @_; + + my $branch=Bio::Taxonomy::Tree->new; + my @nodes=$branch->make_species_branch($species,$taxonomy); + + my ($newleaf)=$branch->get_leaves; + + return $self->add_branch($newleaf); +} + +=head2 add_branch + + Title : add_branch + Usage : $tree->add_branch($node,boolean) + Function: Return a of Bio::Taxonomy::Tree object with a new branch added + Returns : array of Bio::Taxonomy::Taxon objects of the resulting tree + Args : Bio::Taxonomy::Taxon object + boolean flag to force overwrite of descendent + (see Bio::Node->add_Descendent) + +=cut + + +sub add_branch { + my ($self,$node,$force) = @_; + + my $best_node_level=0; + my ($best_node,@nodes,$common); + + my @leaves=$self->get_leaves; + foreach my $leaf (@leaves) { + $common=$node->recent_common_ancestor($leaf); # the root of the part to add + if (defined $common && ($common->distance_to_root > $best_node_level)) { + $best_node_level = $common->distance_to_root; + $best_node = $common; + } + } + + return unless defined $best_node; + + push @nodes,($self->get_root_node,$self->get_root_node->get_Descendents); + foreach my $node (@nodes) { + if ((defined $best_node->id && $best_node->id == $node->id) || + ($best_node->rank eq $node->rank && $best_node->taxon eq $node->taxon) && + ($best_node->rank ne 'no rank')) { + foreach my $descendent ($common->each_Descendent) { + $node->add_Descendent($descendent,$force); + } + } + + $self->set_root_node($node) if $node->distance_to_root==0; + } + + return ($common->get_Descendents); +} + +=head2 remove_branch + + Title : remove_branch + Usage : $tree->remove_branch($node) + Function: remove a branch up to the next multifurcation + Returns : + Args : Bio::Taxonomy::Taxon object + +=cut + + +sub remove_branch{ + my ($self,$node) = @_; + + # we can define a branch at any point along it + + while (defined $node->ancestor) { + last if $node->ancestor->each_Descendent > 1; + $node=$node->ancestor; + } + $node->remove_all_Descendents; # I'm not sure if this is necessary, + # but I don't see that remove_Descendent + # has the side effect of deleting + # descendent nodes of the deletee + $node->ancestor->remove_Descendent($node); +} + + + +1;