diff variant_effect_predictor/Bio/Taxonomy/Tree.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Taxonomy/Tree.pm	Thu Apr 11 06:29:17 2013 -0400
@@ -0,0 +1,450 @@
+# $Id: Tree.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $
+#
+# BioPerl module for Bio::Taxonomy::Tree
+#
+# Cared for by Dan Kortschak but pilfered extensively from Bio::Tree::Tree by Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Taxonomy::Tree - An Organism Level Implementation of TreeI interface.
+
+=head1 SYNOPSIS
+
+    # like from a TreeIO
+    my $treeio = new Bio::TreeIO(-format => 'newick', -file => 'treefile.dnd');
+    my $tree = $treeio->next_tree;
+    my @nodes = $tree->get_nodes;
+    my $root = $tree->get_root_node;
+    my @leaves = $tree->get_leaves;
+
+
+=head1 DESCRIPTION
+
+This object holds handles to Taxonomic Nodes which make up a tree.
+
+=head1 EXAMPLES
+
+              use Bio::Species;
+              use Bio::Taxonomy::Tree;
+
+              my $human=new Bio::Species;
+              my $chimp=new Bio::Species;
+              my $bonobo=new Bio::Species;
+
+              $human->classification(qw( sapiens Homo Hominidae
+                                         Catarrhini Primates Eutheria
+                                         Mammalia Euteleostomi Vertebrata 
+                                         Craniata Chordata
+                                         Metazoa Eukaryota ));
+              $chimp->classification(qw( troglodytes Pan Hominidae
+                                         Catarrhini Primates Eutheria
+                                         Mammalia Euteleostomi Vertebrata 
+                                         Craniata Chordata
+                                         Metazoa Eukaryota ));
+              $bonobo->classification(qw( paniscus Pan Hominidae
+                                          Catarrhini Primates Eutheria
+                                          Mammalia Euteleostomi Vertebrata 
+                                          Craniata Chordata
+                                          Metazoa Eukaryota ));
+
+              # ranks passed to $taxonomy match ranks of species
+              my @ranks = ('superkingdom','kingdom','phylum','subphylum',
+                           'no rank 1','no rank 2','class','no rank 3','order',
+                           'suborder','family','genus','species');
+
+              my $taxonomy=new Bio::Taxonomy(-ranks => \@ranks,
+                                             -method => 'trust',
+                                             -order => -1);
+
+              my @nodes;
+
+              my $tree1=new Bio::Taxonomy::Tree;
+              my $tree2=new Bio::Taxonomy::Tree;
+
+              push @nodes,$tree1->make_species_branch($human,$taxonomy);
+              push @nodes,$tree2->make_species_branch($chimp,$taxonomy);
+
+              my ($homo_sapiens)=$tree1->get_leaves;
+
+              $tree1->splice($tree2);
+
+              push @nodes,$tree1->add_species($bonobo,$taxonomy);
+
+              my @taxa;
+              foreach my $leaf ($tree1->get_leaves) {
+                 push @taxa,$leaf->taxon;
+              }
+              print join(", ",@taxa)."\n";
+
+              @taxa=();
+              $tree1->remove_branch($homo_sapiens);
+              foreach my $leaf ($tree1->get_leaves) {
+                 push @taxa,$leaf->taxon;
+              }
+              print join(", ",@taxa)."\n";
+
+=head1 FEEDBACK
+
+See AUTHOR
+
+=head1 AUTHOR - Dan Kortschak
+
+Email kortschak@rsbs.anu.edu.au
+
+=head1 CONTRIBUTORS
+
+Mainly Jason Stajich
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Code begins...
+
+
+package Bio::Taxonomy::Tree;
+use vars qw(@ISA);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Root::Root;
+use Bio::Tree::TreeFunctionsI;
+use Bio::Tree::TreeI;
+use Bio::Taxonomy::Taxon;
+
+# Import rank information from Bio::Taxonomy.pm
+use vars qw(@RANK %RANK);
+
+@ISA = qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Taxonomy::Tree();
+ Function: Builds a new Bio::Taxonomy::Tree object 
+ Returns : Bio::Taxonomy::Tree
+ Args    : 
+
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  
+  my $self = $class->SUPER::new(@args);
+  $self->{'_rootnode'} = undef;
+  $self->{'_maxbranchlen'} = 0;
+
+  my ($root)= $self->_rearrange([qw(ROOT)], @args);
+  if( $root ) { $self->set_root_node($root); }
+  return $self;
+}
+
+
+=head2 get_nodes
+
+ Title   : get_nodes
+ Usage   : my @nodes = $tree->get_nodes()
+ Function: Return list of Bio::Taxonomy::Taxon objects
+ Returns : array of Bio::Taxonomy::Taxon objects
+ Args    : (named values) hash with one value 
+           order => 'b|breadth' first order or 'd|depth' first order
+
+=cut
+
+sub get_nodes{
+   my ($self, @args) = @_;
+   
+   my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)],@args);
+   $order ||= 'depth';
+   $sortby ||= 'height';
+
+   if ($order =~ m/^b|(breadth)$/oi) {
+      my $node = $self->get_root_node;
+      my @children = ($node);
+      for (@children) {
+	   push @children, $_->each_Descendent($sortby);
+      }
+      return @children;
+   }
+
+   if ($order =~ m/^d|(depth)$/oi) {
+       # this is depth-first search I believe
+       my $node = $self->get_root_node;
+       my @children = ($node,$node->get_Descendents($sortby));
+       return @children;
+   }
+}
+
+=head2 get_root_node
+
+ Title   : get_root_node
+ Usage   : my $node = $tree->get_root_node();
+ Function: Get the Top Node in the tree, in this implementation
+           Trees only have one top node.
+ Returns : Bio::Taxonomy::Taxon object
+ Args    : none
+
+=cut
+
+
+sub get_root_node{
+   my ($self) = @_;
+   return $self->{'_rootnode'};
+}
+
+=head2 set_root_node
+
+ Title   : set_root_node
+ Usage   : $tree->set_root_node($node)
+ Function: Set the Root Node for the Tree
+ Returns : Bio::Taxonomy::Taxon
+ Args    : Bio::Taxonomy::Taxon
+
+=cut
+
+
+sub set_root_node{
+   my ($self,$value) = @_;
+   if( defined $value ) { 
+      if( ! $value->isa('Bio::Taxonomy::Taxon') ) { 
+	   $self->warn("Trying to set the root node to $value which is not a Bio::Taxonomy::Taxon");
+	   return $self->get_root_node;
+      }
+      $self->{'_rootnode'} = $value;
+   }
+   return $self->get_root_node;
+}
+
+
+=head2 get_leaves
+
+ Title   : get_leaves
+ Usage   : my @nodes = $tree->get_leaves()
+ Function: Return list of Bio::Taxonomy::Taxon objects
+ Returns : array of Bio::Taxonomy::Taxon objects
+ Args    : 
+
+=cut
+
+
+sub get_leaves{
+   my ($self) = @_;
+   
+   my $node = $self->get_root_node;
+   my @leaves;
+   my @children = ($node);
+   for (@children) {
+      push @children, $_->each_Descendent();
+   }
+   for (@children) {
+      push @leaves, $_ if $_->is_Leaf;
+   }
+   return @leaves;
+}
+
+=head2 make_species_branch
+
+ Title   : make_species_branch
+ Usage   : @nodes = $tree->make_species_branch($species,$taxonomy)
+ Function: Return list of Bio::Taxonomy::Taxon objects based on a Bio::Species object
+ Returns : array of Bio::Taxonomy::Taxon objects
+ Args    : Bio::Species and Bio::Taxonomy objects
+
+=cut
+
+# I'm not happy that make_species_branch and make_branch are seperate routines
+# should be able to just make_branch and have it sort things out
+
+sub make_species_branch{
+   my ($self,$species,$taxonomy) = @_;
+   
+   if (! $species->isa('Bio::Species') ) {
+      $self->throw("Trying to classify $species which is not a Bio::Species object");
+   }
+   if (! $taxonomy->isa('Bio::Taxonomy') ) {
+      $self->throw("Trying to classify with $taxonomy which is not a Bio::Taxonomy object");
+   }
+
+   # this is done to make sure we aren't duplicating a path (let God sort them out)
+   if (defined $self->get_root_node) {
+      $self->get_root_node->remove_all_Descendents;
+   }
+
+   my @nodes;
+
+   # nb taxa in [i][0] and ranks in [i][1]
+   my @taxa=$taxonomy->classify($species);
+
+   for (my $i = 0; $i < @taxa; $i++) {
+      $nodes[$i]=Bio::Taxonomy::Taxon->new(-taxon => $taxa[$i][0],
+                                           -rank  => $taxa[$i][1]);
+   }
+
+   for (my $i = 0; $i < @taxa-1; $i++) {
+      $nodes[$i]->add_Descendent($nodes[$i+1]);
+   }
+
+   $self->set_root_node($nodes[0]);
+
+   return @nodes;
+}
+
+
+=head2 make_branch
+
+ Title   : make_branch
+ Usage   : $tree->make_branch($node)
+ Function: Make a linear Bio::Taxonomy::Tree object from a leafish node
+ Returns :
+ Args    : Bio::Taxonomy::Taxon object
+
+=cut
+
+
+sub make_branch{
+   my ($self,$node) = @_;
+
+   # this is done to make sure we aren't duplicating a path (let God sort them out)
+   # note that if you are using a linked set of node which include node 
+   # already in the tree, this will break
+   $self->get_root_node->remove_all_Descendents;
+   
+   while (defined $node->ancestor) {
+      $self->set_root_node($node);
+      $node=$node->ancestor;
+   }
+}
+
+
+=head2 splice
+
+ Title   : splice
+ Usage   : @nodes = $tree->splice($tree)
+ Function: Return a of Bio::Taxonomy::Tree object that is a fusion of two
+ Returns : array of Bio::Taxonomy::Taxon added to tree
+ Args    : Bio::Taxonomy::Tree object
+
+=cut
+
+
+sub splice{
+   my ($self,$tree) = @_;
+
+   my @nodes;
+
+   my @newleaves = $tree->get_leaves;
+   foreach my $leaf (@newleaves) {
+      push @nodes,$self->add_branch($leaf);
+   }
+
+   return @nodes;
+}
+
+=head2 add_species
+
+ Title   : add_species
+ Usage   : @nodes = $tree->add_species($species,$taxonomy)
+ Function: Return a of Bio::Taxonomy::Tree object with a new species added
+ Returns : array of Bio::Taxonomy::Taxon added to tree
+ Args    : Bio::Species object
+
+=cut
+
+
+sub add_species{
+   my ($self,$species,$taxonomy) = @_;
+
+   my $branch=Bio::Taxonomy::Tree->new;
+   my @nodes=$branch->make_species_branch($species,$taxonomy);
+
+   my ($newleaf)=$branch->get_leaves;
+  
+   return $self->add_branch($newleaf);
+}
+
+=head2 add_branch
+
+ Title   : add_branch
+ Usage   : $tree->add_branch($node,boolean)
+ Function: Return a of Bio::Taxonomy::Tree object with a new branch added
+ Returns : array of Bio::Taxonomy::Taxon objects of the resulting tree
+ Args    : Bio::Taxonomy::Taxon object
+           boolean flag to force overwrite of descendent
+             (see Bio::Node->add_Descendent)
+
+=cut
+
+
+sub add_branch {
+   my ($self,$node,$force) = @_;
+
+   my $best_node_level=0;
+   my ($best_node,@nodes,$common);
+
+   my @leaves=$self->get_leaves;
+   foreach my $leaf (@leaves) {
+      $common=$node->recent_common_ancestor($leaf); # the root of the part to add
+      if (defined $common && ($common->distance_to_root > $best_node_level)) {
+         $best_node_level = $common->distance_to_root;
+         $best_node = $common;
+      }
+   }
+
+   return unless defined $best_node;
+
+   push @nodes,($self->get_root_node,$self->get_root_node->get_Descendents);
+   foreach my $node (@nodes) {
+      if ((defined $best_node->id && $best_node->id == $node->id) ||
+         ($best_node->rank eq $node->rank && $best_node->taxon eq $node->taxon) &&
+         ($best_node->rank ne 'no rank')) {
+         foreach my $descendent ($common->each_Descendent) {
+            $node->add_Descendent($descendent,$force);
+         }
+      }
+
+      $self->set_root_node($node) if $node->distance_to_root==0;
+   }
+
+   return ($common->get_Descendents);
+}
+
+=head2 remove_branch
+
+ Title   : remove_branch
+ Usage   : $tree->remove_branch($node)
+ Function: remove a branch up to the next multifurcation
+ Returns :
+ Args    : Bio::Taxonomy::Taxon object
+
+=cut
+
+
+sub remove_branch{
+   my ($self,$node) = @_;
+
+   # we can define a branch at any point along it
+   
+   while (defined $node->ancestor) {
+      last if $node->ancestor->each_Descendent > 1;
+      $node=$node->ancestor;
+   }
+   $node->remove_all_Descendents; # I'm not sure if this is necessary,
+                                  # but I don't see that remove_Descendent
+                                  # has the side effect of deleting
+                                  # descendent nodes of the deletee
+   $node->ancestor->remove_Descendent($node);
+}
+
+
+
+1;