diff variant_effect_predictor/Bio/Tree/RandomFactory.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Tree/RandomFactory.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,409 @@
+# $Id: RandomFactory.pm,v 1.8 2002/12/24 17:52:03 jason Exp $
+#
+# BioPerl module for Bio::Tree::RandomFactory
+#
+# Cared for by Jason Stajich <jason@bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::RandomFactory - TreeFactory for generating Random Trees
+
+=head1 SYNOPSIS
+
+  use Bio::Tree::RandomFactory
+  my $factory = new Bio::Tree::RandomFactory( -samples => \@taxonnames,
+  					      -maxcount => 10);
+
+  # or for anonymous samples
+
+  my $factory = new Bio::Tree::RandomFactory( -sample_size => 6, 
+					      -maxcount = 50);
+
+=head1 DESCRIPTION
+
+Builds a random tree every time next_tree is called or up to -maxcount times.
+
+This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
+
+Hudson, R. R. 1990. Gene genealogies and the coalescent
+       process. Pp. 1-44 in D. Futuyma and J.  Antonovics, eds. Oxford
+       surveys in evolutionary biology. Vol. 7. Oxford University
+       Press, New York
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.bioperl.org/
+
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason@bioperl.org
+
+=head1 CONTRIBUTORS
+
+Matthew Hahn, E<lt>matthew.hahn@duke.eduE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tree::RandomFactory;
+use vars qw(@ISA $PRECISION_DIGITS);
+use strict;
+
+BEGIN { 
+    $PRECISION_DIGITS = 3; # Precision for the branchlength
+}
+
+use Bio::Factory::TreeFactoryI;
+use Bio::Root::Root;
+use Bio::TreeIO::TreeEventBuilder;
+use Bio::Tree::AlleleNode;
+
+@ISA = qw(Bio::Root::Root Bio::Factory::TreeFactoryI );
+
+=head2 new
+
+ Title   : new
+ Usage   : my $factory = new Bio::Tree::RandomFactory(-samples => \@samples,
+						      -maxcount=> $N);
+ Function: Initializes a Bio::Tree::RandomFactory object
+ Returns : Bio::Tree::RandomFactory
+ Args    :
+
+
+=cut
+
+sub new{
+   my ($class,@args) = @_;
+   my $self = $class->SUPER::new(@args);
+   
+   $self->{'_eventbuilder'} = new Bio::TreeIO::TreeEventBuilder();
+   $self->{'_treecounter'} = 0;
+   $self->{'_maxcount'} = 0;
+   my ($maxcount, $samps,$samplesize ) = $self->_rearrange([qw(MAXCOUNT
+							       SAMPLES
+							       SAMPLE_SIZE)],
+							   @args);
+   my @samples;
+   
+   if( ! defined $samps ) { 
+       if( ! defined $samplesize || $samplesize <= 0 ) { 
+	   $self->throw("Must specify a valid samplesize if parameter -SAMPLE is not specified");
+       }
+       foreach ( 1..$samplesize ) { push @samples, "Samp$_"; }      
+   } else { 
+       if( ref($samps) =~ /ARRAY/i ) { 
+	   $self->throw("Must specify a valid ARRAY reference to the parameter -SAMPLES, did you forget a leading '\\'?");
+       }
+       @samples = @$samps;
+   }
+   
+   $self->samples(\@samples);
+   $self->sample_size(scalar @samples);
+   if( defined $maxcount ) { 
+       $self->maxcount($maxcount);
+   }
+   return $self;
+}
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $factory->next_tree
+ Function: Returns a random tree based on the initialized number of nodes
+           NOTE: if maxcount is not specified on initialization or
+                 set to a valid integer, subsequent calls to next_tree will 
+                 continue to return random trees and never return undef
+
+ Returns : Bio::Tree::TreeI object
+ Args    : none
+
+=cut
+
+sub next_tree{
+   my ($self) = @_;
+   # If maxcount is set to something non-zero then next tree will
+   # continue to return valid trees until maxcount is reached
+   # otherwise will always return trees 
+   return undef if( $self->maxcount &&
+		    $self->{'_treecounter'}++ >= $self->maxcount );
+   my $size = $self->sample_size;
+   
+   my $in;
+   my @tree = ();
+   my @list = ();
+   
+   for($in=0;$in < 2*$size -1; $in++ ) { 
+       push @tree, { 'nodenum' => "Node$in" };
+   }
+   # in C we would have 2 arrays
+   # an array of nodes (tree)
+   # and array of pointers to these nodes (list)
+   # and we just shuffle the list items to do the 
+   # tree topology generation
+   # instead in perl, we will have a list of hashes (nodes) called @tree
+   # and a list of integers representing the indexes in tree called @list
+
+   for($in=0;$in < $size;$in++)  {
+       $tree[$in]->{'time'} = 0;
+       $tree[$in]->{'desc1'} = undef;
+       $tree[$in]->{'desc2'} = undef;
+       push @list, $in;
+   }
+
+   my $t=0;
+   # generate times for the nodes
+   for($in = $size; $in > 1; $in-- ) {
+	$t+= -2.0 * log(1 - $self->random(1)) / ( $in * ($in-1) );    
+	$tree[2 * $size - $in]->{'time'} =$t;
+    }
+   # topology generation
+   for ($in = $size; $in > 1; $in-- ) {
+       my $pick = int $self->random($in);    
+       my $nodeindex = $list[$pick];       
+       my $swap = 2 * $size - $in;       
+       $tree[$swap]->{'desc1'} = $nodeindex;	
+       $list[$pick] = $list[$in-1];       
+       $pick = int rand($in - 1);    
+       $nodeindex = $list[$pick];
+       $tree[$swap]->{'desc2'} = $nodeindex;	
+       $list[$pick] = $swap;
+   }
+   # Let's convert the hashes into nodes
+
+   my @nodes = ();   
+   foreach my $n ( @tree ) { 
+       push @nodes, 
+	   new Bio::Tree::AlleleNode(-id => $n->{'nodenum'},
+				     -branch_length => $n->{'time'});
+   }
+   my $ct = 0;
+   foreach my $node ( @nodes ) { 
+       my $n = $tree[$ct++];
+       if( defined $n->{'desc1'} ) {
+	   $node->add_Descendent($nodes[$n->{'desc1'}]);
+       }
+       if( defined $n->{'desc2'} ) { 
+	   $node->add_Descendent($nodes[$n->{'desc2'}]);
+       }
+   }   
+   my $T = new Bio::Tree::Tree(-root => pop @nodes );
+   return $T;
+}
+
+=head2 add_Mutations
+
+ Title   : add_Mutations
+ Usage   : $factory->add_Mutations($tree, $mutcount);
+ Function: Adds mutations to a tree via a random process weighted by 
+           branch length (it is a poisson distribution 
+			  as part of a coalescent process) 
+ Returns : none
+ Args    : $tree - Bio::Tree::TreeI 
+           $nummut - number of mutations
+
+
+=cut
+
+sub add_Mutations{
+   my ($self,$tree, $nummut) = @_;
+   my @branches;
+   my @lens;
+   my $branchlen = 0;
+   my $last = 0;
+   my @nodes = $tree->get_nodes();
+   my $precision = 10**$PRECISION_DIGITS;
+   my $i = 0;
+
+   # Jason's somewhat simplistics way of doing a poission
+   # distribution for a fixed number of mutations
+   # build an array and put the node number in a slot
+   # representing the branch to put a mutation on
+   # but weight the number of slots per branch by the 
+   # length of the branch ( ancestor's time - node time)
+   
+   foreach my $node ( @nodes ) {
+       if( $node->ancestor ) { 
+	   my $len = int ( ($node->ancestor->branch_length - 
+			    $node->branch_length) * $precision);
+	   if ( $len > 0 ) {
+	       for( my $j =0;$j < $len;$j++) {
+		   push @branches, $i;
+	       }
+	       $last += $len;
+	   }
+	   $branchlen += $len;
+       }
+       if( ! $node->isa('Bio::Tree::AlleleNode') ) {
+	   bless $node, 'Bio::Tree::AlleleNode'; # rebless it to the right node
+       } 
+       $node->purge_markers;
+       $i++;
+   }
+   # sanity check
+    die("branch len is $branchlen arraylen is $last")
+        unless ( $branchlen == $last );
+   
+   for( my $j = 0; $j < $nummut; $j++)  {
+       my $index = int(rand($branchlen));
+       my $branch = $branches[$index];
+       $nodes[$branch]->add_alleles("Mutation$j", [1]);
+   }
+}
+
+=head2 maxcount
+
+ Title   : maxcount
+ Usage   : $obj->maxcount($newval)
+ Function: 
+ Example : 
+ Returns : value of maxcount
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub maxcount{
+   my ($self,$value) = @_;
+   if( defined $value) {
+       if( $value =~ /^(\d+)/ ) { 
+	   $self->{'maxcount'} = $1;
+       } else { 
+	   $self->warn("Must specify a valid Positive integer to maxcount");
+	   $self->{'maxcount'} = 0;
+       }
+  }
+   return $self->{'_maxcount'};
+}
+
+=head2 samples
+
+ Title   : samples
+ Usage   : $obj->samples($newval)
+ Function: 
+ Example : 
+ Returns : value of samples
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub samples{
+   my ($self,$value) = @_;
+   if( defined $value) {
+       if( ref($value) !~ /ARRAY/i ) { 
+	   $self->warn("Must specify a valid array ref to the method 'samples'");
+	   $value = [];
+       } 
+      $self->{'samples'} = $value;
+    }
+    return $self->{'samples'};
+
+}
+
+=head2 sample_size
+
+ Title   : sample_size
+ Usage   : $obj->sample_size($newval)
+ Function: 
+ Example : 
+ Returns : value of sample_size
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub sample_size{
+   my ($self,$value) = @_;
+   if( defined $value) {
+      $self->{'sample_size'} = $value;
+    }
+    return $self->{'sample_size'};
+
+}
+
+=head2 attach_EventHandler
+
+ Title   : attach_EventHandler
+ Usage   : $parser->attatch_EventHandler($handler)
+ Function: Adds an event handler to listen for events
+ Returns : none
+ Args    : Bio::Event::EventHandlerI
+
+=cut
+
+sub attach_EventHandler{
+    my ($self,$handler) = @_;
+    return if( ! $handler );
+    if( ! $handler->isa('Bio::Event::EventHandlerI') ) {
+	$self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::Event::EventHandlerI');
+    }
+    $self->{'_handler'} = $handler;
+    return;
+}
+
+=head2 _eventHandler
+
+ Title   : _eventHandler
+ Usage   : private
+ Function: Get the EventHandler
+ Returns : Bio::Event::EventHandlerI
+ Args    : none
+
+
+=cut
+
+sub _eventHandler{
+   my ($self) = @_;
+   return $self->{'_handler'};
+}
+
+=head2 random
+
+ Title   : random
+ Usage   : my $rfloat = $node->random($size)
+ Function: Generates a random number between 0 and $size
+           This is abstracted so that someone can override and provide their
+           own special RNG.  This is expected to be a uniform RNG.
+ Returns : Floating point random
+ Args    : $maximum size for random number (defaults to 1)
+
+
+=cut
+
+sub random{
+   my ($self,$max) = @_;
+   return rand($max);
+}
+
+1;