comparison variant_effect_predictor/Bio/Tree/RandomFactory.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 # $Id: RandomFactory.pm,v 1.8 2002/12/24 17:52:03 jason Exp $
2 #
3 # BioPerl module for Bio::Tree::RandomFactory
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Tree::RandomFactory - TreeFactory for generating Random Trees
16
17 =head1 SYNOPSIS
18
19 use Bio::Tree::RandomFactory
20 my $factory = new Bio::Tree::RandomFactory( -samples => \@taxonnames,
21 -maxcount => 10);
22
23 # or for anonymous samples
24
25 my $factory = new Bio::Tree::RandomFactory( -sample_size => 6,
26 -maxcount = 50);
27
28 =head1 DESCRIPTION
29
30 Builds a random tree every time next_tree is called or up to -maxcount times.
31
32 This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
33
34 Hudson, R. R. 1990. Gene genealogies and the coalescent
35 process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford
36 surveys in evolutionary biology. Vol. 7. Oxford University
37 Press, New York
38
39
40 =head1 FEEDBACK
41
42 =head2 Mailing Lists
43
44 User feedback is an integral part of the evolution of this and other
45 Bioperl modules. Send your comments and suggestions preferably to
46 the Bioperl mailing list. Your participation is much appreciated.
47
48 bioperl-l@bioperl.org - General discussion
49 http://bioperl.org/MailList.shtml - About the mailing lists
50
51 =head2 Reporting Bugs
52
53 Report bugs to the Bioperl bug tracking system to help us keep track
54 of the bugs and their resolution. Bug reports can be submitted via
55 the web:
56
57 http://bugzilla.bioperl.org/
58
59
60 =head1 AUTHOR - Jason Stajich
61
62 Email jason@bioperl.org
63
64 =head1 CONTRIBUTORS
65
66 Matthew Hahn, E<lt>matthew.hahn@duke.eduE<gt>
67
68 =head1 APPENDIX
69
70 The rest of the documentation details each of the object methods.
71 Internal methods are usually preceded with a _
72
73 =cut
74
75
76 # Let the code begin...
77
78
79 package Bio::Tree::RandomFactory;
80 use vars qw(@ISA $PRECISION_DIGITS);
81 use strict;
82
83 BEGIN {
84 $PRECISION_DIGITS = 3; # Precision for the branchlength
85 }
86
87 use Bio::Factory::TreeFactoryI;
88 use Bio::Root::Root;
89 use Bio::TreeIO::TreeEventBuilder;
90 use Bio::Tree::AlleleNode;
91
92 @ISA = qw(Bio::Root::Root Bio::Factory::TreeFactoryI );
93
94 =head2 new
95
96 Title : new
97 Usage : my $factory = new Bio::Tree::RandomFactory(-samples => \@samples,
98 -maxcount=> $N);
99 Function: Initializes a Bio::Tree::RandomFactory object
100 Returns : Bio::Tree::RandomFactory
101 Args :
102
103
104 =cut
105
106 sub new{
107 my ($class,@args) = @_;
108 my $self = $class->SUPER::new(@args);
109
110 $self->{'_eventbuilder'} = new Bio::TreeIO::TreeEventBuilder();
111 $self->{'_treecounter'} = 0;
112 $self->{'_maxcount'} = 0;
113 my ($maxcount, $samps,$samplesize ) = $self->_rearrange([qw(MAXCOUNT
114 SAMPLES
115 SAMPLE_SIZE)],
116 @args);
117 my @samples;
118
119 if( ! defined $samps ) {
120 if( ! defined $samplesize || $samplesize <= 0 ) {
121 $self->throw("Must specify a valid samplesize if parameter -SAMPLE is not specified");
122 }
123 foreach ( 1..$samplesize ) { push @samples, "Samp$_"; }
124 } else {
125 if( ref($samps) =~ /ARRAY/i ) {
126 $self->throw("Must specify a valid ARRAY reference to the parameter -SAMPLES, did you forget a leading '\\'?");
127 }
128 @samples = @$samps;
129 }
130
131 $self->samples(\@samples);
132 $self->sample_size(scalar @samples);
133 if( defined $maxcount ) {
134 $self->maxcount($maxcount);
135 }
136 return $self;
137 }
138
139 =head2 next_tree
140
141 Title : next_tree
142 Usage : my $tree = $factory->next_tree
143 Function: Returns a random tree based on the initialized number of nodes
144 NOTE: if maxcount is not specified on initialization or
145 set to a valid integer, subsequent calls to next_tree will
146 continue to return random trees and never return undef
147
148 Returns : Bio::Tree::TreeI object
149 Args : none
150
151 =cut
152
153 sub next_tree{
154 my ($self) = @_;
155 # If maxcount is set to something non-zero then next tree will
156 # continue to return valid trees until maxcount is reached
157 # otherwise will always return trees
158 return undef if( $self->maxcount &&
159 $self->{'_treecounter'}++ >= $self->maxcount );
160 my $size = $self->sample_size;
161
162 my $in;
163 my @tree = ();
164 my @list = ();
165
166 for($in=0;$in < 2*$size -1; $in++ ) {
167 push @tree, { 'nodenum' => "Node$in" };
168 }
169 # in C we would have 2 arrays
170 # an array of nodes (tree)
171 # and array of pointers to these nodes (list)
172 # and we just shuffle the list items to do the
173 # tree topology generation
174 # instead in perl, we will have a list of hashes (nodes) called @tree
175 # and a list of integers representing the indexes in tree called @list
176
177 for($in=0;$in < $size;$in++) {
178 $tree[$in]->{'time'} = 0;
179 $tree[$in]->{'desc1'} = undef;
180 $tree[$in]->{'desc2'} = undef;
181 push @list, $in;
182 }
183
184 my $t=0;
185 # generate times for the nodes
186 for($in = $size; $in > 1; $in-- ) {
187 $t+= -2.0 * log(1 - $self->random(1)) / ( $in * ($in-1) );
188 $tree[2 * $size - $in]->{'time'} =$t;
189 }
190 # topology generation
191 for ($in = $size; $in > 1; $in-- ) {
192 my $pick = int $self->random($in);
193 my $nodeindex = $list[$pick];
194 my $swap = 2 * $size - $in;
195 $tree[$swap]->{'desc1'} = $nodeindex;
196 $list[$pick] = $list[$in-1];
197 $pick = int rand($in - 1);
198 $nodeindex = $list[$pick];
199 $tree[$swap]->{'desc2'} = $nodeindex;
200 $list[$pick] = $swap;
201 }
202 # Let's convert the hashes into nodes
203
204 my @nodes = ();
205 foreach my $n ( @tree ) {
206 push @nodes,
207 new Bio::Tree::AlleleNode(-id => $n->{'nodenum'},
208 -branch_length => $n->{'time'});
209 }
210 my $ct = 0;
211 foreach my $node ( @nodes ) {
212 my $n = $tree[$ct++];
213 if( defined $n->{'desc1'} ) {
214 $node->add_Descendent($nodes[$n->{'desc1'}]);
215 }
216 if( defined $n->{'desc2'} ) {
217 $node->add_Descendent($nodes[$n->{'desc2'}]);
218 }
219 }
220 my $T = new Bio::Tree::Tree(-root => pop @nodes );
221 return $T;
222 }
223
224 =head2 add_Mutations
225
226 Title : add_Mutations
227 Usage : $factory->add_Mutations($tree, $mutcount);
228 Function: Adds mutations to a tree via a random process weighted by
229 branch length (it is a poisson distribution
230 as part of a coalescent process)
231 Returns : none
232 Args : $tree - Bio::Tree::TreeI
233 $nummut - number of mutations
234
235
236 =cut
237
238 sub add_Mutations{
239 my ($self,$tree, $nummut) = @_;
240 my @branches;
241 my @lens;
242 my $branchlen = 0;
243 my $last = 0;
244 my @nodes = $tree->get_nodes();
245 my $precision = 10**$PRECISION_DIGITS;
246 my $i = 0;
247
248 # Jason's somewhat simplistics way of doing a poission
249 # distribution for a fixed number of mutations
250 # build an array and put the node number in a slot
251 # representing the branch to put a mutation on
252 # but weight the number of slots per branch by the
253 # length of the branch ( ancestor's time - node time)
254
255 foreach my $node ( @nodes ) {
256 if( $node->ancestor ) {
257 my $len = int ( ($node->ancestor->branch_length -
258 $node->branch_length) * $precision);
259 if ( $len > 0 ) {
260 for( my $j =0;$j < $len;$j++) {
261 push @branches, $i;
262 }
263 $last += $len;
264 }
265 $branchlen += $len;
266 }
267 if( ! $node->isa('Bio::Tree::AlleleNode') ) {
268 bless $node, 'Bio::Tree::AlleleNode'; # rebless it to the right node
269 }
270 $node->purge_markers;
271 $i++;
272 }
273 # sanity check
274 die("branch len is $branchlen arraylen is $last")
275 unless ( $branchlen == $last );
276
277 for( my $j = 0; $j < $nummut; $j++) {
278 my $index = int(rand($branchlen));
279 my $branch = $branches[$index];
280 $nodes[$branch]->add_alleles("Mutation$j", [1]);
281 }
282 }
283
284 =head2 maxcount
285
286 Title : maxcount
287 Usage : $obj->maxcount($newval)
288 Function:
289 Example :
290 Returns : value of maxcount
291 Args : newvalue (optional)
292
293
294 =cut
295
296 sub maxcount{
297 my ($self,$value) = @_;
298 if( defined $value) {
299 if( $value =~ /^(\d+)/ ) {
300 $self->{'maxcount'} = $1;
301 } else {
302 $self->warn("Must specify a valid Positive integer to maxcount");
303 $self->{'maxcount'} = 0;
304 }
305 }
306 return $self->{'_maxcount'};
307 }
308
309 =head2 samples
310
311 Title : samples
312 Usage : $obj->samples($newval)
313 Function:
314 Example :
315 Returns : value of samples
316 Args : newvalue (optional)
317
318
319 =cut
320
321 sub samples{
322 my ($self,$value) = @_;
323 if( defined $value) {
324 if( ref($value) !~ /ARRAY/i ) {
325 $self->warn("Must specify a valid array ref to the method 'samples'");
326 $value = [];
327 }
328 $self->{'samples'} = $value;
329 }
330 return $self->{'samples'};
331
332 }
333
334 =head2 sample_size
335
336 Title : sample_size
337 Usage : $obj->sample_size($newval)
338 Function:
339 Example :
340 Returns : value of sample_size
341 Args : newvalue (optional)
342
343
344 =cut
345
346 sub sample_size{
347 my ($self,$value) = @_;
348 if( defined $value) {
349 $self->{'sample_size'} = $value;
350 }
351 return $self->{'sample_size'};
352
353 }
354
355 =head2 attach_EventHandler
356
357 Title : attach_EventHandler
358 Usage : $parser->attatch_EventHandler($handler)
359 Function: Adds an event handler to listen for events
360 Returns : none
361 Args : Bio::Event::EventHandlerI
362
363 =cut
364
365 sub attach_EventHandler{
366 my ($self,$handler) = @_;
367 return if( ! $handler );
368 if( ! $handler->isa('Bio::Event::EventHandlerI') ) {
369 $self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::Event::EventHandlerI');
370 }
371 $self->{'_handler'} = $handler;
372 return;
373 }
374
375 =head2 _eventHandler
376
377 Title : _eventHandler
378 Usage : private
379 Function: Get the EventHandler
380 Returns : Bio::Event::EventHandlerI
381 Args : none
382
383
384 =cut
385
386 sub _eventHandler{
387 my ($self) = @_;
388 return $self->{'_handler'};
389 }
390
391 =head2 random
392
393 Title : random
394 Usage : my $rfloat = $node->random($size)
395 Function: Generates a random number between 0 and $size
396 This is abstracted so that someone can override and provide their
397 own special RNG. This is expected to be a uniform RNG.
398 Returns : Floating point random
399 Args : $maximum size for random number (defaults to 1)
400
401
402 =cut
403
404 sub random{
405 my ($self,$max) = @_;
406 return rand($max);
407 }
408
409 1;