Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/Tree/RandomFactory.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 # $Id: RandomFactory.pm,v 1.8 2002/12/24 17:52:03 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::Tree::RandomFactory | |
4 # | |
5 # Cared for by Jason Stajich <jason@bioperl.org> | |
6 # | |
7 # Copyright Jason Stajich | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Tree::RandomFactory - TreeFactory for generating Random Trees | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use Bio::Tree::RandomFactory | |
20 my $factory = new Bio::Tree::RandomFactory( -samples => \@taxonnames, | |
21 -maxcount => 10); | |
22 | |
23 # or for anonymous samples | |
24 | |
25 my $factory = new Bio::Tree::RandomFactory( -sample_size => 6, | |
26 -maxcount = 50); | |
27 | |
28 =head1 DESCRIPTION | |
29 | |
30 Builds a random tree every time next_tree is called or up to -maxcount times. | |
31 | |
32 This algorithm is based on the make_tree algorithm from Richard Hudson 1990. | |
33 | |
34 Hudson, R. R. 1990. Gene genealogies and the coalescent | |
35 process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford | |
36 surveys in evolutionary biology. Vol. 7. Oxford University | |
37 Press, New York | |
38 | |
39 | |
40 =head1 FEEDBACK | |
41 | |
42 =head2 Mailing Lists | |
43 | |
44 User feedback is an integral part of the evolution of this and other | |
45 Bioperl modules. Send your comments and suggestions preferably to | |
46 the Bioperl mailing list. Your participation is much appreciated. | |
47 | |
48 bioperl-l@bioperl.org - General discussion | |
49 http://bioperl.org/MailList.shtml - About the mailing lists | |
50 | |
51 =head2 Reporting Bugs | |
52 | |
53 Report bugs to the Bioperl bug tracking system to help us keep track | |
54 of the bugs and their resolution. Bug reports can be submitted via | |
55 the web: | |
56 | |
57 http://bugzilla.bioperl.org/ | |
58 | |
59 | |
60 =head1 AUTHOR - Jason Stajich | |
61 | |
62 Email jason@bioperl.org | |
63 | |
64 =head1 CONTRIBUTORS | |
65 | |
66 Matthew Hahn, E<lt>matthew.hahn@duke.eduE<gt> | |
67 | |
68 =head1 APPENDIX | |
69 | |
70 The rest of the documentation details each of the object methods. | |
71 Internal methods are usually preceded with a _ | |
72 | |
73 =cut | |
74 | |
75 | |
76 # Let the code begin... | |
77 | |
78 | |
79 package Bio::Tree::RandomFactory; | |
80 use vars qw(@ISA $PRECISION_DIGITS); | |
81 use strict; | |
82 | |
83 BEGIN { | |
84 $PRECISION_DIGITS = 3; # Precision for the branchlength | |
85 } | |
86 | |
87 use Bio::Factory::TreeFactoryI; | |
88 use Bio::Root::Root; | |
89 use Bio::TreeIO::TreeEventBuilder; | |
90 use Bio::Tree::AlleleNode; | |
91 | |
92 @ISA = qw(Bio::Root::Root Bio::Factory::TreeFactoryI ); | |
93 | |
94 =head2 new | |
95 | |
96 Title : new | |
97 Usage : my $factory = new Bio::Tree::RandomFactory(-samples => \@samples, | |
98 -maxcount=> $N); | |
99 Function: Initializes a Bio::Tree::RandomFactory object | |
100 Returns : Bio::Tree::RandomFactory | |
101 Args : | |
102 | |
103 | |
104 =cut | |
105 | |
106 sub new{ | |
107 my ($class,@args) = @_; | |
108 my $self = $class->SUPER::new(@args); | |
109 | |
110 $self->{'_eventbuilder'} = new Bio::TreeIO::TreeEventBuilder(); | |
111 $self->{'_treecounter'} = 0; | |
112 $self->{'_maxcount'} = 0; | |
113 my ($maxcount, $samps,$samplesize ) = $self->_rearrange([qw(MAXCOUNT | |
114 SAMPLES | |
115 SAMPLE_SIZE)], | |
116 @args); | |
117 my @samples; | |
118 | |
119 if( ! defined $samps ) { | |
120 if( ! defined $samplesize || $samplesize <= 0 ) { | |
121 $self->throw("Must specify a valid samplesize if parameter -SAMPLE is not specified"); | |
122 } | |
123 foreach ( 1..$samplesize ) { push @samples, "Samp$_"; } | |
124 } else { | |
125 if( ref($samps) =~ /ARRAY/i ) { | |
126 $self->throw("Must specify a valid ARRAY reference to the parameter -SAMPLES, did you forget a leading '\\'?"); | |
127 } | |
128 @samples = @$samps; | |
129 } | |
130 | |
131 $self->samples(\@samples); | |
132 $self->sample_size(scalar @samples); | |
133 if( defined $maxcount ) { | |
134 $self->maxcount($maxcount); | |
135 } | |
136 return $self; | |
137 } | |
138 | |
139 =head2 next_tree | |
140 | |
141 Title : next_tree | |
142 Usage : my $tree = $factory->next_tree | |
143 Function: Returns a random tree based on the initialized number of nodes | |
144 NOTE: if maxcount is not specified on initialization or | |
145 set to a valid integer, subsequent calls to next_tree will | |
146 continue to return random trees and never return undef | |
147 | |
148 Returns : Bio::Tree::TreeI object | |
149 Args : none | |
150 | |
151 =cut | |
152 | |
153 sub next_tree{ | |
154 my ($self) = @_; | |
155 # If maxcount is set to something non-zero then next tree will | |
156 # continue to return valid trees until maxcount is reached | |
157 # otherwise will always return trees | |
158 return undef if( $self->maxcount && | |
159 $self->{'_treecounter'}++ >= $self->maxcount ); | |
160 my $size = $self->sample_size; | |
161 | |
162 my $in; | |
163 my @tree = (); | |
164 my @list = (); | |
165 | |
166 for($in=0;$in < 2*$size -1; $in++ ) { | |
167 push @tree, { 'nodenum' => "Node$in" }; | |
168 } | |
169 # in C we would have 2 arrays | |
170 # an array of nodes (tree) | |
171 # and array of pointers to these nodes (list) | |
172 # and we just shuffle the list items to do the | |
173 # tree topology generation | |
174 # instead in perl, we will have a list of hashes (nodes) called @tree | |
175 # and a list of integers representing the indexes in tree called @list | |
176 | |
177 for($in=0;$in < $size;$in++) { | |
178 $tree[$in]->{'time'} = 0; | |
179 $tree[$in]->{'desc1'} = undef; | |
180 $tree[$in]->{'desc2'} = undef; | |
181 push @list, $in; | |
182 } | |
183 | |
184 my $t=0; | |
185 # generate times for the nodes | |
186 for($in = $size; $in > 1; $in-- ) { | |
187 $t+= -2.0 * log(1 - $self->random(1)) / ( $in * ($in-1) ); | |
188 $tree[2 * $size - $in]->{'time'} =$t; | |
189 } | |
190 # topology generation | |
191 for ($in = $size; $in > 1; $in-- ) { | |
192 my $pick = int $self->random($in); | |
193 my $nodeindex = $list[$pick]; | |
194 my $swap = 2 * $size - $in; | |
195 $tree[$swap]->{'desc1'} = $nodeindex; | |
196 $list[$pick] = $list[$in-1]; | |
197 $pick = int rand($in - 1); | |
198 $nodeindex = $list[$pick]; | |
199 $tree[$swap]->{'desc2'} = $nodeindex; | |
200 $list[$pick] = $swap; | |
201 } | |
202 # Let's convert the hashes into nodes | |
203 | |
204 my @nodes = (); | |
205 foreach my $n ( @tree ) { | |
206 push @nodes, | |
207 new Bio::Tree::AlleleNode(-id => $n->{'nodenum'}, | |
208 -branch_length => $n->{'time'}); | |
209 } | |
210 my $ct = 0; | |
211 foreach my $node ( @nodes ) { | |
212 my $n = $tree[$ct++]; | |
213 if( defined $n->{'desc1'} ) { | |
214 $node->add_Descendent($nodes[$n->{'desc1'}]); | |
215 } | |
216 if( defined $n->{'desc2'} ) { | |
217 $node->add_Descendent($nodes[$n->{'desc2'}]); | |
218 } | |
219 } | |
220 my $T = new Bio::Tree::Tree(-root => pop @nodes ); | |
221 return $T; | |
222 } | |
223 | |
224 =head2 add_Mutations | |
225 | |
226 Title : add_Mutations | |
227 Usage : $factory->add_Mutations($tree, $mutcount); | |
228 Function: Adds mutations to a tree via a random process weighted by | |
229 branch length (it is a poisson distribution | |
230 as part of a coalescent process) | |
231 Returns : none | |
232 Args : $tree - Bio::Tree::TreeI | |
233 $nummut - number of mutations | |
234 | |
235 | |
236 =cut | |
237 | |
238 sub add_Mutations{ | |
239 my ($self,$tree, $nummut) = @_; | |
240 my @branches; | |
241 my @lens; | |
242 my $branchlen = 0; | |
243 my $last = 0; | |
244 my @nodes = $tree->get_nodes(); | |
245 my $precision = 10**$PRECISION_DIGITS; | |
246 my $i = 0; | |
247 | |
248 # Jason's somewhat simplistics way of doing a poission | |
249 # distribution for a fixed number of mutations | |
250 # build an array and put the node number in a slot | |
251 # representing the branch to put a mutation on | |
252 # but weight the number of slots per branch by the | |
253 # length of the branch ( ancestor's time - node time) | |
254 | |
255 foreach my $node ( @nodes ) { | |
256 if( $node->ancestor ) { | |
257 my $len = int ( ($node->ancestor->branch_length - | |
258 $node->branch_length) * $precision); | |
259 if ( $len > 0 ) { | |
260 for( my $j =0;$j < $len;$j++) { | |
261 push @branches, $i; | |
262 } | |
263 $last += $len; | |
264 } | |
265 $branchlen += $len; | |
266 } | |
267 if( ! $node->isa('Bio::Tree::AlleleNode') ) { | |
268 bless $node, 'Bio::Tree::AlleleNode'; # rebless it to the right node | |
269 } | |
270 $node->purge_markers; | |
271 $i++; | |
272 } | |
273 # sanity check | |
274 die("branch len is $branchlen arraylen is $last") | |
275 unless ( $branchlen == $last ); | |
276 | |
277 for( my $j = 0; $j < $nummut; $j++) { | |
278 my $index = int(rand($branchlen)); | |
279 my $branch = $branches[$index]; | |
280 $nodes[$branch]->add_alleles("Mutation$j", [1]); | |
281 } | |
282 } | |
283 | |
284 =head2 maxcount | |
285 | |
286 Title : maxcount | |
287 Usage : $obj->maxcount($newval) | |
288 Function: | |
289 Example : | |
290 Returns : value of maxcount | |
291 Args : newvalue (optional) | |
292 | |
293 | |
294 =cut | |
295 | |
296 sub maxcount{ | |
297 my ($self,$value) = @_; | |
298 if( defined $value) { | |
299 if( $value =~ /^(\d+)/ ) { | |
300 $self->{'maxcount'} = $1; | |
301 } else { | |
302 $self->warn("Must specify a valid Positive integer to maxcount"); | |
303 $self->{'maxcount'} = 0; | |
304 } | |
305 } | |
306 return $self->{'_maxcount'}; | |
307 } | |
308 | |
309 =head2 samples | |
310 | |
311 Title : samples | |
312 Usage : $obj->samples($newval) | |
313 Function: | |
314 Example : | |
315 Returns : value of samples | |
316 Args : newvalue (optional) | |
317 | |
318 | |
319 =cut | |
320 | |
321 sub samples{ | |
322 my ($self,$value) = @_; | |
323 if( defined $value) { | |
324 if( ref($value) !~ /ARRAY/i ) { | |
325 $self->warn("Must specify a valid array ref to the method 'samples'"); | |
326 $value = []; | |
327 } | |
328 $self->{'samples'} = $value; | |
329 } | |
330 return $self->{'samples'}; | |
331 | |
332 } | |
333 | |
334 =head2 sample_size | |
335 | |
336 Title : sample_size | |
337 Usage : $obj->sample_size($newval) | |
338 Function: | |
339 Example : | |
340 Returns : value of sample_size | |
341 Args : newvalue (optional) | |
342 | |
343 | |
344 =cut | |
345 | |
346 sub sample_size{ | |
347 my ($self,$value) = @_; | |
348 if( defined $value) { | |
349 $self->{'sample_size'} = $value; | |
350 } | |
351 return $self->{'sample_size'}; | |
352 | |
353 } | |
354 | |
355 =head2 attach_EventHandler | |
356 | |
357 Title : attach_EventHandler | |
358 Usage : $parser->attatch_EventHandler($handler) | |
359 Function: Adds an event handler to listen for events | |
360 Returns : none | |
361 Args : Bio::Event::EventHandlerI | |
362 | |
363 =cut | |
364 | |
365 sub attach_EventHandler{ | |
366 my ($self,$handler) = @_; | |
367 return if( ! $handler ); | |
368 if( ! $handler->isa('Bio::Event::EventHandlerI') ) { | |
369 $self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::Event::EventHandlerI'); | |
370 } | |
371 $self->{'_handler'} = $handler; | |
372 return; | |
373 } | |
374 | |
375 =head2 _eventHandler | |
376 | |
377 Title : _eventHandler | |
378 Usage : private | |
379 Function: Get the EventHandler | |
380 Returns : Bio::Event::EventHandlerI | |
381 Args : none | |
382 | |
383 | |
384 =cut | |
385 | |
386 sub _eventHandler{ | |
387 my ($self) = @_; | |
388 return $self->{'_handler'}; | |
389 } | |
390 | |
391 =head2 random | |
392 | |
393 Title : random | |
394 Usage : my $rfloat = $node->random($size) | |
395 Function: Generates a random number between 0 and $size | |
396 This is abstracted so that someone can override and provide their | |
397 own special RNG. This is expected to be a uniform RNG. | |
398 Returns : Floating point random | |
399 Args : $maximum size for random number (defaults to 1) | |
400 | |
401 | |
402 =cut | |
403 | |
404 sub random{ | |
405 my ($self,$max) = @_; | |
406 return rand($max); | |
407 } | |
408 | |
409 1; |