0
|
1 # $Id: RandomFactory.pm,v 1.8 2002/12/24 17:52:03 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Tree::RandomFactory
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Tree::RandomFactory - TreeFactory for generating Random Trees
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use Bio::Tree::RandomFactory
|
|
20 my $factory = new Bio::Tree::RandomFactory( -samples => \@taxonnames,
|
|
21 -maxcount => 10);
|
|
22
|
|
23 # or for anonymous samples
|
|
24
|
|
25 my $factory = new Bio::Tree::RandomFactory( -sample_size => 6,
|
|
26 -maxcount = 50);
|
|
27
|
|
28 =head1 DESCRIPTION
|
|
29
|
|
30 Builds a random tree every time next_tree is called or up to -maxcount times.
|
|
31
|
|
32 This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
|
|
33
|
|
34 Hudson, R. R. 1990. Gene genealogies and the coalescent
|
|
35 process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford
|
|
36 surveys in evolutionary biology. Vol. 7. Oxford University
|
|
37 Press, New York
|
|
38
|
|
39
|
|
40 =head1 FEEDBACK
|
|
41
|
|
42 =head2 Mailing Lists
|
|
43
|
|
44 User feedback is an integral part of the evolution of this and other
|
|
45 Bioperl modules. Send your comments and suggestions preferably to
|
|
46 the Bioperl mailing list. Your participation is much appreciated.
|
|
47
|
|
48 bioperl-l@bioperl.org - General discussion
|
|
49 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
50
|
|
51 =head2 Reporting Bugs
|
|
52
|
|
53 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
54 of the bugs and their resolution. Bug reports can be submitted via
|
|
55 the web:
|
|
56
|
|
57 http://bugzilla.bioperl.org/
|
|
58
|
|
59
|
|
60 =head1 AUTHOR - Jason Stajich
|
|
61
|
|
62 Email jason@bioperl.org
|
|
63
|
|
64 =head1 CONTRIBUTORS
|
|
65
|
|
66 Matthew Hahn, E<lt>matthew.hahn@duke.eduE<gt>
|
|
67
|
|
68 =head1 APPENDIX
|
|
69
|
|
70 The rest of the documentation details each of the object methods.
|
|
71 Internal methods are usually preceded with a _
|
|
72
|
|
73 =cut
|
|
74
|
|
75
|
|
76 # Let the code begin...
|
|
77
|
|
78
|
|
79 package Bio::Tree::RandomFactory;
|
|
80 use vars qw(@ISA $PRECISION_DIGITS);
|
|
81 use strict;
|
|
82
|
|
83 BEGIN {
|
|
84 $PRECISION_DIGITS = 3; # Precision for the branchlength
|
|
85 }
|
|
86
|
|
87 use Bio::Factory::TreeFactoryI;
|
|
88 use Bio::Root::Root;
|
|
89 use Bio::TreeIO::TreeEventBuilder;
|
|
90 use Bio::Tree::AlleleNode;
|
|
91
|
|
92 @ISA = qw(Bio::Root::Root Bio::Factory::TreeFactoryI );
|
|
93
|
|
94 =head2 new
|
|
95
|
|
96 Title : new
|
|
97 Usage : my $factory = new Bio::Tree::RandomFactory(-samples => \@samples,
|
|
98 -maxcount=> $N);
|
|
99 Function: Initializes a Bio::Tree::RandomFactory object
|
|
100 Returns : Bio::Tree::RandomFactory
|
|
101 Args :
|
|
102
|
|
103
|
|
104 =cut
|
|
105
|
|
106 sub new{
|
|
107 my ($class,@args) = @_;
|
|
108 my $self = $class->SUPER::new(@args);
|
|
109
|
|
110 $self->{'_eventbuilder'} = new Bio::TreeIO::TreeEventBuilder();
|
|
111 $self->{'_treecounter'} = 0;
|
|
112 $self->{'_maxcount'} = 0;
|
|
113 my ($maxcount, $samps,$samplesize ) = $self->_rearrange([qw(MAXCOUNT
|
|
114 SAMPLES
|
|
115 SAMPLE_SIZE)],
|
|
116 @args);
|
|
117 my @samples;
|
|
118
|
|
119 if( ! defined $samps ) {
|
|
120 if( ! defined $samplesize || $samplesize <= 0 ) {
|
|
121 $self->throw("Must specify a valid samplesize if parameter -SAMPLE is not specified");
|
|
122 }
|
|
123 foreach ( 1..$samplesize ) { push @samples, "Samp$_"; }
|
|
124 } else {
|
|
125 if( ref($samps) =~ /ARRAY/i ) {
|
|
126 $self->throw("Must specify a valid ARRAY reference to the parameter -SAMPLES, did you forget a leading '\\'?");
|
|
127 }
|
|
128 @samples = @$samps;
|
|
129 }
|
|
130
|
|
131 $self->samples(\@samples);
|
|
132 $self->sample_size(scalar @samples);
|
|
133 if( defined $maxcount ) {
|
|
134 $self->maxcount($maxcount);
|
|
135 }
|
|
136 return $self;
|
|
137 }
|
|
138
|
|
139 =head2 next_tree
|
|
140
|
|
141 Title : next_tree
|
|
142 Usage : my $tree = $factory->next_tree
|
|
143 Function: Returns a random tree based on the initialized number of nodes
|
|
144 NOTE: if maxcount is not specified on initialization or
|
|
145 set to a valid integer, subsequent calls to next_tree will
|
|
146 continue to return random trees and never return undef
|
|
147
|
|
148 Returns : Bio::Tree::TreeI object
|
|
149 Args : none
|
|
150
|
|
151 =cut
|
|
152
|
|
153 sub next_tree{
|
|
154 my ($self) = @_;
|
|
155 # If maxcount is set to something non-zero then next tree will
|
|
156 # continue to return valid trees until maxcount is reached
|
|
157 # otherwise will always return trees
|
|
158 return undef if( $self->maxcount &&
|
|
159 $self->{'_treecounter'}++ >= $self->maxcount );
|
|
160 my $size = $self->sample_size;
|
|
161
|
|
162 my $in;
|
|
163 my @tree = ();
|
|
164 my @list = ();
|
|
165
|
|
166 for($in=0;$in < 2*$size -1; $in++ ) {
|
|
167 push @tree, { 'nodenum' => "Node$in" };
|
|
168 }
|
|
169 # in C we would have 2 arrays
|
|
170 # an array of nodes (tree)
|
|
171 # and array of pointers to these nodes (list)
|
|
172 # and we just shuffle the list items to do the
|
|
173 # tree topology generation
|
|
174 # instead in perl, we will have a list of hashes (nodes) called @tree
|
|
175 # and a list of integers representing the indexes in tree called @list
|
|
176
|
|
177 for($in=0;$in < $size;$in++) {
|
|
178 $tree[$in]->{'time'} = 0;
|
|
179 $tree[$in]->{'desc1'} = undef;
|
|
180 $tree[$in]->{'desc2'} = undef;
|
|
181 push @list, $in;
|
|
182 }
|
|
183
|
|
184 my $t=0;
|
|
185 # generate times for the nodes
|
|
186 for($in = $size; $in > 1; $in-- ) {
|
|
187 $t+= -2.0 * log(1 - $self->random(1)) / ( $in * ($in-1) );
|
|
188 $tree[2 * $size - $in]->{'time'} =$t;
|
|
189 }
|
|
190 # topology generation
|
|
191 for ($in = $size; $in > 1; $in-- ) {
|
|
192 my $pick = int $self->random($in);
|
|
193 my $nodeindex = $list[$pick];
|
|
194 my $swap = 2 * $size - $in;
|
|
195 $tree[$swap]->{'desc1'} = $nodeindex;
|
|
196 $list[$pick] = $list[$in-1];
|
|
197 $pick = int rand($in - 1);
|
|
198 $nodeindex = $list[$pick];
|
|
199 $tree[$swap]->{'desc2'} = $nodeindex;
|
|
200 $list[$pick] = $swap;
|
|
201 }
|
|
202 # Let's convert the hashes into nodes
|
|
203
|
|
204 my @nodes = ();
|
|
205 foreach my $n ( @tree ) {
|
|
206 push @nodes,
|
|
207 new Bio::Tree::AlleleNode(-id => $n->{'nodenum'},
|
|
208 -branch_length => $n->{'time'});
|
|
209 }
|
|
210 my $ct = 0;
|
|
211 foreach my $node ( @nodes ) {
|
|
212 my $n = $tree[$ct++];
|
|
213 if( defined $n->{'desc1'} ) {
|
|
214 $node->add_Descendent($nodes[$n->{'desc1'}]);
|
|
215 }
|
|
216 if( defined $n->{'desc2'} ) {
|
|
217 $node->add_Descendent($nodes[$n->{'desc2'}]);
|
|
218 }
|
|
219 }
|
|
220 my $T = new Bio::Tree::Tree(-root => pop @nodes );
|
|
221 return $T;
|
|
222 }
|
|
223
|
|
224 =head2 add_Mutations
|
|
225
|
|
226 Title : add_Mutations
|
|
227 Usage : $factory->add_Mutations($tree, $mutcount);
|
|
228 Function: Adds mutations to a tree via a random process weighted by
|
|
229 branch length (it is a poisson distribution
|
|
230 as part of a coalescent process)
|
|
231 Returns : none
|
|
232 Args : $tree - Bio::Tree::TreeI
|
|
233 $nummut - number of mutations
|
|
234
|
|
235
|
|
236 =cut
|
|
237
|
|
238 sub add_Mutations{
|
|
239 my ($self,$tree, $nummut) = @_;
|
|
240 my @branches;
|
|
241 my @lens;
|
|
242 my $branchlen = 0;
|
|
243 my $last = 0;
|
|
244 my @nodes = $tree->get_nodes();
|
|
245 my $precision = 10**$PRECISION_DIGITS;
|
|
246 my $i = 0;
|
|
247
|
|
248 # Jason's somewhat simplistics way of doing a poission
|
|
249 # distribution for a fixed number of mutations
|
|
250 # build an array and put the node number in a slot
|
|
251 # representing the branch to put a mutation on
|
|
252 # but weight the number of slots per branch by the
|
|
253 # length of the branch ( ancestor's time - node time)
|
|
254
|
|
255 foreach my $node ( @nodes ) {
|
|
256 if( $node->ancestor ) {
|
|
257 my $len = int ( ($node->ancestor->branch_length -
|
|
258 $node->branch_length) * $precision);
|
|
259 if ( $len > 0 ) {
|
|
260 for( my $j =0;$j < $len;$j++) {
|
|
261 push @branches, $i;
|
|
262 }
|
|
263 $last += $len;
|
|
264 }
|
|
265 $branchlen += $len;
|
|
266 }
|
|
267 if( ! $node->isa('Bio::Tree::AlleleNode') ) {
|
|
268 bless $node, 'Bio::Tree::AlleleNode'; # rebless it to the right node
|
|
269 }
|
|
270 $node->purge_markers;
|
|
271 $i++;
|
|
272 }
|
|
273 # sanity check
|
|
274 die("branch len is $branchlen arraylen is $last")
|
|
275 unless ( $branchlen == $last );
|
|
276
|
|
277 for( my $j = 0; $j < $nummut; $j++) {
|
|
278 my $index = int(rand($branchlen));
|
|
279 my $branch = $branches[$index];
|
|
280 $nodes[$branch]->add_alleles("Mutation$j", [1]);
|
|
281 }
|
|
282 }
|
|
283
|
|
284 =head2 maxcount
|
|
285
|
|
286 Title : maxcount
|
|
287 Usage : $obj->maxcount($newval)
|
|
288 Function:
|
|
289 Example :
|
|
290 Returns : value of maxcount
|
|
291 Args : newvalue (optional)
|
|
292
|
|
293
|
|
294 =cut
|
|
295
|
|
296 sub maxcount{
|
|
297 my ($self,$value) = @_;
|
|
298 if( defined $value) {
|
|
299 if( $value =~ /^(\d+)/ ) {
|
|
300 $self->{'maxcount'} = $1;
|
|
301 } else {
|
|
302 $self->warn("Must specify a valid Positive integer to maxcount");
|
|
303 $self->{'maxcount'} = 0;
|
|
304 }
|
|
305 }
|
|
306 return $self->{'_maxcount'};
|
|
307 }
|
|
308
|
|
309 =head2 samples
|
|
310
|
|
311 Title : samples
|
|
312 Usage : $obj->samples($newval)
|
|
313 Function:
|
|
314 Example :
|
|
315 Returns : value of samples
|
|
316 Args : newvalue (optional)
|
|
317
|
|
318
|
|
319 =cut
|
|
320
|
|
321 sub samples{
|
|
322 my ($self,$value) = @_;
|
|
323 if( defined $value) {
|
|
324 if( ref($value) !~ /ARRAY/i ) {
|
|
325 $self->warn("Must specify a valid array ref to the method 'samples'");
|
|
326 $value = [];
|
|
327 }
|
|
328 $self->{'samples'} = $value;
|
|
329 }
|
|
330 return $self->{'samples'};
|
|
331
|
|
332 }
|
|
333
|
|
334 =head2 sample_size
|
|
335
|
|
336 Title : sample_size
|
|
337 Usage : $obj->sample_size($newval)
|
|
338 Function:
|
|
339 Example :
|
|
340 Returns : value of sample_size
|
|
341 Args : newvalue (optional)
|
|
342
|
|
343
|
|
344 =cut
|
|
345
|
|
346 sub sample_size{
|
|
347 my ($self,$value) = @_;
|
|
348 if( defined $value) {
|
|
349 $self->{'sample_size'} = $value;
|
|
350 }
|
|
351 return $self->{'sample_size'};
|
|
352
|
|
353 }
|
|
354
|
|
355 =head2 attach_EventHandler
|
|
356
|
|
357 Title : attach_EventHandler
|
|
358 Usage : $parser->attatch_EventHandler($handler)
|
|
359 Function: Adds an event handler to listen for events
|
|
360 Returns : none
|
|
361 Args : Bio::Event::EventHandlerI
|
|
362
|
|
363 =cut
|
|
364
|
|
365 sub attach_EventHandler{
|
|
366 my ($self,$handler) = @_;
|
|
367 return if( ! $handler );
|
|
368 if( ! $handler->isa('Bio::Event::EventHandlerI') ) {
|
|
369 $self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::Event::EventHandlerI');
|
|
370 }
|
|
371 $self->{'_handler'} = $handler;
|
|
372 return;
|
|
373 }
|
|
374
|
|
375 =head2 _eventHandler
|
|
376
|
|
377 Title : _eventHandler
|
|
378 Usage : private
|
|
379 Function: Get the EventHandler
|
|
380 Returns : Bio::Event::EventHandlerI
|
|
381 Args : none
|
|
382
|
|
383
|
|
384 =cut
|
|
385
|
|
386 sub _eventHandler{
|
|
387 my ($self) = @_;
|
|
388 return $self->{'_handler'};
|
|
389 }
|
|
390
|
|
391 =head2 random
|
|
392
|
|
393 Title : random
|
|
394 Usage : my $rfloat = $node->random($size)
|
|
395 Function: Generates a random number between 0 and $size
|
|
396 This is abstracted so that someone can override and provide their
|
|
397 own special RNG. This is expected to be a uniform RNG.
|
|
398 Returns : Floating point random
|
|
399 Args : $maximum size for random number (defaults to 1)
|
|
400
|
|
401
|
|
402 =cut
|
|
403
|
|
404 sub random{
|
|
405 my ($self,$max) = @_;
|
|
406 return rand($max);
|
|
407 }
|
|
408
|
|
409 1;
|