Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tree/RandomFactory.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: RandomFactory.pm,v 1.8 2002/12/24 17:52:03 jason Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::Tree::RandomFactory | |
| 4 # | |
| 5 # Cared for by Jason Stajich <jason@bioperl.org> | |
| 6 # | |
| 7 # Copyright Jason Stajich | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::Tree::RandomFactory - TreeFactory for generating Random Trees | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 use Bio::Tree::RandomFactory | |
| 20 my $factory = new Bio::Tree::RandomFactory( -samples => \@taxonnames, | |
| 21 -maxcount => 10); | |
| 22 | |
| 23 # or for anonymous samples | |
| 24 | |
| 25 my $factory = new Bio::Tree::RandomFactory( -sample_size => 6, | |
| 26 -maxcount = 50); | |
| 27 | |
| 28 =head1 DESCRIPTION | |
| 29 | |
| 30 Builds a random tree every time next_tree is called or up to -maxcount times. | |
| 31 | |
| 32 This algorithm is based on the make_tree algorithm from Richard Hudson 1990. | |
| 33 | |
| 34 Hudson, R. R. 1990. Gene genealogies and the coalescent | |
| 35 process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford | |
| 36 surveys in evolutionary biology. Vol. 7. Oxford University | |
| 37 Press, New York | |
| 38 | |
| 39 | |
| 40 =head1 FEEDBACK | |
| 41 | |
| 42 =head2 Mailing Lists | |
| 43 | |
| 44 User feedback is an integral part of the evolution of this and other | |
| 45 Bioperl modules. Send your comments and suggestions preferably to | |
| 46 the Bioperl mailing list. Your participation is much appreciated. | |
| 47 | |
| 48 bioperl-l@bioperl.org - General discussion | |
| 49 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 50 | |
| 51 =head2 Reporting Bugs | |
| 52 | |
| 53 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 54 of the bugs and their resolution. Bug reports can be submitted via | |
| 55 the web: | |
| 56 | |
| 57 http://bugzilla.bioperl.org/ | |
| 58 | |
| 59 | |
| 60 =head1 AUTHOR - Jason Stajich | |
| 61 | |
| 62 Email jason@bioperl.org | |
| 63 | |
| 64 =head1 CONTRIBUTORS | |
| 65 | |
| 66 Matthew Hahn, E<lt>matthew.hahn@duke.eduE<gt> | |
| 67 | |
| 68 =head1 APPENDIX | |
| 69 | |
| 70 The rest of the documentation details each of the object methods. | |
| 71 Internal methods are usually preceded with a _ | |
| 72 | |
| 73 =cut | |
| 74 | |
| 75 | |
| 76 # Let the code begin... | |
| 77 | |
| 78 | |
| 79 package Bio::Tree::RandomFactory; | |
| 80 use vars qw(@ISA $PRECISION_DIGITS); | |
| 81 use strict; | |
| 82 | |
| 83 BEGIN { | |
| 84 $PRECISION_DIGITS = 3; # Precision for the branchlength | |
| 85 } | |
| 86 | |
| 87 use Bio::Factory::TreeFactoryI; | |
| 88 use Bio::Root::Root; | |
| 89 use Bio::TreeIO::TreeEventBuilder; | |
| 90 use Bio::Tree::AlleleNode; | |
| 91 | |
| 92 @ISA = qw(Bio::Root::Root Bio::Factory::TreeFactoryI ); | |
| 93 | |
| 94 =head2 new | |
| 95 | |
| 96 Title : new | |
| 97 Usage : my $factory = new Bio::Tree::RandomFactory(-samples => \@samples, | |
| 98 -maxcount=> $N); | |
| 99 Function: Initializes a Bio::Tree::RandomFactory object | |
| 100 Returns : Bio::Tree::RandomFactory | |
| 101 Args : | |
| 102 | |
| 103 | |
| 104 =cut | |
| 105 | |
| 106 sub new{ | |
| 107 my ($class,@args) = @_; | |
| 108 my $self = $class->SUPER::new(@args); | |
| 109 | |
| 110 $self->{'_eventbuilder'} = new Bio::TreeIO::TreeEventBuilder(); | |
| 111 $self->{'_treecounter'} = 0; | |
| 112 $self->{'_maxcount'} = 0; | |
| 113 my ($maxcount, $samps,$samplesize ) = $self->_rearrange([qw(MAXCOUNT | |
| 114 SAMPLES | |
| 115 SAMPLE_SIZE)], | |
| 116 @args); | |
| 117 my @samples; | |
| 118 | |
| 119 if( ! defined $samps ) { | |
| 120 if( ! defined $samplesize || $samplesize <= 0 ) { | |
| 121 $self->throw("Must specify a valid samplesize if parameter -SAMPLE is not specified"); | |
| 122 } | |
| 123 foreach ( 1..$samplesize ) { push @samples, "Samp$_"; } | |
| 124 } else { | |
| 125 if( ref($samps) =~ /ARRAY/i ) { | |
| 126 $self->throw("Must specify a valid ARRAY reference to the parameter -SAMPLES, did you forget a leading '\\'?"); | |
| 127 } | |
| 128 @samples = @$samps; | |
| 129 } | |
| 130 | |
| 131 $self->samples(\@samples); | |
| 132 $self->sample_size(scalar @samples); | |
| 133 if( defined $maxcount ) { | |
| 134 $self->maxcount($maxcount); | |
| 135 } | |
| 136 return $self; | |
| 137 } | |
| 138 | |
| 139 =head2 next_tree | |
| 140 | |
| 141 Title : next_tree | |
| 142 Usage : my $tree = $factory->next_tree | |
| 143 Function: Returns a random tree based on the initialized number of nodes | |
| 144 NOTE: if maxcount is not specified on initialization or | |
| 145 set to a valid integer, subsequent calls to next_tree will | |
| 146 continue to return random trees and never return undef | |
| 147 | |
| 148 Returns : Bio::Tree::TreeI object | |
| 149 Args : none | |
| 150 | |
| 151 =cut | |
| 152 | |
| 153 sub next_tree{ | |
| 154 my ($self) = @_; | |
| 155 # If maxcount is set to something non-zero then next tree will | |
| 156 # continue to return valid trees until maxcount is reached | |
| 157 # otherwise will always return trees | |
| 158 return undef if( $self->maxcount && | |
| 159 $self->{'_treecounter'}++ >= $self->maxcount ); | |
| 160 my $size = $self->sample_size; | |
| 161 | |
| 162 my $in; | |
| 163 my @tree = (); | |
| 164 my @list = (); | |
| 165 | |
| 166 for($in=0;$in < 2*$size -1; $in++ ) { | |
| 167 push @tree, { 'nodenum' => "Node$in" }; | |
| 168 } | |
| 169 # in C we would have 2 arrays | |
| 170 # an array of nodes (tree) | |
| 171 # and array of pointers to these nodes (list) | |
| 172 # and we just shuffle the list items to do the | |
| 173 # tree topology generation | |
| 174 # instead in perl, we will have a list of hashes (nodes) called @tree | |
| 175 # and a list of integers representing the indexes in tree called @list | |
| 176 | |
| 177 for($in=0;$in < $size;$in++) { | |
| 178 $tree[$in]->{'time'} = 0; | |
| 179 $tree[$in]->{'desc1'} = undef; | |
| 180 $tree[$in]->{'desc2'} = undef; | |
| 181 push @list, $in; | |
| 182 } | |
| 183 | |
| 184 my $t=0; | |
| 185 # generate times for the nodes | |
| 186 for($in = $size; $in > 1; $in-- ) { | |
| 187 $t+= -2.0 * log(1 - $self->random(1)) / ( $in * ($in-1) ); | |
| 188 $tree[2 * $size - $in]->{'time'} =$t; | |
| 189 } | |
| 190 # topology generation | |
| 191 for ($in = $size; $in > 1; $in-- ) { | |
| 192 my $pick = int $self->random($in); | |
| 193 my $nodeindex = $list[$pick]; | |
| 194 my $swap = 2 * $size - $in; | |
| 195 $tree[$swap]->{'desc1'} = $nodeindex; | |
| 196 $list[$pick] = $list[$in-1]; | |
| 197 $pick = int rand($in - 1); | |
| 198 $nodeindex = $list[$pick]; | |
| 199 $tree[$swap]->{'desc2'} = $nodeindex; | |
| 200 $list[$pick] = $swap; | |
| 201 } | |
| 202 # Let's convert the hashes into nodes | |
| 203 | |
| 204 my @nodes = (); | |
| 205 foreach my $n ( @tree ) { | |
| 206 push @nodes, | |
| 207 new Bio::Tree::AlleleNode(-id => $n->{'nodenum'}, | |
| 208 -branch_length => $n->{'time'}); | |
| 209 } | |
| 210 my $ct = 0; | |
| 211 foreach my $node ( @nodes ) { | |
| 212 my $n = $tree[$ct++]; | |
| 213 if( defined $n->{'desc1'} ) { | |
| 214 $node->add_Descendent($nodes[$n->{'desc1'}]); | |
| 215 } | |
| 216 if( defined $n->{'desc2'} ) { | |
| 217 $node->add_Descendent($nodes[$n->{'desc2'}]); | |
| 218 } | |
| 219 } | |
| 220 my $T = new Bio::Tree::Tree(-root => pop @nodes ); | |
| 221 return $T; | |
| 222 } | |
| 223 | |
| 224 =head2 add_Mutations | |
| 225 | |
| 226 Title : add_Mutations | |
| 227 Usage : $factory->add_Mutations($tree, $mutcount); | |
| 228 Function: Adds mutations to a tree via a random process weighted by | |
| 229 branch length (it is a poisson distribution | |
| 230 as part of a coalescent process) | |
| 231 Returns : none | |
| 232 Args : $tree - Bio::Tree::TreeI | |
| 233 $nummut - number of mutations | |
| 234 | |
| 235 | |
| 236 =cut | |
| 237 | |
| 238 sub add_Mutations{ | |
| 239 my ($self,$tree, $nummut) = @_; | |
| 240 my @branches; | |
| 241 my @lens; | |
| 242 my $branchlen = 0; | |
| 243 my $last = 0; | |
| 244 my @nodes = $tree->get_nodes(); | |
| 245 my $precision = 10**$PRECISION_DIGITS; | |
| 246 my $i = 0; | |
| 247 | |
| 248 # Jason's somewhat simplistics way of doing a poission | |
| 249 # distribution for a fixed number of mutations | |
| 250 # build an array and put the node number in a slot | |
| 251 # representing the branch to put a mutation on | |
| 252 # but weight the number of slots per branch by the | |
| 253 # length of the branch ( ancestor's time - node time) | |
| 254 | |
| 255 foreach my $node ( @nodes ) { | |
| 256 if( $node->ancestor ) { | |
| 257 my $len = int ( ($node->ancestor->branch_length - | |
| 258 $node->branch_length) * $precision); | |
| 259 if ( $len > 0 ) { | |
| 260 for( my $j =0;$j < $len;$j++) { | |
| 261 push @branches, $i; | |
| 262 } | |
| 263 $last += $len; | |
| 264 } | |
| 265 $branchlen += $len; | |
| 266 } | |
| 267 if( ! $node->isa('Bio::Tree::AlleleNode') ) { | |
| 268 bless $node, 'Bio::Tree::AlleleNode'; # rebless it to the right node | |
| 269 } | |
| 270 $node->purge_markers; | |
| 271 $i++; | |
| 272 } | |
| 273 # sanity check | |
| 274 die("branch len is $branchlen arraylen is $last") | |
| 275 unless ( $branchlen == $last ); | |
| 276 | |
| 277 for( my $j = 0; $j < $nummut; $j++) { | |
| 278 my $index = int(rand($branchlen)); | |
| 279 my $branch = $branches[$index]; | |
| 280 $nodes[$branch]->add_alleles("Mutation$j", [1]); | |
| 281 } | |
| 282 } | |
| 283 | |
| 284 =head2 maxcount | |
| 285 | |
| 286 Title : maxcount | |
| 287 Usage : $obj->maxcount($newval) | |
| 288 Function: | |
| 289 Example : | |
| 290 Returns : value of maxcount | |
| 291 Args : newvalue (optional) | |
| 292 | |
| 293 | |
| 294 =cut | |
| 295 | |
| 296 sub maxcount{ | |
| 297 my ($self,$value) = @_; | |
| 298 if( defined $value) { | |
| 299 if( $value =~ /^(\d+)/ ) { | |
| 300 $self->{'maxcount'} = $1; | |
| 301 } else { | |
| 302 $self->warn("Must specify a valid Positive integer to maxcount"); | |
| 303 $self->{'maxcount'} = 0; | |
| 304 } | |
| 305 } | |
| 306 return $self->{'_maxcount'}; | |
| 307 } | |
| 308 | |
| 309 =head2 samples | |
| 310 | |
| 311 Title : samples | |
| 312 Usage : $obj->samples($newval) | |
| 313 Function: | |
| 314 Example : | |
| 315 Returns : value of samples | |
| 316 Args : newvalue (optional) | |
| 317 | |
| 318 | |
| 319 =cut | |
| 320 | |
| 321 sub samples{ | |
| 322 my ($self,$value) = @_; | |
| 323 if( defined $value) { | |
| 324 if( ref($value) !~ /ARRAY/i ) { | |
| 325 $self->warn("Must specify a valid array ref to the method 'samples'"); | |
| 326 $value = []; | |
| 327 } | |
| 328 $self->{'samples'} = $value; | |
| 329 } | |
| 330 return $self->{'samples'}; | |
| 331 | |
| 332 } | |
| 333 | |
| 334 =head2 sample_size | |
| 335 | |
| 336 Title : sample_size | |
| 337 Usage : $obj->sample_size($newval) | |
| 338 Function: | |
| 339 Example : | |
| 340 Returns : value of sample_size | |
| 341 Args : newvalue (optional) | |
| 342 | |
| 343 | |
| 344 =cut | |
| 345 | |
| 346 sub sample_size{ | |
| 347 my ($self,$value) = @_; | |
| 348 if( defined $value) { | |
| 349 $self->{'sample_size'} = $value; | |
| 350 } | |
| 351 return $self->{'sample_size'}; | |
| 352 | |
| 353 } | |
| 354 | |
| 355 =head2 attach_EventHandler | |
| 356 | |
| 357 Title : attach_EventHandler | |
| 358 Usage : $parser->attatch_EventHandler($handler) | |
| 359 Function: Adds an event handler to listen for events | |
| 360 Returns : none | |
| 361 Args : Bio::Event::EventHandlerI | |
| 362 | |
| 363 =cut | |
| 364 | |
| 365 sub attach_EventHandler{ | |
| 366 my ($self,$handler) = @_; | |
| 367 return if( ! $handler ); | |
| 368 if( ! $handler->isa('Bio::Event::EventHandlerI') ) { | |
| 369 $self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::Event::EventHandlerI'); | |
| 370 } | |
| 371 $self->{'_handler'} = $handler; | |
| 372 return; | |
| 373 } | |
| 374 | |
| 375 =head2 _eventHandler | |
| 376 | |
| 377 Title : _eventHandler | |
| 378 Usage : private | |
| 379 Function: Get the EventHandler | |
| 380 Returns : Bio::Event::EventHandlerI | |
| 381 Args : none | |
| 382 | |
| 383 | |
| 384 =cut | |
| 385 | |
| 386 sub _eventHandler{ | |
| 387 my ($self) = @_; | |
| 388 return $self->{'_handler'}; | |
| 389 } | |
| 390 | |
| 391 =head2 random | |
| 392 | |
| 393 Title : random | |
| 394 Usage : my $rfloat = $node->random($size) | |
| 395 Function: Generates a random number between 0 and $size | |
| 396 This is abstracted so that someone can override and provide their | |
| 397 own special RNG. This is expected to be a uniform RNG. | |
| 398 Returns : Floating point random | |
| 399 Args : $maximum size for random number (defaults to 1) | |
| 400 | |
| 401 | |
| 402 =cut | |
| 403 | |
| 404 sub random{ | |
| 405 my ($self,$max) = @_; | |
| 406 return rand($max); | |
| 407 } | |
| 408 | |
| 409 1; |
