Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tree/Statistics.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: Statistics.pm,v 1.6 2002/12/24 17:52:03 jason Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::Tree::Statistics | |
| 4 # | |
| 5 # Cared for by Jason Stajich <jason@bioperl.org> | |
| 6 # | |
| 7 # Copyright Jason Stajich | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::Tree::Statistics - Calculate certain statistics for a Tree | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 Give standard usage here | |
| 20 | |
| 21 =head1 DESCRIPTION | |
| 22 | |
| 23 This object is a place to accumulate routines for calculating various | |
| 24 tree statistics from population genetic and phylogenetic methods. | |
| 25 | |
| 26 Currently Fu and Li's D is implemented. | |
| 27 Tajima's D planned. | |
| 28 | |
| 29 References forthcoming. | |
| 30 | |
| 31 =head1 FEEDBACK | |
| 32 | |
| 33 =head2 Mailing Lists | |
| 34 | |
| 35 User feedback is an integral part of the evolution of this and other | |
| 36 Bioperl modules. Send your comments and suggestions preferably to | |
| 37 the Bioperl mailing list. Your participation is much appreciated. | |
| 38 | |
| 39 bioperl-l@bioperl.org - General discussion | |
| 40 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 41 | |
| 42 =head2 Reporting Bugs | |
| 43 | |
| 44 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 45 of the bugs and their resolution. Bug reports can be submitted via | |
| 46 the web: | |
| 47 | |
| 48 http://bugzilla.bioperl.org/ | |
| 49 | |
| 50 =head1 AUTHOR - Aaron Mackey | |
| 51 | |
| 52 Email jason@bioperl.org | |
| 53 | |
| 54 =head1 CONTRIBUTORS | |
| 55 | |
| 56 Matt Hahn E<lt>matthew.hahn@duke.dukeE<gt> | |
| 57 | |
| 58 =head1 APPENDIX | |
| 59 | |
| 60 The rest of the documentation details each of the object methods. | |
| 61 Internal methods are usually preceded with a _ | |
| 62 | |
| 63 =cut | |
| 64 | |
| 65 | |
| 66 # Let the code begin... | |
| 67 | |
| 68 | |
| 69 package Bio::Tree::Statistics; | |
| 70 use vars qw(@ISA); | |
| 71 use strict; | |
| 72 | |
| 73 # Object preamble - inherits from Bio::Root::Root | |
| 74 | |
| 75 use Bio::Root::Root; | |
| 76 | |
| 77 @ISA = qw(Bio::Root::Root); | |
| 78 | |
| 79 =head2 new | |
| 80 | |
| 81 Title : new | |
| 82 Usage : my $obj = new Bio::Tree::Statistics(); | |
| 83 Function: Builds a new Bio::Tree::Statistics object | |
| 84 Returns : Bio::Tree::Statistics | |
| 85 Args : | |
| 86 | |
| 87 | |
| 88 =cut | |
| 89 | |
| 90 =head2 fu_and_li_D | |
| 91 | |
| 92 Title : fu_and_li_D | |
| 93 Usage : my $D = $statistics->fu_an_li_D($tree,$nummut); | |
| 94 Function: | |
| 95 For this we assume that the tree is made up of | |
| 96 Bio::Tree::AlleleNode's which contain markers and alleles | |
| 97 each marker is a 'mutation' | |
| 98 Returns : Fu and Li's D statistic for this Tree | |
| 99 Args : $tree - Bio::Tree::TreeI which contains Bio::Tree::AlleleNodes | |
| 100 | |
| 101 =cut | |
| 102 | |
| 103 sub fu_and_li_D{ | |
| 104 my ($self,$tree) = @_; | |
| 105 | |
| 106 # for this we assume that the tree is made up of | |
| 107 # allele nodes which contain markers and alleles | |
| 108 # each marker is a 'mutation' | |
| 109 my @nodes = $tree->get_nodes(); | |
| 110 my $muttotal =0; | |
| 111 my $tipmutcount = 0; | |
| 112 my $sampsize = 0; | |
| 113 foreach my $n ( @nodes ) { | |
| 114 if ($n->is_Leaf() ) { | |
| 115 $sampsize++; | |
| 116 $tipmutcount += $n->get_marker_names(); | |
| 117 } | |
| 118 $muttotal += $n->get_marker_names(); | |
| 119 } | |
| 120 | |
| 121 if( $muttotal <= 0 ) { | |
| 122 $self->warn("mutation total was not > 0, cannot calculate a Fu and Li D"); | |
| 123 return 0; | |
| 124 } | |
| 125 my $a = 0; | |
| 126 for(my $k= 1; $k < $sampsize; $k++ ) { | |
| 127 $a += ( 1 / $k ); | |
| 128 } | |
| 129 | |
| 130 my $b = 0; | |
| 131 for(my $k= 1; $k < $sampsize; $k++ ) { | |
| 132 $b += ( 1 / $k**2 ); | |
| 133 } | |
| 134 | |
| 135 my $c = 2 * ( ( ( $sampsize * $a ) - (2 * ( $sampsize -1 ))) / | |
| 136 ( ( $sampsize - 1) * ( $sampsize - 2 ) ) ); | |
| 137 | |
| 138 my $v = 1 + ( ( $a**2 / ( $b + $a**2 ) ) * ( $c - ( ( $sampsize + 1) / | |
| 139 ( $sampsize - 1) ) )); | |
| 140 | |
| 141 my $u = $a - 1 - $v; | |
| 142 my $D = ( $muttotal - ( $a * $tipmutcount) ) / | |
| 143 ( sqrt ( ($u * $muttotal) + ( $v * $muttotal**2) ) ); | |
| 144 | |
| 145 return $D; | |
| 146 } | |
| 147 | |
| 148 | |
| 149 1; |
