comparison variant_effect_predictor/Bio/Tree/Statistics.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: Statistics.pm,v 1.6 2002/12/24 17:52:03 jason Exp $
2 #
3 # BioPerl module for Bio::Tree::Statistics
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Tree::Statistics - Calculate certain statistics for a Tree
16
17 =head1 SYNOPSIS
18
19 Give standard usage here
20
21 =head1 DESCRIPTION
22
23 This object is a place to accumulate routines for calculating various
24 tree statistics from population genetic and phylogenetic methods.
25
26 Currently Fu and Li's D is implemented.
27 Tajima's D planned.
28
29 References forthcoming.
30
31 =head1 FEEDBACK
32
33 =head2 Mailing Lists
34
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to
37 the Bioperl mailing list. Your participation is much appreciated.
38
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/MailList.shtml - About the mailing lists
41
42 =head2 Reporting Bugs
43
44 Report bugs to the Bioperl bug tracking system to help us keep track
45 of the bugs and their resolution. Bug reports can be submitted via
46 the web:
47
48 http://bugzilla.bioperl.org/
49
50 =head1 AUTHOR - Aaron Mackey
51
52 Email jason@bioperl.org
53
54 =head1 CONTRIBUTORS
55
56 Matt Hahn E<lt>matthew.hahn@duke.dukeE<gt>
57
58 =head1 APPENDIX
59
60 The rest of the documentation details each of the object methods.
61 Internal methods are usually preceded with a _
62
63 =cut
64
65
66 # Let the code begin...
67
68
69 package Bio::Tree::Statistics;
70 use vars qw(@ISA);
71 use strict;
72
73 # Object preamble - inherits from Bio::Root::Root
74
75 use Bio::Root::Root;
76
77 @ISA = qw(Bio::Root::Root);
78
79 =head2 new
80
81 Title : new
82 Usage : my $obj = new Bio::Tree::Statistics();
83 Function: Builds a new Bio::Tree::Statistics object
84 Returns : Bio::Tree::Statistics
85 Args :
86
87
88 =cut
89
90 =head2 fu_and_li_D
91
92 Title : fu_and_li_D
93 Usage : my $D = $statistics->fu_an_li_D($tree,$nummut);
94 Function:
95 For this we assume that the tree is made up of
96 Bio::Tree::AlleleNode's which contain markers and alleles
97 each marker is a 'mutation'
98 Returns : Fu and Li's D statistic for this Tree
99 Args : $tree - Bio::Tree::TreeI which contains Bio::Tree::AlleleNodes
100
101 =cut
102
103 sub fu_and_li_D{
104 my ($self,$tree) = @_;
105
106 # for this we assume that the tree is made up of
107 # allele nodes which contain markers and alleles
108 # each marker is a 'mutation'
109 my @nodes = $tree->get_nodes();
110 my $muttotal =0;
111 my $tipmutcount = 0;
112 my $sampsize = 0;
113 foreach my $n ( @nodes ) {
114 if ($n->is_Leaf() ) {
115 $sampsize++;
116 $tipmutcount += $n->get_marker_names();
117 }
118 $muttotal += $n->get_marker_names();
119 }
120
121 if( $muttotal <= 0 ) {
122 $self->warn("mutation total was not > 0, cannot calculate a Fu and Li D");
123 return 0;
124 }
125 my $a = 0;
126 for(my $k= 1; $k < $sampsize; $k++ ) {
127 $a += ( 1 / $k );
128 }
129
130 my $b = 0;
131 for(my $k= 1; $k < $sampsize; $k++ ) {
132 $b += ( 1 / $k**2 );
133 }
134
135 my $c = 2 * ( ( ( $sampsize * $a ) - (2 * ( $sampsize -1 ))) /
136 ( ( $sampsize - 1) * ( $sampsize - 2 ) ) );
137
138 my $v = 1 + ( ( $a**2 / ( $b + $a**2 ) ) * ( $c - ( ( $sampsize + 1) /
139 ( $sampsize - 1) ) ));
140
141 my $u = $a - 1 - $v;
142 my $D = ( $muttotal - ( $a * $tipmutcount) ) /
143 ( sqrt ( ($u * $muttotal) + ( $v * $muttotal**2) ) );
144
145 return $D;
146 }
147
148
149 1;