0
|
1 # $Id: Statistics.pm,v 1.6 2002/12/24 17:52:03 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Tree::Statistics
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Tree::Statistics - Calculate certain statistics for a Tree
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 Give standard usage here
|
|
20
|
|
21 =head1 DESCRIPTION
|
|
22
|
|
23 This object is a place to accumulate routines for calculating various
|
|
24 tree statistics from population genetic and phylogenetic methods.
|
|
25
|
|
26 Currently Fu and Li's D is implemented.
|
|
27 Tajima's D planned.
|
|
28
|
|
29 References forthcoming.
|
|
30
|
|
31 =head1 FEEDBACK
|
|
32
|
|
33 =head2 Mailing Lists
|
|
34
|
|
35 User feedback is an integral part of the evolution of this and other
|
|
36 Bioperl modules. Send your comments and suggestions preferably to
|
|
37 the Bioperl mailing list. Your participation is much appreciated.
|
|
38
|
|
39 bioperl-l@bioperl.org - General discussion
|
|
40 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
41
|
|
42 =head2 Reporting Bugs
|
|
43
|
|
44 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
45 of the bugs and their resolution. Bug reports can be submitted via
|
|
46 the web:
|
|
47
|
|
48 http://bugzilla.bioperl.org/
|
|
49
|
|
50 =head1 AUTHOR - Aaron Mackey
|
|
51
|
|
52 Email jason@bioperl.org
|
|
53
|
|
54 =head1 CONTRIBUTORS
|
|
55
|
|
56 Matt Hahn E<lt>matthew.hahn@duke.dukeE<gt>
|
|
57
|
|
58 =head1 APPENDIX
|
|
59
|
|
60 The rest of the documentation details each of the object methods.
|
|
61 Internal methods are usually preceded with a _
|
|
62
|
|
63 =cut
|
|
64
|
|
65
|
|
66 # Let the code begin...
|
|
67
|
|
68
|
|
69 package Bio::Tree::Statistics;
|
|
70 use vars qw(@ISA);
|
|
71 use strict;
|
|
72
|
|
73 # Object preamble - inherits from Bio::Root::Root
|
|
74
|
|
75 use Bio::Root::Root;
|
|
76
|
|
77 @ISA = qw(Bio::Root::Root);
|
|
78
|
|
79 =head2 new
|
|
80
|
|
81 Title : new
|
|
82 Usage : my $obj = new Bio::Tree::Statistics();
|
|
83 Function: Builds a new Bio::Tree::Statistics object
|
|
84 Returns : Bio::Tree::Statistics
|
|
85 Args :
|
|
86
|
|
87
|
|
88 =cut
|
|
89
|
|
90 =head2 fu_and_li_D
|
|
91
|
|
92 Title : fu_and_li_D
|
|
93 Usage : my $D = $statistics->fu_an_li_D($tree,$nummut);
|
|
94 Function:
|
|
95 For this we assume that the tree is made up of
|
|
96 Bio::Tree::AlleleNode's which contain markers and alleles
|
|
97 each marker is a 'mutation'
|
|
98 Returns : Fu and Li's D statistic for this Tree
|
|
99 Args : $tree - Bio::Tree::TreeI which contains Bio::Tree::AlleleNodes
|
|
100
|
|
101 =cut
|
|
102
|
|
103 sub fu_and_li_D{
|
|
104 my ($self,$tree) = @_;
|
|
105
|
|
106 # for this we assume that the tree is made up of
|
|
107 # allele nodes which contain markers and alleles
|
|
108 # each marker is a 'mutation'
|
|
109 my @nodes = $tree->get_nodes();
|
|
110 my $muttotal =0;
|
|
111 my $tipmutcount = 0;
|
|
112 my $sampsize = 0;
|
|
113 foreach my $n ( @nodes ) {
|
|
114 if ($n->is_Leaf() ) {
|
|
115 $sampsize++;
|
|
116 $tipmutcount += $n->get_marker_names();
|
|
117 }
|
|
118 $muttotal += $n->get_marker_names();
|
|
119 }
|
|
120
|
|
121 if( $muttotal <= 0 ) {
|
|
122 $self->warn("mutation total was not > 0, cannot calculate a Fu and Li D");
|
|
123 return 0;
|
|
124 }
|
|
125 my $a = 0;
|
|
126 for(my $k= 1; $k < $sampsize; $k++ ) {
|
|
127 $a += ( 1 / $k );
|
|
128 }
|
|
129
|
|
130 my $b = 0;
|
|
131 for(my $k= 1; $k < $sampsize; $k++ ) {
|
|
132 $b += ( 1 / $k**2 );
|
|
133 }
|
|
134
|
|
135 my $c = 2 * ( ( ( $sampsize * $a ) - (2 * ( $sampsize -1 ))) /
|
|
136 ( ( $sampsize - 1) * ( $sampsize - 2 ) ) );
|
|
137
|
|
138 my $v = 1 + ( ( $a**2 / ( $b + $a**2 ) ) * ( $c - ( ( $sampsize + 1) /
|
|
139 ( $sampsize - 1) ) ));
|
|
140
|
|
141 my $u = $a - 1 - $v;
|
|
142 my $D = ( $muttotal - ( $a * $tipmutcount) ) /
|
|
143 ( sqrt ( ($u * $muttotal) + ( $v * $muttotal**2) ) );
|
|
144
|
|
145 return $D;
|
|
146 }
|
|
147
|
|
148
|
|
149 1;
|