0
|
1 # $Id: SearchDist.pm,v 1.16 2002/10/22 07:38:24 lapp Exp $
|
|
2
|
|
3 #
|
|
4 # BioPerl module for Bio::SearchDist
|
|
5 #
|
|
6 # Cared for by Ewan Birney <birney@sanger.ac.uk>
|
|
7 #
|
|
8 # Copyright Ewan Birney
|
|
9 #
|
|
10 # You may distribute this module under the same terms as perl itself
|
|
11
|
|
12 # POD documentation - main docs before the code
|
|
13
|
|
14 =head1 NAME
|
|
15
|
|
16 Bio::SearchDist - A perl wrapper around Sean Eddy's histogram object
|
|
17
|
|
18 =head1 SYNOPSIS
|
|
19
|
|
20 $dis = Bio::SearchDist->new();
|
|
21 foreach $score ( @scores ) {
|
|
22 $dis->add_score($score);
|
|
23 }
|
|
24
|
|
25 if( $dis->fit_evd() ) {
|
|
26 foreach $score ( @scores ) {
|
|
27 $evalue = $dis->evalue($score);
|
|
28 print "Score $score had an evalue of $evalue\n";
|
|
29 }
|
|
30 } else {
|
|
31 warn("Could not fit histogram to an EVD!");
|
|
32 }
|
|
33
|
|
34 =head1 DESCRIPTION
|
|
35
|
|
36 The Bio::SearchDist object is a wrapper around Sean Eddy's excellent
|
|
37 histogram object. The histogram object can bascially take in a number
|
|
38 of scores which are sensibly distributed somewhere around 0 that come
|
|
39 from a supposed Extreme Value Distribution. Having add all the scores
|
|
40 from a database search via the add_score method you can then fit a
|
|
41 extreme value distribution using fit_evd(). Once fitted you can then
|
|
42 get out the evalue for each score (or a new score) using
|
|
43 evalue($score).
|
|
44
|
|
45 The fitting procedure is better described in Sean Eddy's own code
|
|
46 (available from http://hmmer.wustl.edu, or in the histogram.h header
|
|
47 file in Compile/SW). Bascially it fits a EVD via a maximum likelhood
|
|
48 method with pruning of the top end of the distribution so that real
|
|
49 positives are discarded in the fitting procedure. This comes from
|
|
50 an orginally idea of Richard Mott's and the likelhood fitting
|
|
51 is from a book by Lawless [should ref here].
|
|
52
|
|
53
|
|
54 The object relies on the fact that the scores are sensibly distributed
|
|
55 around about 0 and that integer bins are sensible for the
|
|
56 histogram. Scores based on bits are often ideal for this (bits based
|
|
57 scoring mechanisms is what this histogram object was originally
|
|
58 designed for).
|
|
59
|
|
60
|
|
61 =head1 CONTACT
|
|
62
|
|
63 The original code this was based on comes from the histogram module as
|
|
64 part of the HMMer2 package. Look at http://hmmer.wustl.edu/
|
|
65
|
|
66 Its use in Bioperl is via the Compiled XS extension which is cared for
|
|
67 by Ewan Birney (birney@sanger.ac.uk). Please contact Ewan first about
|
|
68 the use of this module
|
|
69
|
|
70 =head1 FEEDBACK
|
|
71
|
|
72 =head2 Mailing Lists
|
|
73
|
|
74 User feedback is an integral part of the evolution of this and other
|
|
75 Bioperl modules. Send your comments and suggestions preferably to one
|
|
76 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
77
|
|
78 bioperl-l@bioperl.org - General discussion
|
|
79 http://bio.perl.org/MailList.html - About the mailing lists
|
|
80
|
|
81 =head2 Reporting Bugs
|
|
82
|
|
83 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
84 the bugs and their resolution. Bug reports can be submitted via email
|
|
85 or the web:
|
|
86
|
|
87 bioperl-bugs@bio.perl.org
|
|
88 http://bugzilla.bioperl.org/
|
|
89
|
|
90 =head1 APPENDIX
|
|
91
|
|
92 The rest of the documentation details each of the object
|
|
93 methods. Internal methods are usually preceded with a _
|
|
94
|
|
95 =cut
|
|
96
|
|
97
|
|
98 # Let the code begin...
|
|
99
|
|
100
|
|
101 package Bio::SearchDist;
|
|
102 use vars qw(@ISA);
|
|
103 use strict;
|
|
104
|
|
105 use Bio::Root::Root;
|
|
106
|
|
107 BEGIN {
|
|
108 eval {
|
|
109 require Bio::Ext::Align;
|
|
110 };
|
|
111 if ( $@ ) {
|
|
112 print $@;
|
|
113 print STDERR ("\nThe C-compiled engine for histogram object (Bio::Ext::Align) has not been installed.\n Please install the bioperl-ext package\n\n");
|
|
114 exit(1);
|
|
115 }
|
|
116 }
|
|
117
|
|
118
|
|
119 @ISA = qw(Bio::Root::Root);
|
|
120
|
|
121 sub new {
|
|
122 my($class,@args) = @_;
|
|
123 my $self = $class->SUPER::new(@args);
|
|
124 my($min, $max, $lump) =
|
|
125 $self->_rearrange([qw(MIN MAX LUMP)], @args);
|
|
126
|
|
127 if( ! $min ) {
|
|
128 $min = -100;
|
|
129 }
|
|
130
|
|
131 if( ! $max ) {
|
|
132 $max = +100;
|
|
133 }
|
|
134
|
|
135 if( ! $lump ) {
|
|
136 $lump = 50;
|
|
137 }
|
|
138
|
|
139 $self->_engine(&Bio::Ext::Align::new_Histogram($min,$max,$lump));
|
|
140
|
|
141 return $self;
|
|
142 }
|
|
143
|
|
144 =head2 add_score
|
|
145
|
|
146 Title : add_score
|
|
147 Usage : $dis->add_score(300);
|
|
148 Function: Adds a single score to the distribution
|
|
149 Returns : nothing
|
|
150 Args :
|
|
151
|
|
152
|
|
153 =cut
|
|
154
|
|
155 sub add_score{
|
|
156 my ($self,$score) = @_;
|
|
157 my ($eng);
|
|
158 $eng = $self->_engine();
|
|
159 #$eng->AddToHistogram($score);
|
|
160 $eng->add($score);
|
|
161 }
|
|
162
|
|
163 =head2 fit_evd
|
|
164
|
|
165 Title : fit_evd
|
|
166 Usage : $dis->fit_evd();
|
|
167 Function: fits an evd to the current distribution
|
|
168 Returns : 1 if it fits successfully, 0 if not
|
|
169 Args :
|
|
170
|
|
171
|
|
172 =cut
|
|
173
|
|
174 sub fit_evd{
|
|
175 my ($self,@args) = @_;
|
|
176
|
|
177 return $self->_engine()->fit_EVD(10000,1);
|
|
178 }
|
|
179
|
|
180 =head2 fit_Gaussian
|
|
181
|
|
182 Title : fit_Gaussian
|
|
183 Usage :
|
|
184 Function:
|
|
185 Example :
|
|
186 Returns :
|
|
187 Args :
|
|
188
|
|
189
|
|
190 =cut
|
|
191
|
|
192 sub fit_Gaussian{
|
|
193 my ($self,$high) = @_;
|
|
194
|
|
195 if( ! defined $high ) {
|
|
196 $high = 10000;
|
|
197 }
|
|
198
|
|
199 return $self->_engine()->fit_Gaussian($high);
|
|
200 }
|
|
201
|
|
202
|
|
203 =head2 evalue
|
|
204
|
|
205 Title : evalue
|
|
206 Usage : $eval = $dis->evalue($score)
|
|
207 Function: Returns the evalue of this score
|
|
208 Returns : float
|
|
209 Args :
|
|
210
|
|
211
|
|
212 =cut
|
|
213
|
|
214 sub evalue{
|
|
215 my ($self,$score) = @_;
|
|
216
|
|
217 return $self->_engine()->evalue($score);
|
|
218
|
|
219 }
|
|
220
|
|
221
|
|
222
|
|
223 =head2 _engine
|
|
224
|
|
225 Title : _engine
|
|
226 Usage : $obj->_engine($newval)
|
|
227 Function: underlyine bp_sw:: histogram engine
|
|
228 Returns : value of _engine
|
|
229 Args : newvalue (optional)
|
|
230
|
|
231
|
|
232 =cut
|
|
233
|
|
234 sub _engine{
|
|
235 my ($self,$value) = @_;
|
|
236 if( defined $value) {
|
|
237 $self->{'_engine'} = $value;
|
|
238 }
|
|
239 return $self->{'_engine'};
|
|
240 }
|
|
241
|
|
242
|
|
243 ## End of Package
|
|
244
|
|
245 1;
|
|
246 __END__
|
|
247
|