comparison variant_effect_predictor/Bio/SearchDist.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: SearchDist.pm,v 1.16 2002/10/22 07:38:24 lapp Exp $
2
3 #
4 # BioPerl module for Bio::SearchDist
5 #
6 # Cared for by Ewan Birney <birney@sanger.ac.uk>
7 #
8 # Copyright Ewan Birney
9 #
10 # You may distribute this module under the same terms as perl itself
11
12 # POD documentation - main docs before the code
13
14 =head1 NAME
15
16 Bio::SearchDist - A perl wrapper around Sean Eddy's histogram object
17
18 =head1 SYNOPSIS
19
20 $dis = Bio::SearchDist->new();
21 foreach $score ( @scores ) {
22 $dis->add_score($score);
23 }
24
25 if( $dis->fit_evd() ) {
26 foreach $score ( @scores ) {
27 $evalue = $dis->evalue($score);
28 print "Score $score had an evalue of $evalue\n";
29 }
30 } else {
31 warn("Could not fit histogram to an EVD!");
32 }
33
34 =head1 DESCRIPTION
35
36 The Bio::SearchDist object is a wrapper around Sean Eddy's excellent
37 histogram object. The histogram object can bascially take in a number
38 of scores which are sensibly distributed somewhere around 0 that come
39 from a supposed Extreme Value Distribution. Having add all the scores
40 from a database search via the add_score method you can then fit a
41 extreme value distribution using fit_evd(). Once fitted you can then
42 get out the evalue for each score (or a new score) using
43 evalue($score).
44
45 The fitting procedure is better described in Sean Eddy's own code
46 (available from http://hmmer.wustl.edu, or in the histogram.h header
47 file in Compile/SW). Bascially it fits a EVD via a maximum likelhood
48 method with pruning of the top end of the distribution so that real
49 positives are discarded in the fitting procedure. This comes from
50 an orginally idea of Richard Mott's and the likelhood fitting
51 is from a book by Lawless [should ref here].
52
53
54 The object relies on the fact that the scores are sensibly distributed
55 around about 0 and that integer bins are sensible for the
56 histogram. Scores based on bits are often ideal for this (bits based
57 scoring mechanisms is what this histogram object was originally
58 designed for).
59
60
61 =head1 CONTACT
62
63 The original code this was based on comes from the histogram module as
64 part of the HMMer2 package. Look at http://hmmer.wustl.edu/
65
66 Its use in Bioperl is via the Compiled XS extension which is cared for
67 by Ewan Birney (birney@sanger.ac.uk). Please contact Ewan first about
68 the use of this module
69
70 =head1 FEEDBACK
71
72 =head2 Mailing Lists
73
74 User feedback is an integral part of the evolution of this and other
75 Bioperl modules. Send your comments and suggestions preferably to one
76 of the Bioperl mailing lists. Your participation is much appreciated.
77
78 bioperl-l@bioperl.org - General discussion
79 http://bio.perl.org/MailList.html - About the mailing lists
80
81 =head2 Reporting Bugs
82
83 Report bugs to the Bioperl bug tracking system to help us keep track
84 the bugs and their resolution. Bug reports can be submitted via email
85 or the web:
86
87 bioperl-bugs@bio.perl.org
88 http://bugzilla.bioperl.org/
89
90 =head1 APPENDIX
91
92 The rest of the documentation details each of the object
93 methods. Internal methods are usually preceded with a _
94
95 =cut
96
97
98 # Let the code begin...
99
100
101 package Bio::SearchDist;
102 use vars qw(@ISA);
103 use strict;
104
105 use Bio::Root::Root;
106
107 BEGIN {
108 eval {
109 require Bio::Ext::Align;
110 };
111 if ( $@ ) {
112 print $@;
113 print STDERR ("\nThe C-compiled engine for histogram object (Bio::Ext::Align) has not been installed.\n Please install the bioperl-ext package\n\n");
114 exit(1);
115 }
116 }
117
118
119 @ISA = qw(Bio::Root::Root);
120
121 sub new {
122 my($class,@args) = @_;
123 my $self = $class->SUPER::new(@args);
124 my($min, $max, $lump) =
125 $self->_rearrange([qw(MIN MAX LUMP)], @args);
126
127 if( ! $min ) {
128 $min = -100;
129 }
130
131 if( ! $max ) {
132 $max = +100;
133 }
134
135 if( ! $lump ) {
136 $lump = 50;
137 }
138
139 $self->_engine(&Bio::Ext::Align::new_Histogram($min,$max,$lump));
140
141 return $self;
142 }
143
144 =head2 add_score
145
146 Title : add_score
147 Usage : $dis->add_score(300);
148 Function: Adds a single score to the distribution
149 Returns : nothing
150 Args :
151
152
153 =cut
154
155 sub add_score{
156 my ($self,$score) = @_;
157 my ($eng);
158 $eng = $self->_engine();
159 #$eng->AddToHistogram($score);
160 $eng->add($score);
161 }
162
163 =head2 fit_evd
164
165 Title : fit_evd
166 Usage : $dis->fit_evd();
167 Function: fits an evd to the current distribution
168 Returns : 1 if it fits successfully, 0 if not
169 Args :
170
171
172 =cut
173
174 sub fit_evd{
175 my ($self,@args) = @_;
176
177 return $self->_engine()->fit_EVD(10000,1);
178 }
179
180 =head2 fit_Gaussian
181
182 Title : fit_Gaussian
183 Usage :
184 Function:
185 Example :
186 Returns :
187 Args :
188
189
190 =cut
191
192 sub fit_Gaussian{
193 my ($self,$high) = @_;
194
195 if( ! defined $high ) {
196 $high = 10000;
197 }
198
199 return $self->_engine()->fit_Gaussian($high);
200 }
201
202
203 =head2 evalue
204
205 Title : evalue
206 Usage : $eval = $dis->evalue($score)
207 Function: Returns the evalue of this score
208 Returns : float
209 Args :
210
211
212 =cut
213
214 sub evalue{
215 my ($self,$score) = @_;
216
217 return $self->_engine()->evalue($score);
218
219 }
220
221
222
223 =head2 _engine
224
225 Title : _engine
226 Usage : $obj->_engine($newval)
227 Function: underlyine bp_sw:: histogram engine
228 Returns : value of _engine
229 Args : newvalue (optional)
230
231
232 =cut
233
234 sub _engine{
235 my ($self,$value) = @_;
236 if( defined $value) {
237 $self->{'_engine'} = $value;
238 }
239 return $self->{'_engine'};
240 }
241
242
243 ## End of Package
244
245 1;
246 __END__
247