Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/MZEF.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: MZEF.pm,v 1.9 2002/10/22 07:38:46 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::Tools::MZEF | |
4 # | |
5 # Cared for by Hilmar Lapp <hlapp@gmx.net> | |
6 # | |
7 # Copyright Hilmar Lapp | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Tools::MZEF - Results of one MZEF run | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef'); | |
20 # filehandle: | |
21 $mzef = Bio::Tools::MZEF->new( -fh => \*INPUT ); | |
22 # to indicate that the sequence was reversed prior to feeding it to MZEF | |
23 # and that you want to have this reflected in the strand() attribute of | |
24 # the exons, as well as have the coordinates translated to the non-reversed | |
25 # sequence | |
26 $mzef = Bio::Tools::MZEF->new( -file => 'result.mzef', | |
27 -strand => -1 ); | |
28 | |
29 # parse the results | |
30 # note: this class is-a Bio::Tools::AnalysisResult which implements | |
31 # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() does the same | |
32 while($gene = $mzef->next_prediction()) { | |
33 # $gene is an instance of Bio::Tools::Prediction::Gene | |
34 | |
35 # $gene->exons() returns an array of | |
36 # Bio::Tools::Prediction::Exon objects | |
37 # all exons: | |
38 @exon_arr = $gene->exons(); | |
39 | |
40 # internal exons only | |
41 @intrl_exons = $gene->exons('Internal'); | |
42 # note that presently MZEF predicts only internal exons! | |
43 } | |
44 | |
45 # essential if you gave a filename at initialization (otherwise the file | |
46 # will stay open) | |
47 $mzef->close(); | |
48 | |
49 =head1 DESCRIPTION | |
50 | |
51 The MZEF module provides a parser for MZEF gene structure prediction | |
52 output. | |
53 | |
54 This module inherits off L<Bio::Tools::AnalysisResult> and therefore | |
55 implements L<Bio::SeqAnalysisParserI>. | |
56 | |
57 =head1 FEEDBACK | |
58 | |
59 =head2 Mailing Lists | |
60 | |
61 User feedback is an integral part of the evolution of this and other | |
62 Bioperl modules. Send your comments and suggestions preferably to one | |
63 of the Bioperl mailing lists. Your participation is much appreciated. | |
64 | |
65 bioperl-l@bioperl.org - General discussion | |
66 http://bio.perl.org/MailList.html - About the mailing lists | |
67 | |
68 =head2 Reporting Bugs | |
69 | |
70 Report bugs to the Bioperl bug tracking system to help us keep track | |
71 of the bugs and their resolution. Bug reports can be submitted via email | |
72 or the web: | |
73 | |
74 bioperl-bugs@bio.perl.org | |
75 http://bugzilla.bioperl.org/ | |
76 | |
77 =head1 AUTHOR - Hilmar Lapp | |
78 | |
79 Email hlapp@gmx.net (or hilmar.lapp@pharma.novartis.com) | |
80 | |
81 Describe contact details here | |
82 | |
83 =head1 APPENDIX | |
84 | |
85 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
86 | |
87 =cut | |
88 | |
89 | |
90 # Let the code begin... | |
91 | |
92 | |
93 package Bio::Tools::MZEF; | |
94 use vars qw(@ISA); | |
95 use strict; | |
96 | |
97 use Bio::Tools::AnalysisResult; | |
98 use Bio::Tools::Prediction::Gene; | |
99 use Bio::Tools::Prediction::Exon; | |
100 | |
101 @ISA = qw(Bio::Tools::AnalysisResult); | |
102 | |
# Resets the parser state of this object.
#
# Args    : the named parameters given at construction time; only -strand
#           is consumed here, everything is also forwarded to the
#           superclass implementation.
# Effects : stores the strand (1 if -strand was not given), clears the
#           "predictions parsed" and "has cds" flags, and empties the
#           queue of pre-parsed predictions.
sub _initialize_state {
    my ($self,@args) = @_;

    # first call the inherited method!
    $self->SUPER::_initialize_state(@args);

    # handle our own parameters
    my ($strand) = $self->_rearrange([qw(STRAND)], @args);

    # our private state variables
    $strand = 1 unless defined($strand);
    $self->{'_strand'} = $strand;
    $self->{'_preds_parsed'} = 0;
    $self->{'_has_cds'} = 0;
    # array of pre-parsed predictions
    $self->{'_preds'} = [];
}
123 | |
124 =head2 analysis_method | |
125 | |
126 Usage : $mzef->analysis_method(); | |
127 Purpose : Inherited method. Overridden to ensure that the name matches | |
128 /mzef/i. | |
129 Returns : String | |
130 Argument : n/a | |
131 | |
132 =cut | |
133 | |
134 #------------- | |
sub analysis_method {
#-------------
    my ($self, $method) = @_;

    # a true-valued method name is acceptable only if it contains "mzef"
    # (case-insensitive); anything else is reported through throw()
    my $unsupported = $method && ($method !~ /mzef/i);
    $self->throw("method $method not supported in " . ref($self))
        if $unsupported;

    # getting/setting itself is delegated to the superclass
    return $self->SUPER::analysis_method($method);
}
143 | |
144 =head2 next_feature | |
145 | |
146 Title : next_feature | |
147 Usage : while($gene = $mzef->next_feature()) { | |
148 # do something | |
149 } | |
150 Function: Returns the next gene structure prediction of the MZEF result | |
151 file. Call this method repeatedly until FALSE is returned. | |
152 | |
153 The returned object is actually a SeqFeatureI implementing object. | |
154 This method is required for classes implementing the | |
155 SeqAnalysisParserI interface, and is merely an alias for | |
156 next_prediction() at present. | |
157 | |
158 Note that with the present version of MZEF there will only be one | |
159 object returned, because MZEF does not predict individual genes | |
160 but just potential internal exons. | |
161 Example : | |
162 Returns : A Bio::Tools::Prediction::Gene object. | |
163 Args : | |
164 | |
165 =cut | |
166 | |
sub next_feature {
    my $self = shift;

    # Neither this method nor next_prediction() expects arguments at
    # present; whatever was passed is forwarded nevertheless, so that
    # nothing needs to change here should that ever be different.
    return $self->next_prediction(@_);
}
174 | |
175 =head2 next_prediction | |
176 | |
177 Title : next_prediction | |
178 Usage : while($gene = $mzef->next_prediction()) { | |
179 # do something | |
180 } | |
181 Function: Returns the next gene structure prediction of the MZEF result | |
182 file. Call this method repeatedly until FALSE is returned. | |
183 | |
184 Note that with the present version of MZEF there will only be one | |
185 object returned, because MZEF does not predict individual genes | |
186 but just potential internal exons. | |
187 Example : | |
188 Returns : A Bio::Tools::Prediction::Gene object. | |
189 Args : | |
190 | |
191 =cut | |
192 | |
# Returns the next queued prediction, or undef once the queue is empty.
# The prediction section is parsed lazily on the first call.
sub next_prediction {
    my ($self) = @_;

    # if the prediction section hasn't been parsed yet, we do this now
    if(! $self->_predictions_parsed()) {
        $self->_parse_predictions();
    }

    # hand out the next gene structure from the queue
    return $self->_prediction();
}
203 | |
204 =head2 _parse_predictions | |
205 | |
206 Title : _parse_predictions() | |
207 Usage : $obj->_parse_predictions() | |
208 Function: Parses the prediction section. Automatically called by | |
209 next_prediction() if not yet done. | |
210 Example : | |
211 Returns : | |
212 | |
213 =cut | |
214 | |
# Reads all remaining lines through _readline() and parses the prediction
# section of the MZEF report.
#
# Every exon line ("start - end" followed by the score columns) becomes a
# Bio::Tools::Prediction::Exon that is added to a single
# Bio::Tools::Prediction::Gene; that gene, if any exon was seen, is queued
# via _add_prediction(). Finally the object is flagged as parsed.
#
# Throws  : if the strand is not 1 and an exon line is encountered before
#           the Sequence_length header, since the coordinates cannot be
#           translated back to the non-reversed sequence without it.
sub _parse_predictions {
    my ($self) = @_;
    my $exon_tag = "InternalExon";
    my $gene;
    # The sequence name given in the output is poorly formatted (the file
    # name is truncated to 10 chars), so only the length is taken from the
    # header line.
    my $seqlen;
    my $prednr = 1;

    # a lexical line buffer is used so that the caller's $_ stays untouched
    while(defined(my $line = $self->_readline())) {
        # exon or signal; start-end is handled first (and stripped off)
        # because it may not be space delimited for large numbers
        if($line =~ s/^\s*(\d+)\s*-\s*(\d+)\s+//) {
            my ($start,$end) = ($1,$2);
            if(! defined($gene)) {
                $gene = Bio::Tools::Prediction::Gene->new(
                                       '-primary' => "GenePrediction$prednr",
                                       '-source' => 'MZEF');
            }
            # split the rest into fields
            chomp($line);
            # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
            # index:              0   1   2   3   4   5   6   7
            my @flds = split(' ', $line);
            # create the feature object depending on the type of signal --
            # which is always an (internal) exon for MZEF
            my $predobj = Bio::Tools::Prediction::Exon->new();
            # set common fields
            $predobj->source_tag('MZEF');
            $predobj->significance($flds[0]);
            $predobj->score($flds[0]); # what shall we set as overall score?
            $predobj->strand($self->{'_strand'}); # MZEF searches only one
            if($predobj->strand() == 1) {
                $predobj->start($start);
                $predobj->end($end);
            } else {
                # mirroring the coordinates needs the sequence length;
                # without it the arithmetic below would silently yield
                # meaningless positions
                if(! defined($seqlen)) {
                    $self->throw("sequence length unknown, cannot translate " .
                                 "exon $start-$end to the non-reversed " .
                                 "sequence");
                }
                $predobj->start($seqlen-$end+1);
                $predobj->end($seqlen-$start+1);
            }
            # set scores
            $predobj->start_signal_score($flds[5]);
            $predobj->end_signal_score($flds[7]);
            $predobj->coding_signal_score($flds[6]);
            # frame -- we simply take the position of the first "1" in the
            # orf field; no "1" at all means the frame is left undefined
            my $frm = index($flds[4], "1");
            $predobj->frame(($frm < 0) ? undef : $frm);
            $predobj->primary_tag($exon_tag);
            $predobj->is_coding(1);
            # add to gene structure (should be done only when start and end
            # are set, in order to allow for proper expansion of the range)
            $gene->add_exon($predobj);
            next;
        }
        # title line naming the program
        if($line =~ /^\s*Internal .*(MZEF)/) {
            $self->analysis_method($1);
            next;
        }
        # header line carrying the (unused) file name and the length
        if($line =~ /^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
            $seqlen = $2;
            next;
        }
    }
    $self->_add_prediction($gene) if defined($gene);
    $self->_predictions_parsed(1);
}
286 | |
287 =head2 _prediction | |
288 | |
289 Title : _prediction() | |
290 Usage : $gene = $obj->_prediction() | |
291 Function: internal | |
292 Example : | |
293 Returns : | |
294 | |
295 =cut | |
296 | |
sub _prediction {
    my ($self) = @_;

    # number of predictions still waiting in the queue (false if there is
    # no queue at all)
    my $pending = exists($self->{'_preds'}) && @{$self->{'_preds'}};

    if($pending) {
        # hand out the oldest queued prediction and drop it from the queue
        return shift(@{$self->{'_preds'}});
    }
    return undef;
}
303 | |
304 =head2 _add_prediction | |
305 | |
306 Title : _add_prediction() | |
307 Usage : $obj->_add_prediction($gene) | |
308 Function: internal | |
309 Example : | |
310 Returns : | |
311 | |
312 =cut | |
313 | |
sub _add_prediction {
    my ($self, $gene) = @_;

    # create the queue on demand, then append; the value of push (the new
    # queue length) is what gets returned
    $self->{'_preds'} = [] unless exists($self->{'_preds'});
    return push(@{$self->{'_preds'}}, $gene);
}
322 | |
323 =head2 _predictions_parsed | |
324 | |
325 Title : _predictions_parsed | |
326 Usage : $obj->_predictions_parsed | |
327 Function: internal | |
328 Example : | |
329 Returns : TRUE or FALSE | |
330 | |
331 =cut | |
332 | |
sub _predictions_parsed {
    my ($self, $val) = @_;

    if($val) {
        # only a true value is ever stored; a false argument is ignored
        $self->{'_preds_parsed'} = $val;
    } elsif(! exists($self->{'_preds_parsed'})) {
        # never set so far: default to "not parsed"
        $self->{'_preds_parsed'} = 0;
    }
    return $self->{'_preds_parsed'};
}
342 | |
343 | |
344 1; |