comparison variant_effect_predictor/Bio/Tools/MZEF.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: MZEF.pm,v 1.9 2002/10/22 07:38:46 lapp Exp $
2 #
3 # BioPerl module for Bio::Tools::MZEF
4 #
5 # Cared for by Hilmar Lapp <hlapp@gmx.net>
6 #
7 # Copyright Hilmar Lapp
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Tools::MZEF - Results of one MZEF run
16
17 =head1 SYNOPSIS
18
19 $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
20 # filehandle:
21 $mzef = Bio::Tools::MZEF->new( -fh => \*INPUT );
22 # to indicate that the sequence was reversed prior to feeding it to MZEF
23 # and that you want to have this reflected in the strand() attribute of
24 # the exons, as well as have the coordinates translated to the non-reversed
25 # sequence
26 $mzef = Bio::Tools::MZEF->new( -file => 'result.mzef',
27 -strand => -1 );
28
29 # parse the results
30 # note: this class is-a Bio::Tools::AnalysisResult which implements
31 # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
32 while($gene = $mzef->next_prediction()) {
33 # $gene is an instance of Bio::Tools::Prediction::Gene
34
35 # $gene->exons() returns an array of
36 # Bio::Tools::Prediction::Exon objects
37 # all exons:
38 @exon_arr = $gene->exons();
39
40 # internal exons only
41 @intrl_exons = $gene->exons('Internal');
42 # note that presently MZEF predicts only internal exons!
43 }
44
45 # essential if you gave a filename at initialization (otherwise the file
46 # will stay open)
47 $mzef->close();
48
49 =head1 DESCRIPTION
50
51 The MZEF module provides a parser for MZEF gene structure prediction
52 output.
53
54 This module inherits off L<Bio::Tools::AnalysisResult> and therefore
55 implements L<Bio::SeqAnalysisParserI>.
56
57 =head1 FEEDBACK
58
59 =head2 Mailing Lists
60
61 User feedback is an integral part of the evolution of this and other
62 Bioperl modules. Send your comments and suggestions preferably to one
63 of the Bioperl mailing lists. Your participation is much appreciated.
64
65 bioperl-l@bioperl.org - General discussion
66 http://bio.perl.org/MailList.html - About the mailing lists
67
68 =head2 Reporting Bugs
69
70 Report bugs to the Bioperl bug tracking system to help us keep track
71 of the bugs and their resolution. Bug reports can be submitted via email
72 or the web:
73
74 bioperl-bugs@bio.perl.org
75 http://bugzilla.bioperl.org/
76
77 =head1 AUTHOR - Hilmar Lapp
78
79 Email hlapp@gmx.net (or hilmar.lapp@pharma.novartis.com)
80
81 Describe contact details here
82
83 =head1 APPENDIX
84
85 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
86
87 =cut
88
89
90 # Let the code begin...
91
92
93 package Bio::Tools::MZEF;
94 use vars qw(@ISA);
95 use strict;
96
97 use Bio::Tools::AnalysisResult;
98 use Bio::Tools::Prediction::Gene;
99 use Bio::Tools::Prediction::Exon;
100
101 @ISA = qw(Bio::Tools::AnalysisResult);
102
# Sets up the private parser state of a freshly created (or re-initialized)
# object. The parent class is given the arguments first; afterwards the
# STRAND argument (e.g. -strand => -1) is picked out and stored, defaulting
# to 1 when it was not supplied.
#
# Args    : the named arguments passed to the constructor
# Returns : the value of the last assignment (the empty prediction queue);
#           callers presumably ignore it -- TODO confirm against the parent
sub _initialize_state {
    my ($self, @args) = @_;

    # first call the inherited method!
    $self->SUPER::_initialize_state(@args);

    # handle our own parameters; only the strand is of interest here
    my ($strand) = $self->_rearrange([qw(STRAND)], @args);

    # our private state variables
    $self->{'_strand'}       = defined($strand) ? $strand : 1;
    $self->{'_preds_parsed'} = 0;
    $self->{'_has_cds'}      = 0;
    # queue of parsed predictions that have not been handed out yet
    $self->{'_preds'}        = [];
}
123
124 =head2 analysis_method
125
126 Usage : $mzef->analysis_method();
127 Purpose : Inherited method. Overridden to ensure that the name matches
128 /mzef/i.
129 Returns : String
130 Argument : n/a
131
132 =cut
133
134 #-------------
# Getter/setter for the analysis method name. A supplied name must contain
# "mzef" (case-insensitive), otherwise an exception is thrown; the actual
# storage is left to the parent class implementation.
sub analysis_method {
    my $self   = shift;
    my $method = shift;

    # a false/missing name is a plain getter call and passes unchecked
    my $rejected = $method && $method !~ /mzef/i;
    $self->throw("method $method not supported in " . ref($self))
        if $rejected;

    return $self->SUPER::analysis_method($method);
}
143
144 =head2 next_feature
145
146 Title : next_feature
147 Usage : while($gene = $mzef->next_feature()) {
148 # do something
149 }
150 Function: Returns the next gene structure prediction of the MZEF result
151 file. Call this method repeatedly until FALSE is returned.
152
153 The returned object is actually a SeqFeatureI implementing object.
154 This method is required for classes implementing the
155 SeqAnalysisParserI interface, and is merely an alias for
156 next_prediction() at present.
157
158 Note that with the present version of MZEF there will only be one
159 object returned, because MZEF does not predict individual genes
160 but just potential internal exons.
161 Example :
162 Returns : A Bio::Tools::Prediction::Gene object.
163 Args :
164
165 =cut
166
# Alias for next_prediction(). Neither method currently takes arguments,
# but whatever is passed in is forwarded unchanged so that this wrapper
# keeps working should next_prediction() ever start accepting some.
sub next_feature {
    my $self = shift;

    return $self->next_prediction(@_);
}
174
175 =head2 next_prediction
176
177 Title : next_prediction
178 Usage : while($gene = $mzef->next_prediction()) {
179 # do something
180 }
181 Function: Returns the next gene structure prediction of the MZEF result
182 file. Call this method repeatedly until FALSE is returned.
183
184 Note that with the present version of MZEF there will only be one
185 object returned, because MZEF does not predict individual genes
186 but just potential internal exons.
187 Example :
188 Returns : A Bio::Tools::Prediction::Gene object.
189 Args :
190
191 =cut
192
# Hands out the next queued gene prediction. The prediction section is
# parsed lazily on the first call; once the queue is exhausted the result
# of _prediction() (undef) is passed through to the caller.
sub next_prediction {
    my $self = shift;

    # parse on demand, exactly once
    if (! $self->_predictions_parsed()) {
        $self->_parse_predictions();
    }

    # return the next gene structure (transcript)
    return $self->_prediction();
}
203
204 =head2 _parse_predictions
205
206 Title : _parse_predictions()
207 Usage : $obj->_parse_predictions()
208 Function: Parses the prediction section. Automatically called by
209 next_prediction() if not yet done.
210 Example :
211 Returns :
212
213 =cut
214
# Reads the report line by line via _readline() and collects every exon
# line into one Bio::Tools::Prediction::Gene, which is queued with
# _add_prediction() once the input is exhausted. Recognized lines:
#   "start - end <score columns>"          -> one exon
#   "Internal ... MZEF"                    -> sets the analysis method
#   "File_Name: X  Sequence_length: N"     -> remembers the sequence length
# All other lines are skipped. Finally marks the predictions as parsed.
#
# Throws  : if the object was set up for a strand other than 1 and an exon
#           line is met before the sequence length is known, because the
#           coordinates cannot be translated back in that case (previously
#           this silently produced meaningless coordinates).
sub _parse_predictions {
    my ($self) = @_;
    my $exon_tag = "InternalExon";
    my $gene;
    # my $seqname; # name given in output is poorly formatted
    my $seqlen;
    my $prednr = 1;

    # a lexical line buffer is used so the caller's global $_ is not clobbered
    while(defined(my $line = $self->_readline())) {
        # exon or signal; start-end is stripped off first because it may
        # not be space delimited for large numbers
        if($line =~ s/^\s*(\d+)\s*-\s*(\d+)\s+//) {
            my ($start,$end) = ($1,$2);
            if(! defined($gene)) {
                $gene = Bio::Tools::Prediction::Gene->new(
                                        '-primary' => "GenePrediction$prednr",
                                        '-source' => 'MZEF');
            }
            # split the rest into fields
            chomp($line);
            # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
            # index:              0   1   2   3   4   5   6   7
            my @flds = split(' ', $line);
            # create the feature object depending on the type of signal --
            # which is always an (internal) exon for MZEF
            my $predobj = Bio::Tools::Prediction::Exon->new();
            # set common fields
            $predobj->source_tag('MZEF');
            $predobj->significance($flds[0]);
            $predobj->score($flds[0]); # what shall we set as overall score?
            $predobj->strand($self->{'_strand'}); # MZEF searches only one
            if($predobj->strand() == 1) {
                $predobj->start($start);
                $predobj->end($end);
            } else {
                # the sequence was reversed before the run: mirror the
                # coordinates, which requires the sequence length
                if(! defined($seqlen)) {
                    $self->throw("cannot translate exon $start-$end to the " .
                                 "non-reversed sequence: no Sequence_length " .
                                 "line seen before the exon line");
                }
                $predobj->start($seqlen-$end+1);
                $predobj->end($seqlen-$start+1);
            }
            # set scores
            $predobj->start_signal_score($flds[5]);
            $predobj->end_signal_score($flds[7]);
            $predobj->coding_signal_score($flds[6]);
            # frame -- the position of the first "1" in the Orf field is
            # taken as the frame; undef if the field contains no "1"
            my $frm = index($flds[4], "1");
            $predobj->frame(($frm < 0) ? undef : $frm);
            $predobj->primary_tag($exon_tag);
            $predobj->is_coding(1);
            # add to gene structure (should be done only when start and end
            # are set, in order to allow for proper expansion of the range)
            $gene->add_exon($predobj);
            next;
        }
        if($line =~ /^\s*Internal .*(MZEF)/) {
            $self->analysis_method($1);
            next;
        }
        if($line =~ /^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
            # $seqname = $1; # this is too poor currently (file name truncated
                             # to 10 chars) in order to be sensible enough
            $seqlen = $2;
            next;
        }
    }
    # $gene->seq_id($seqname);
    $self->_add_prediction($gene) if defined($gene);
    $self->_predictions_parsed(1);
}
286
287 =head2 _prediction
288
289 Title : _prediction()
290 Usage : $gene = $obj->_prediction()
291 Function: internal
292 Example :
293 Returns :
294
295 =cut
296
# Removes and returns the oldest queued prediction. Returns undef when no
# queue exists or the queue is empty.
sub _prediction {
    my $self = shift;

    if (exists($self->{'_preds'})) {
        my $queue = $self->{'_preds'};
        return shift(@$queue) if @$queue;
    }
    return undef;
}
303
304 =head2 _add_prediction
305
306 Title : _add_prediction()
307 Usage : $obj->_add_prediction($gene)
308 Function: internal
309 Example :
310 Returns :
311
312 =cut
313
# Appends a gene prediction to the queue of predictions waiting to be
# returned, creating the queue first if it does not exist yet. The value
# of push() (the new queue length) is what falls out as the return value.
sub _add_prediction {
    my $self = shift;
    my $gene = shift;

    $self->{'_preds'} = [] unless exists($self->{'_preds'});
    push(@{ $self->{'_preds'} }, $gene);
}
322
323 =head2 _predictions_parsed
324
325 Title : _predictions_parsed
326 Usage : $obj->_predictions_parsed
327 Function: internal
328 Example :
329 Returns : TRUE or FALSE
330
331 =cut
332
# Getter/setter for the "predictions have been parsed" flag. Only a true
# value is ever stored, so the flag cannot be cleared through this method;
# a flag that was never set is initialized to 0 on first access.
sub _predictions_parsed {
    my $self = shift;
    my $flag = shift;

    if ($flag) {
        $self->{'_preds_parsed'} = $flag;
    }
    elsif (! exists($self->{'_preds_parsed'})) {
        $self->{'_preds_parsed'} = 0;
    }
    return $self->{'_preds_parsed'};
}
342
343
344 1;