0
|
1 # $Id: MZEF.pm,v 1.9 2002/10/22 07:38:46 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Tools::MZEF
|
|
4 #
|
|
5 # Cared for by Hilmar Lapp <hlapp@gmx.net>
|
|
6 #
|
|
7 # Copyright Hilmar Lapp
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Tools::MZEF - Results of one MZEF run
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
|
|
20 # filehandle:
|
|
21 $mzef = Bio::Tools::MZEF->new( -fh => \*INPUT );
|
|
22 # to indicate that the sequence was reversed prior to feeding it to MZEF
|
|
23 # and that you want to have this reflected in the strand() attribute of
|
|
24 # the exons, as well as have the coordinates translated to the non-reversed
|
|
25 # sequence
|
|
26 $mzef = Bio::Tools::MZEF->new( -file => 'result.mzef',
|
|
27 -strand => -1 );
|
|
28
|
|
29 # parse the results
|
|
30 # note: this class is-a Bio::Tools::AnalysisResult which implements
|
|
31 # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
|
|
32 while($gene = $mzef->next_prediction()) {
|
|
33 # $gene is an instance of Bio::Tools::Prediction::Gene
|
|
34
|
|
35 # $gene->exons() returns an array of
|
|
36 # Bio::Tools::Prediction::Exon objects
|
|
37 # all exons:
|
|
38 @exon_arr = $gene->exons();
|
|
39
|
|
40 # internal exons only
|
|
41 @intrl_exons = $gene->exons('Internal');
|
|
42 # note that presently MZEF predicts only internal exons!
|
|
43 }
|
|
44
|
|
45 # essential if you gave a filename at initialization (otherwise the file
|
|
46 # will stay open)
|
|
47 $mzef->close();
|
|
48
|
|
49 =head1 DESCRIPTION
|
|
50
|
|
51 The MZEF module provides a parser for MZEF gene structure prediction
|
|
52 output.
|
|
53
|
|
54 This module inherits from L<Bio::Tools::AnalysisResult> and therefore
|
|
55 implements L<Bio::SeqAnalysisParserI>.
|
|
56
|
|
57 =head1 FEEDBACK
|
|
58
|
|
59 =head2 Mailing Lists
|
|
60
|
|
61 User feedback is an integral part of the evolution of this and other
|
|
62 Bioperl modules. Send your comments and suggestions preferably to one
|
|
63 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
64
|
|
65 bioperl-l@bioperl.org - General discussion
|
|
66 http://bio.perl.org/MailList.html - About the mailing lists
|
|
67
|
|
68 =head2 Reporting Bugs
|
|
69
|
|
70 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
71 of the bugs and their resolution. Bug reports can be submitted via email
|
|
72 or the web:
|
|
73
|
|
74 bioperl-bugs@bio.perl.org
|
|
75 http://bugzilla.bioperl.org/
|
|
76
|
|
77 =head1 AUTHOR - Hilmar Lapp
|
|
78
|
|
79 Email hlapp@gmx.net (or hilmar.lapp@pharma.novartis.com)
|
|
80
|
|
81 Describe contact details here
|
|
82
|
|
83 =head1 APPENDIX
|
|
84
|
|
85 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
86
|
|
87 =cut
|
|
88
|
|
89
|
|
90 # Let the code begin...
|
|
91
|
|
92
|
|
93 package Bio::Tools::MZEF;
|
|
94 use vars qw(@ISA);
|
|
95 use strict;
|
|
96
|
|
97 use Bio::Tools::AnalysisResult;
|
|
98 use Bio::Tools::Prediction::Gene;
|
|
99 use Bio::Tools::Prediction::Exon;
|
|
100
|
|
101 @ISA = qw(Bio::Tools::AnalysisResult);
|
|
102
|
|
# Sets up this parser's private state after delegating to the parent class.
#
# Args    : the named-parameter list handed to the constructor; the only
#           parameter consumed here is STRAND (defaults to 1 when absent).
# Returns : the value of the last assignment, i.e. the fresh, empty
#           prediction queue.
sub _initialize_state {
    my ($self, @args) = @_;

    # The inherited initializer has to run before our own state is set up.
    my $inherited = $self->SUPER::_initialize_state(@args);

    # Extract the single named parameter this class understands.
    my ($orientation) = $self->_rearrange(['STRAND'], @args);
    $orientation = 1 if !defined($orientation);

    # Private state: strand of the analysed sequence, the "already parsed"
    # flag, the CDS flag, and the queue of pre-parsed predictions.
    $self->{'_strand'}       = $orientation;
    $self->{'_preds_parsed'} = 0;
    $self->{'_has_cds'}      = 0;
    $self->{'_preds'}        = [];
}
|
|
123
|
|
124 =head2 analysis_method
|
|
125
|
|
126 Usage : $mzef->analysis_method();
|
|
127 Purpose : Inherited method. Overridden to ensure that the name matches
|
|
128 /mzef/i.
|
|
129 Returns : String
|
|
130 Argument : n/a
|
|
131
|
|
132 =cut
|
|
133
|
|
134 #-------------
|
|
# Getter/setter override: refuses any method name that does not match
# /mzef/i, then hands over to the inherited implementation.
#
# Args    : optional method name (string)
# Returns : whatever the inherited analysis_method() returns
# Throws  : if a true-valued name is given that does not match /mzef/i
sub analysis_method {
    my ($self, $method) = @_;

    # Empty/undefined names are passed through untouched (plain getter use).
    my $unsupported = $method && ($method !~ /mzef/i);
    $self->throw("method $method not supported in " . ref($self))
        if $unsupported;

    return $self->SUPER::analysis_method($method);
}
|
|
143
|
|
144 =head2 next_feature
|
|
145
|
|
146 Title : next_feature
|
|
147 Usage : while($gene = $mzef->next_feature()) {
|
|
148 # do something
|
|
149 }
|
|
150 Function: Returns the next gene structure prediction of the MZEF result
|
|
151 file. Call this method repeatedly until FALSE is returned.
|
|
152
|
|
153 The returned object is actually a SeqFeatureI implementing object.
|
|
154 This method is required for classes implementing the
|
|
155 SeqAnalysisParserI interface, and is merely an alias for
|
|
156 next_prediction() at present.
|
|
157
|
|
158 Note that with the present version of MZEF there will only be one
|
|
159 object returned, because MZEF does not predict individual genes
|
|
160 but just potential internal exons.
|
|
161 Example :
|
|
162 Returns : A Bio::Tools::Prediction::Gene object.
|
|
163 Args :
|
|
164
|
|
165 =cut
|
|
166
|
|
# Alias for next_prediction().
#
# Neither this method nor next_prediction() currently takes arguments, but
# anything supplied is forwarded unchanged so that a future change to
# next_prediction() needs no edit here.
sub next_feature {
    my $self = shift;
    return $self->next_prediction(@_);
}
|
|
174
|
|
175 =head2 next_prediction
|
|
176
|
|
177 Title : next_prediction
|
|
178 Usage : while($gene = $mzef->next_prediction()) {
|
|
179 # do something
|
|
180 }
|
|
181 Function: Returns the next gene structure prediction of the MZEF result
|
|
182 file. Call this method repeatedly until FALSE is returned.
|
|
183
|
|
184 Note that with the present version of MZEF there will only be one
|
|
185 object returned, because MZEF does not predict individual genes
|
|
186 but just potential internal exons.
|
|
187 Example :
|
|
188 Returns : A Bio::Tools::Prediction::Gene object.
|
|
189 Args :
|
|
190
|
|
191 =cut
|
|
192
|
|
# Returns the next queued gene structure, parsing the prediction section
# first if that has not happened yet.
#
# Returns : whatever _prediction() yields (the next queued object, or undef
#           once the queue is empty)
sub next_prediction {
    my $self = shift;

    # Parsing is lazy: it is triggered by the first request only.
    if (!$self->_predictions_parsed()) {
        $self->_parse_predictions();
    }

    # Hand out the next gene structure (transcript).
    return $self->_prediction();
}
|
|
203
|
|
204 =head2 _parse_predictions
|
|
205
|
|
206 Title : _parse_predictions()
|
|
207 Usage : $obj->_parse_predictions()
|
|
208 Function: Parses the prediction section. Automatically called by
|
|
209 next_prediction() if not yet done.
|
|
210 Example :
|
|
211 Returns :
|
|
212
|
|
213 =cut
|
|
214
|
|
# Parses the prediction section of the MZEF report.
#
# Reads the input line by line via _readline() and recognises three kinds
# of lines:
#   * exon lines, starting with "<start> - <end>" coordinates, each of which
#     becomes a Bio::Tools::Prediction::Exon added to a single
#     Bio::Tools::Prediction::Gene container;
#   * the banner line naming the program, which is fed to analysis_method();
#   * the "File_Name: ... Sequence_length: ..." header, from which the
#     sequence length is taken (needed to translate coordinates when the
#     parser was told the sequence had been reversed).
# All other lines are ignored. When at least one exon was seen the gene is
# queued with _add_prediction(); in any case the section is flagged as
# parsed.
#
# Returns : the result of $self->_predictions_parsed(1)
# Throws  : if an exon on the reverse strand is met before the sequence
#           length is known, since its coordinates cannot be translated
sub _parse_predictions {
    my ($self) = @_;
    my $exon_tag = "InternalExon";
    my $prednr   = 1;    # never advanced: all exons go into one gene object
    my $gene;
    my $seqlen;
    # The sequence name printed by MZEF is deliberately not captured: the
    # file name is truncated to 10 chars in the output, which is too poor
    # to be sensible as a sequence id.

    # A lexical line buffer is used rather than the global $_, so that a
    # caller's $_ (or a list element aliased to it) is never overwritten.
    while (defined(my $line = $self->_readline())) {

        # Exon line. The coordinates are stripped off first because for
        # large numbers they may not be whitespace-delimited. A single
        # substitution both tests for and removes them.
        if ($line =~ s/^\s*(\d+)\s*-\s*(\d+)\s+//) {
            my ($start, $end) = ($1, $2);

            if (!defined($gene)) {
                $gene = Bio::Tools::Prediction::Gene->new(
                    '-primary' => "GenePrediction$prednr",
                    '-source'  => 'MZEF');
            }

            # Split the remainder into fields; split(' ') discards leading
            # and trailing whitespace, line terminator included.
            # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
            # index:              0 1   2   3   4   5   6   7
            my @flds = split(' ', $line);

            # The signal type is always an (internal) exon for MZEF.
            my $predobj = Bio::Tools::Prediction::Exon->new();

            # Common fields. P is used for both significance and score.
            $predobj->source_tag('MZEF');
            $predobj->significance($flds[0]);
            $predobj->score($flds[0]);
            # MZEF searches only one strand, so the strand is whatever the
            # caller declared at construction time.
            $predobj->strand($self->{'_strand'});

            if ($predobj->strand() == 1) {
                $predobj->start($start);
                $predobj->end($end);
            }
            else {
                # Translate to coordinates on the non-reversed sequence.
                # Without the length this would silently yield nonsense.
                $self->throw("cannot translate exon $start-$end to the "
                             . "non-reversed sequence: no Sequence_length "
                             . "header seen before the prediction")
                    unless defined($seqlen);
                $predobj->start($seqlen - $end + 1);
                $predobj->end($seqlen - $start + 1);
            }

            # Signal scores.
            $predobj->start_signal_score($flds[5]);
            $predobj->end_signal_score($flds[7]);
            $predobj->coding_signal_score($flds[6]);

            # Frame: position of the first "1" in the Orf field, or undef
            # if the field contains none.
            my $frm = index($flds[4], "1");
            $predobj->frame(($frm < 0) ? undef : $frm);
            $predobj->primary_tag($exon_tag);
            $predobj->is_coding(1);

            # Add to the gene structure only now that start and end are
            # set, to allow for proper expansion of the gene's range.
            $gene->add_exon($predobj);
            next;
        }

        # Program banner: record the analysis method.
        if ($line =~ /^\s*Internal .*(MZEF)/) {
            $self->analysis_method($1);
            next;
        }

        # Header with the (truncated) file name and the sequence length.
        if ($line =~ /^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
            $seqlen = $2;
            next;
        }
    }

    $self->_add_prediction($gene) if defined($gene);
    $self->_predictions_parsed(1);
}
|
|
286
|
|
287 =head2 _prediction
|
|
288
|
|
289 Title : _prediction()
|
|
290 Usage : $gene = $obj->_prediction()
|
|
291 Function: internal
|
|
292 Example :
|
|
293 Returns :
|
|
294
|
|
295 =cut
|
|
296
|
|
# Removes and returns the oldest queued prediction.
#
# Returns : the first element of the '_preds' queue, or undef when the
#           queue is missing or empty (a single undef even in list context)
sub _prediction {
    my $self = shift;

    if (exists($self->{'_preds'}) && @{$self->{'_preds'}}) {
        return shift(@{$self->{'_preds'}});
    }
    return undef;
}
|
|
303
|
|
304 =head2 _add_prediction
|
|
305
|
|
306 Title : _add_prediction()
|
|
307 Usage : $obj->_add_prediction($gene)
|
|
308 Function: internal
|
|
309 Example :
|
|
310 Returns :
|
|
311
|
|
312 =cut
|
|
313
|
|
# Appends a prediction to the queue, creating the queue on first use.
#
# Args    : the object to enqueue
# Returns : the result of push(), i.e. the new length of the queue
sub _add_prediction {
    my ($self, $gene) = @_;

    $self->{'_preds'} = [] unless exists($self->{'_preds'});
    my $queue = $self->{'_preds'};
    push(@{$self->{'_preds'}}, $gene);
}
|
|
322
|
|
323 =head2 _predictions_parsed
|
|
324
|
|
325 Title : _predictions_parsed
|
|
326 Usage : $obj->_predictions_parsed
|
|
327 Function: internal
|
|
328 Example :
|
|
329 Returns : TRUE or FALSE
|
|
330
|
|
331 =cut
|
|
332
|
|
# Getter/setter for the "prediction section already parsed" flag.
#
# Args    : optional new value; only a true value is stored, a false or
#           missing one leaves the flag as it is
# Returns : the current flag value (initialised to 0 if it did not exist)
sub _predictions_parsed {
    my ($self, $val) = @_;

    if ($val) {
        $self->{'_preds_parsed'} = $val;
    }
    elsif (!exists($self->{'_preds_parsed'})) {
        $self->{'_preds_parsed'} = 0;
    }
    return $self->{'_preds_parsed'};
}
|
|
342
|
|
343
|
|
344 1;
|