annotate variant_effect_predictor/Bio/Tools/MZEF.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 # $Id: MZEF.pm,v 1.9 2002/10/22 07:38:46 lapp Exp $
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 # BioPerl module for Bio::Tools::MZEF
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 # Cared for by Hilmar Lapp <hlapp@gmx.net>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 # Copyright Hilmar Lapp
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 # POD documentation - main docs before the code
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 Bio::Tools::MZEF - Results of one MZEF run
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20 # filehandle:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 $mzef = Bio::Tools::MZEF->new( -fh => \*INPUT );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22 # to indicate that the sequence was reversed prior to feeding it to MZEF
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 # and that you want to have this reflected in the strand() attribute of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24 # the exons, as well as have the coordinates translated to the non-reversed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 # sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26 $mzef = Bio::Tools::MZEF->new( -file => 'result.mzef',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 -strand => -1 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 # parse the results
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 # note: this class is-a Bio::Tools::AnalysisResult which implements
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 while($gene = $mzef->next_prediction()) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 # $gene is an instance of Bio::Tools::Prediction::Gene
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 # $gene->exons() returns an array of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 # Bio::Tools::Prediction::Exon objects
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 # all exons:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38 @exon_arr = $gene->exons();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 # internal exons only
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 @intrl_exons = $gene->exons('Internal');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 # note that presently MZEF predicts only internal exons!
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 # essential if you gave a filename at initialization (otherwise the file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 # will stay open)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 $mzef->close();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 The MZEF module provides a parser for MZEF gene structure prediction
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 output.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54 This module inherits off L<Bio::Tools::AnalysisResult> and therefore
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 implements L<Bio::SeqAnalysisParserI>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 =head1 FEEDBACK
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59 =head2 Mailing Lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61 User feedback is an integral part of the evolution of this and other
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 Bioperl modules. Send your comments and suggestions preferably to one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 of the Bioperl mailing lists. Your participation is much appreciated.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65 bioperl-l@bioperl.org - General discussion
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66 http://bio.perl.org/MailList.html - About the mailing lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68 =head2 Reporting Bugs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70 Report bugs to the Bioperl bug tracking system to help us keep track
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 of the bugs and their resolution. Bug reports can be submitted via email
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72 or the web:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74 bioperl-bugs@bio.perl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 http://bugzilla.bioperl.org/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 =head1 AUTHOR - Hilmar Lapp
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 Email hlapp@gmx.net (or hilmar.lapp@pharma.novartis.com)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 Describe contact details here
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 =head1 APPENDIX
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 # Let the code begin...
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93 package Bio::Tools::MZEF;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94 use vars qw(@ISA);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97 use Bio::Tools::AnalysisResult;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98 use Bio::Tools::Prediction::Gene;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 use Bio::Tools::Prediction::Exon;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101 @ISA = qw(Bio::Tools::AnalysisResult);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Resets the parser state of this object.
#
# Delegates to the inherited _initialize_state() with the same arguments,
# then extracts the STRAND named parameter via _rearrange() and initializes
# the private state used by the prediction parser.
#
# Args    : the named-parameter list given to the constructor; only
#           STRAND is interpreted here (defaults to 1 when not given).
sub _initialize_state {
    my ($self, @args) = @_;

    # first call the inherited method!
    $self->SUPER::_initialize_state(@args);

    # handle our own parameters
    my ($strand) = $self->_rearrange([qw(STRAND)], @args);
    $strand = 1 unless defined($strand);

    # our private state variables
    $self->{'_strand'}       = $strand;
    $self->{'_preds_parsed'} = 0;
    $self->{'_has_cds'}      = 0;
    # array of pre-parsed predictions
    $self->{'_preds'}        = [];
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124 =head2 analysis_method
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126 Usage : $mzef->analysis_method();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127 Purpose : Inherited method. Overridden to ensure that the name matches
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 /mzef/i.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129 Returns : String
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134 #-------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Accessor for the analysis method name, restricted to MZEF.
#
# A true $method that does not match /mzef/i is rejected by throwing an
# exception; everything else is handed on to the inherited
# analysis_method().
#
# Args    : $method - optional method name
# Returns : whatever the inherited analysis_method() returns
sub analysis_method {
    my $self   = shift;
    my $method = shift;

    $self->throw("method $method not supported in " . ref($self))
        if $method && $method !~ /mzef/i;

    return $self->SUPER::analysis_method($method);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144 =head2 next_feature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146 Title : next_feature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147 Usage : while($gene = $mzef->next_feature()) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148 # do something
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150 Function: Returns the next gene structure prediction of the MZEF result
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151 file. Call this method repeatedly until FALSE is returned.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153 The returned object is actually a SeqFeatureI implementing object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154 This method is required for classes implementing the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155 SeqAnalysisParserI interface, and is merely an alias for
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156 next_prediction() at present.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158 Note that with the present version of MZEF there will only be one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 object returned, because MZEF does not predict individual genes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160 but just potential internal exons.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162 Returns : A Bio::Tools::Prediction::Gene object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163 Args :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Alias for next_prediction().
#
# Neither method expects arguments at present; whatever is passed in is
# forwarded unchanged so that this alias keeps working should
# next_prediction() ever start accepting some.
sub next_feature {
    my $self = shift;

    return $self->next_prediction(@_);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
174
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
175 =head2 next_prediction
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
176
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
177 Title : next_prediction
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
178 Usage : while($gene = $mzef->next_prediction()) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
179 # do something
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
180 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
181 Function: Returns the next gene structure prediction of the MZEF result
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182 file. Call this method repeatedly until FALSE is returned.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184 Note that with the present version of MZEF there will only be one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185 object returned, because MZEF does not predict individual genes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186 but just potential internal exons.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188 Returns : A Bio::Tools::Prediction::Gene object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189 Args :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Returns the next queued gene prediction.
#
# The prediction section is parsed on first use, i.e. when
# _predictions_parsed() still reports false; afterwards predictions are
# handed out one at a time by _prediction().
sub next_prediction {
    my $self = shift;

    # parse lazily, only if that has not happened yet
    if (! $self->_predictions_parsed()) {
        $self->_parse_predictions();
    }

    # hand out the next gene structure (transcript)
    return $self->_prediction();
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
204 =head2 _parse_predictions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
205
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
206 Title : _parse_predictions()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
207 Usage : $obj->_parse_predictions()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
208 Function: Parses the prediction section. Automatically called by
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
209 next_prediction() if not yet done.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
210 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
211 Returns :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
212
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
213 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
214
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Parses the prediction section of the MZEF report.
#
# Reads lines via _readline() until it returns undef. Every line starting
# with "<start> - <end>" becomes a Bio::Tools::Prediction::Exon that is
# added to one single Bio::Tools::Prediction::Gene; that gene (if any exon
# was seen) is queued with _add_prediction(). Finally the object is marked
# as parsed through _predictions_parsed(1).
#
# Throws  : if the object's strand is not 1 and an exon line is met before
#           the "Sequence_length:" header, because the coordinates cannot
#           be translated to the non-reversed sequence without the length.
sub _parse_predictions {
    my ($self) = @_;
    my $exon_tag = "InternalExon";
    my $gene;
    # my $seqname; # name given in output is poorly formatted
    my $seqlen;
    my $prednr = 1;

    # Read into a lexical instead of $_ so the caller's $_ stays untouched.
    while(defined(my $line = $self->_readline())) {
        # Exon line. We strip start-end first because the two may not be
        # space delimited for large numbers.
        if($line =~ s/^\s*(\d+)\s*-\s*(\d+)\s+//) {
            my ($start, $end) = ($1, $2);

            if(! defined($gene)) {
                $gene = Bio::Tools::Prediction::Gene->new(
                    '-primary' => "GenePrediction$prednr",
                    '-source'  => 'MZEF');
            }
            # split the rest into fields
            chomp($line);
            # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
            # index:              0 1   2   3   4   5   6   7
            my @flds = split(' ', $line);

            # create the feature object -- which is always an (internal)
            # exon for MZEF
            my $predobj = Bio::Tools::Prediction::Exon->new();
            # set common fields
            $predobj->source_tag('MZEF');
            $predobj->significance($flds[0]);
            $predobj->score($flds[0]); # what shall we set as overall score?
            $predobj->strand($self->{'_strand'}); # MZEF searches only one
            if($predobj->strand() == 1) {
                $predobj->start($start);
                $predobj->end($end);
            } else {
                # Translating to the non-reversed sequence needs its length.
                $self->throw("cannot translate coordinates $start-$end to " .
                             "the non-reversed sequence: no Sequence_length " .
                             "seen yet in MZEF output")
                    unless defined($seqlen);
                $predobj->start($seqlen - $end + 1);
                $predobj->end($seqlen - $start + 1);
            }
            # set scores
            $predobj->start_signal_score($flds[5]);
            $predobj->end_signal_score($flds[7]);
            $predobj->coding_signal_score($flds[6]);
            # frame -- position of the first "1" in the orf field; left
            # undefined if there is none or the field is missing
            my $frm = defined($flds[4]) ? index($flds[4], "1") : -1;
            $predobj->frame(($frm < 0) ? undef : $frm);
            $predobj->primary_tag($exon_tag);
            $predobj->is_coding(1);
            # add to gene structure (should be done only when start and end
            # are set, in order to allow for proper expansion of the range)
            $gene->add_exon($predobj);
            next;
        }
        if($line =~ /^\s*Internal .*(MZEF)/) {
            $self->analysis_method($1);
            next;
        }
        if($line =~ /^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
            # $seqname = $1; # this is too poor currently (file name truncated
            #                # to 10 chars) in order to be sensible enough
            $seqlen = $2;
            next;
        }
    }
    # $gene->seq_id($seqname);
    $self->_add_prediction($gene) if defined($gene);
    $self->_predictions_parsed(1);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
286
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
287 =head2 _prediction
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
288
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
289 Title : _prediction()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
290 Usage : $gene = $obj->_prediction()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
291 Function: internal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
292 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
293 Returns :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
294
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
295 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
296
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Removes and returns the oldest queued prediction.
#
# Returns : the first element of the '_preds' queue, or nothing (undef in
#           scalar context, the empty list in list context) when the queue
#           is missing or empty. A bare return is used so that an
#           exhausted queue does not look like a one-element list.
sub _prediction {
    my ($self) = @_;

    # plain hash read: does not create the key if it is absent
    my $queue = $self->{'_preds'};
    return unless $queue && @{$queue};
    return shift(@{$queue});
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
303
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
304 =head2 _add_prediction
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
305
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
306 Title : _add_prediction()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
307 Usage : $obj->_add_prediction($gene)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308 Function: internal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
309 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
310 Returns :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
312 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
313
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Appends a prediction to the end of the '_preds' queue, creating the
# queue first when the key is not present yet.
#
# Args    : $gene - the prediction to queue
# Returns : the value of push(), i.e. the new queue length
sub _add_prediction {
    my $self   = shift;
    my ($gene) = @_;

    $self->{'_preds'} = [] unless exists($self->{'_preds'});
    return push(@{ $self->{'_preds'} }, $gene);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
322
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
323 =head2 _predictions_parsed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
324
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
325 Title : _predictions_parsed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
326 Usage : $obj->_predictions_parsed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
327 Function: internal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
328 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
329 Returns : TRUE or FALSE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Gets or sets the flag recording whether the predictions were parsed.
#
# Args    : $val - optional new value; any defined value is stored,
#           including a false one, so the flag can be reset to 0.
# Returns : the current flag value (0 if it was never set)
sub _predictions_parsed {
    my ($self, $val) = @_;

    # test for defined rather than truth so that 0 is accepted as a value
    $self->{'_preds_parsed'} = $val if defined($val);
    $self->{'_preds_parsed'} = 0 unless exists($self->{'_preds_parsed'});
    return $self->{'_preds_parsed'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344 1;