Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/MZEF.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: MZEF.pm,v 1.9 2002/10/22 07:38:46 lapp Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::Tools::MZEF | |
| 4 # | |
| 5 # Cared for by Hilmar Lapp <hlapp@gmx.net> | |
| 6 # | |
| 7 # Copyright Hilmar Lapp | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::Tools::MZEF - Results of one MZEF run | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef'); | |
| 20 # filehandle: | |
| 21 $mzef = Bio::Tools::MZEF->new( -fh => \*INPUT ); | |
| 22 # to indicate that the sequence was reversed prior to feeding it to MZEF | |
| 23 # and that you want to have this reflected in the strand() attribute of | |
| 24 # the exons, as well as have the coordinates translated to the non-reversed | |
| 25 # sequence | |
| 26 $mzef = Bio::Tools::MZEF->new( -file => 'result.mzef', | |
| 27 -strand => -1 ); | |
| 28 | |
| 29 # parse the results | |
| 30 # note: this class is-a Bio::Tools::AnalysisResult which implements | |
| 31 # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same as next_prediction() | |
| 32 while($gene = $mzef->next_prediction()) { | |
| 33 # $gene is an instance of Bio::Tools::Prediction::Gene | |
| 34 | |
| 35 # $gene->exons() returns an array of | |
| 36 # Bio::Tools::Prediction::Exon objects | |
| 37 # all exons: | |
| 38 @exon_arr = $gene->exons(); | |
| 39 | |
| 40 # internal exons only | |
| 41 @intrl_exons = $gene->exons('Internal'); | |
| 42 # note that presently MZEF predicts only internal exons! | |
| 43 } | |
| 44 | |
| 45 # essential if you gave a filename at initialization (otherwise the file | |
| 46 # will stay open) | |
| 47 $mzef->close(); | |
| 48 | |
| 49 =head1 DESCRIPTION | |
| 50 | |
| 51 The MZEF module provides a parser for MZEF gene structure prediction | |
| 52 output. | |
| 53 | |
| 54 This module inherits from L<Bio::Tools::AnalysisResult> and therefore | |
| 55 implements L<Bio::SeqAnalysisParserI>. | |
| 56 | |
| 57 =head1 FEEDBACK | |
| 58 | |
| 59 =head2 Mailing Lists | |
| 60 | |
| 61 User feedback is an integral part of the evolution of this and other | |
| 62 Bioperl modules. Send your comments and suggestions preferably to one | |
| 63 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 64 | |
| 65 bioperl-l@bioperl.org - General discussion | |
| 66 http://bio.perl.org/MailList.html - About the mailing lists | |
| 67 | |
| 68 =head2 Reporting Bugs | |
| 69 | |
| 70 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 71 of the bugs and their resolution. Bug reports can be submitted via email | |
| 72 or the web: | |
| 73 | |
| 74 bioperl-bugs@bio.perl.org | |
| 75 http://bugzilla.bioperl.org/ | |
| 76 | |
| 77 =head1 AUTHOR - Hilmar Lapp | |
| 78 | |
| 79 Email hlapp@gmx.net (or hilmar.lapp@pharma.novartis.com) | |
| 80 | |
| 81 Describe contact details here | |
| 82 | |
| 83 =head1 APPENDIX | |
| 84 | |
| 85 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
| 86 | |
| 87 =cut | |
| 88 | |
| 89 | |
| 90 # Let the code begin... | |
| 91 | |
| 92 | |
| 93 package Bio::Tools::MZEF; | |
| 94 use vars qw(@ISA); | |
| 95 use strict; | |
| 96 | |
| 97 use Bio::Tools::AnalysisResult; | |
| 98 use Bio::Tools::Prediction::Gene; | |
| 99 use Bio::Tools::Prediction::Exon; | |
| 100 | |
| 101 @ISA = qw(Bio::Tools::AnalysisResult); | |
| 102 | |
# Resets the parser state of this object.  The inherited initializer is run
# first; afterwards the -strand argument is recorded (1 when it is omitted)
# and the prediction queue and the parsed/CDS flags are cleared.
sub _initialize_state {
    my ($self, @args) = @_;

    # the inherited method must run before our own state is touched; its
    # return value is captured but not used at present
    my $inherited = $self->SUPER::_initialize_state(@args);

    # -strand is the only named parameter handled at this level
    my ($strand) = $self->_rearrange([qw(STRAND)], @args);

    # private state variables
    $self->{'_strand'}       = defined($strand) ? $strand : 1;
    $self->{'_preds_parsed'} = 0;
    $self->{'_has_cds'}      = 0;
    # queue of predictions that were parsed but not yet handed out
    $self->{'_preds'}        = [];
}
| 123 | |
=head2 analysis_method

 Usage     : $mzef->analysis_method();
 Purpose   : Inherited method. Overridden to ensure that the name matches
             /mzef/i.
 Returns   : The value returned by the inherited analysis_method()
 Argument  : Optional method name. A true value that does not match /mzef/i
             is rejected.
 Throws    : An exception (via throw()) if a non-matching name is supplied

=cut

sub analysis_method {
    my ($self, $method) = @_;

    # false values (including "no argument") are passed through unchecked;
    # anything else has to look like MZEF
    $self->throw("method $method not supported in " . ref($self))
        if $method && $method !~ /mzef/i;

    return $self->SUPER::analysis_method($method);
}
| 143 | |
=head2 next_feature

 Title   : next_feature
 Usage   : while($gene = $mzef->next_feature()) {
               # do something
           }
 Function: Returns the next gene structure prediction of the MZEF result
           file. Call this method repeatedly until FALSE is returned.

           This method exists because the SeqAnalysisParserI interface
           requires it; at present it does nothing but delegate to
           next_prediction().

           Note that with the present version of MZEF there will only be one
           object returned, because MZEF does not predict individual genes
           but just potential internal exons.
 Example :
 Returns : A Bio::Tools::Prediction::Gene object.
 Args    : None required; any arguments given are handed on unchanged to
           next_prediction().

=cut

sub next_feature {
    my $self = shift;

    # Neither this method nor next_prediction() expects arguments right now,
    # but whatever was passed is forwarded so that a future change there
    # does not require a change here.
    return $self->next_prediction(@_);
}
| 174 | |
=head2 next_prediction

 Title   : next_prediction
 Usage   : while($gene = $mzef->next_prediction()) {
               # do something
           }
 Function: Returns the next gene structure prediction of the MZEF result
           file. Call this method repeatedly until FALSE is returned.

           The prediction section is parsed on the first call; later calls
           only hand out what has been queued by that parse.

           Note that with the present version of MZEF there will only be one
           object returned, because MZEF does not predict individual genes
           but just potential internal exons.
 Example :
 Returns : A Bio::Tools::Prediction::Gene object.
 Args    : none

=cut

sub next_prediction {
    my $self = shift;

    # parse lazily: only when the prediction section has not been read yet
    if(! $self->_predictions_parsed()) {
        $self->_parse_predictions();
    }

    # hand out the next queued gene structure
    return $self->_prediction();
}
| 203 | |
=head2 _parse_predictions

 Title   : _parse_predictions()
 Usage   : $obj->_parse_predictions()
 Function: Parses the prediction section. Automatically called by
           next_prediction() if not yet done.

           All exon lines found are collected into one
           Bio::Tools::Prediction::Gene object, which is queued for
           next_prediction(). If the strand given at construction is not 1,
           the exon coordinates are translated to the non-reversed sequence
           using the Sequence_length value of the File_Name line.
 Example :
 Returns : The value of the "predictions parsed" flag after setting it
 Throws  : An exception (via throw()) if coordinates have to be translated
           but no Sequence_length has been read before the exon line
 Args    : none

=cut

sub _parse_predictions {
    my ($self) = @_;
    my $exon_tag = "InternalExon";
    my $gene;
    # The sequence name given in the output is poorly formatted (the file
    # name is truncated), so only the sequence length is kept.
    my $seqlen;
    my $prednr = 1;

    # A lexical holds the current line so that the caller's $_ is left alone.
    while(defined(my $line = $self->_readline())) {
        # Start-end is matched on its own because for large numbers it may
        # not be space delimited; the remainder of the line is captured for
        # splitting into fields.
        if($line =~ /^\s*(\d+)\s*-\s*(\d+)\s+(.*)/s) {
            my ($start, $end, $rest) = ($1, $2, $3);

            # exon or signal: create the enclosing gene on first use
            if(! defined($gene)) {
                $gene = Bio::Tools::Prediction::Gene->new(
                                       '-primary' => "GenePrediction$prednr",
                                       '-source'  => 'MZEF');
            }

            # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
            # index:              0 1   2   3   4   5   6   7
            my @flds = split(' ', $rest);

            # create the feature object depending on the type of signal --
            # which is always an (internal) exon for MZEF
            my $predobj = Bio::Tools::Prediction::Exon->new();
            # set common fields
            $predobj->source_tag('MZEF');
            $predobj->significance($flds[0]);
            $predobj->score($flds[0]); # what shall we set as overall score?
            $predobj->strand($self->{'_strand'}); # MZEF searches only one
            if($predobj->strand() == 1) {
                $predobj->start($start);
                $predobj->end($end);
            } else {
                # Translating to the non-reversed sequence needs the sequence
                # length; without it the result would be silently wrong.
                $self->throw("cannot translate exon $start-$end to the " .
                             "non-reversed sequence: no Sequence_length " .
                             "found before the exon line")
                    unless defined($seqlen);
                $predobj->start($seqlen - $end + 1);
                $predobj->end($seqlen - $start + 1);
            }
            # set scores
            $predobj->start_signal_score($flds[5]);
            $predobj->end_signal_score($flds[7]);
            $predobj->coding_signal_score($flds[6]);
            # frame -- taken as the position of the first "1" in the Orf
            # field; undef if the field contains none
            my $frm = index($flds[4], "1");
            $predobj->frame(($frm < 0) ? undef : $frm);
            $predobj->primary_tag($exon_tag);
            $predobj->is_coding(1);
            # add to gene structure (should be done only when start and end
            # are set, in order to allow for proper expansion of the range)
            $gene->add_exon($predobj);
        }
        elsif($line =~ /^\s*Internal .*(MZEF)/) {
            # header line naming the program
            $self->analysis_method($1);
        }
        elsif($line =~ /^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
            # $1 (the file name) is ignored on purpose, see above
            $seqlen = $2;
        }
    }
    $self->_add_prediction($gene) if defined($gene);
    $self->_predictions_parsed(1);
}
| 286 | |
=head2 _prediction

 Title   : _prediction()
 Usage   : $gene = $obj->_prediction()
 Function: internal; removes the oldest entry from the queue of parsed
           predictions and returns it
 Example :
 Returns : The next queued prediction, or undef if the queue is missing or
           empty

=cut

sub _prediction {
    my $self = shift;

    # hand out the head of the queue if there is one
    if(exists($self->{'_preds'}) && @{$self->{'_preds'}}) {
        return shift(@{$self->{'_preds'}});
    }
    return undef;
}
| 303 | |
=head2 _add_prediction

 Title   : _add_prediction()
 Usage   : $obj->_add_prediction($gene)
 Function: internal; appends a prediction to the queue that _prediction()
           reads from, creating the queue if it does not exist yet
 Example :
 Returns : The number of queued predictions after the addition

=cut

sub _add_prediction {
    my ($self, $gene) = @_;

    # make sure there is a queue to append to
    $self->{'_preds'} = [] unless exists($self->{'_preds'});

    return push(@{$self->{'_preds'}}, $gene);
}
| 322 | |
=head2 _predictions_parsed

 Title   : _predictions_parsed
 Usage   : $obj->_predictions_parsed
           $obj->_predictions_parsed(1)
 Function: internal; gets or sets the flag telling whether the prediction
           section has been parsed. Any defined argument sets the flag, so
           passing a false value such as 0 resets it. Without an argument
           (or with undef) the flag is only read, and initialized to 0 if it
           does not exist yet.
 Example :
 Returns : TRUE or FALSE
 Args    : Optional new value for the flag

=cut

sub _predictions_parsed {
    my ($self, $val) = @_;

    if(defined($val)) {
        # true values are stored as given, every false value as 0
        $self->{'_preds_parsed'} = $val ? $val : 0;
    } elsif(! exists($self->{'_preds_parsed'})) {
        $self->{'_preds_parsed'} = 0;
    }
    return $self->{'_preds_parsed'};
}
| 342 | |
| 343 | |
| 344 1; |
