diff variant_effect_predictor/Bio/Tools/MZEF.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Tools/MZEF.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,344 @@
+# $Id: MZEF.pm,v 1.9 2002/10/22 07:38:46 lapp Exp $
+#
+# BioPerl module for Bio::Tools::MZEF
+#
+# Cared for by Hilmar Lapp <hlapp@gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::MZEF - Results of one MZEF run
+
+=head1 SYNOPSIS
+
+   $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
+   # filehandle:
+   $mzef = Bio::Tools::MZEF->new( -fh  => \*INPUT );
+   # to indicate that the sequence was reversed prior to feeding it to MZEF
+   # and that you want to have this reflected in the strand() attribute of 
+   # the exons, as well have the coordinates translated to the non-reversed
+   # sequence
+   $mzef = Bio::Tools::MZEF->new( -file   => 'result.mzef',
+                                  -strand => -1 );
+
+   # parse the results
+   # note: this class is-a Bio::Tools::AnalysisResult which implements
+   # Bio::SeqAnalysisParserI, i.e., $genscan->next_feature() is the same
+   while($gene = $mzef->next_prediction()) {
+       # $gene is an instance of Bio::Tools::Prediction::Gene
+
+       # $gene->exons() returns an array of 
+       # Bio::Tools::Prediction::Exon objects
+       # all exons:
+       @exon_arr = $gene->exons();
+
+       # internal exons only
+       @intrl_exons = $gene->exons('Internal');
+       # note that presently MZEF predicts only internal exons!
+   }
+
+   # essential if you gave a filename at initialization (otherwise the file
+   # will stay open)
+   $mzef->close();
+
+=head1 DESCRIPTION
+
+The MZEF module provides a parser for MZEF gene structure prediction
+output.
+
+This module inherits off L<Bio::Tools::AnalysisResult> and therefore
+implements L<Bio::SeqAnalysisParserI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org          - General discussion
+  http://bio.perl.org/MailList.html             - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via email
+or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp@gmx.net (or hilmar.lapp@pharma.novartis.com)
+
+Describe contact details here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::MZEF;
+use vars qw(@ISA);
+use strict;
+
+use Bio::Tools::AnalysisResult;
+use Bio::Tools::Prediction::Gene;
+use Bio::Tools::Prediction::Exon;
+
+@ISA = qw(Bio::Tools::AnalysisResult);
+
+sub _initialize_state {
+    my($self,@args) = @_;
+
+    # first call the inherited method!
+    my $make = $self->SUPER::_initialize_state(@args);
+
+    # handle our own parameters
+    my ($strand, $params) =
+	$self->_rearrange([qw(STRAND
+			      )],
+			  @args);
+
+    # our private state variables
+    $strand = 1 unless defined($strand);
+    $self->{'_strand'} = $strand;
+    $self->{'_preds_parsed'} = 0;
+    $self->{'_has_cds'} = 0;
+    # array of pre-parsed predictions
+    $self->{'_preds'} = [];
+}
+
+=head2 analysis_method
+
+ Usage     : $mzef->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /mzef/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method { 
+#-------------
+    my ($self, $method) = @_;  
+    if($method && ($method !~ /mzef/i)) {
+	$self->throw("method $method not supported in " . ref($self));
+    }
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($gene = $mzef->next_feature()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the MZEF result
+           file. Call this method repeatedly until FALSE is returned.
+
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_prediction() at present.
+
+           Note that with the present version of MZEF there will only be one
+           object returned, because MZEF does not predict individual genes
+           but just potential internal exons.
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_feature {
+    my ($self,@args) = @_;
+    # even though next_prediction doesn't expect any args (and this method
+    # does neither), we pass on args in order to be prepared if this changes
+    # ever
+    return $self->next_prediction(@args);
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $mzef->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the MZEF result
+           file. Call this method repeatedly until FALSE is returned.
+
+           Note that with the present version of MZEF there will only be one
+           object returned, because MZEF does not predict individual genes
+           but just potential internal exons.
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_prediction {
+    my ($self) = @_;
+    my $gene;
+
+    # if the prediction section hasn't been parsed yet, we do this now
+    $self->_parse_predictions() unless $self->_predictions_parsed();
+
+    # return the next gene structure (transcript)
+    return $self->_prediction();
+}
+
+=head2 _parse_predictions
+
+ Title   : _parse_predictions()
+ Usage   : $obj->_parse_predictions()
+ Function: Parses the prediction section. Automatically called by
+           next_prediction() if not yet done.
+ Example :
+ Returns : 
+
+=cut
+
+sub _parse_predictions {
+    my ($self) = @_;
+    my ($method); # set but not used presently
+    my $exon_tag = "InternalExon";
+    my $gene;
+    # my $seqname; # name given in output is poorly formatted
+    my $seqlen;
+    my $prednr = 1;
+
+    while(defined($_ = $self->_readline())) {
+	if(/^\s*(\d+)\s*-\s*(\d+)\s+/) {
+	    # exon or signal
+	    if(! defined($gene)) {
+		$gene = Bio::Tools::Prediction::Gene->new(
+                                       '-primary' => "GenePrediction$prednr",
+				       '-source' => 'MZEF');
+	    }
+	    # we handle start-end first because may not be space delimited
+	    # for large numbers
+	    my ($start,$end) = ($1,$2);
+	    s/^\s*(\d+)\s*-\s*(\d+)\s+//;
+	    # split the rest into fields
+	    chomp();
+	    # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
+	    # index:              0   1   2   3   4   5   6   7
+	    my @flds = split(' ', $_);
+	    # create the feature object depending on the type of signal --
+	    # which is always an (internal) exon for MZEF
+	    my $predobj = Bio::Tools::Prediction::Exon->new();
+	    # set common fields
+	    $predobj->source_tag('MZEF');
+	    $predobj->significance($flds[0]);
+	    $predobj->score($flds[0]); # what shall we set as overall score?
+	    $predobj->strand($self->{'_strand'}); # MZEF searches only one
+	    if($predobj->strand() == 1) {
+		$predobj->start($start);
+		$predobj->end($end);
+	    } else {
+		$predobj->start($seqlen-$end+1);
+		$predobj->end($seqlen-$start+1);
+	    }
+	    # set scores
+	    $predobj->start_signal_score($flds[5]);
+	    $predobj->end_signal_score($flds[7]);
+	    $predobj->coding_signal_score($flds[6]);
+	    # frame -- we simply extract the one with highest score from the
+	    # orf field, and store the individual scores for now
+	    my $frm = index($flds[4], "1");
+	    $predobj->frame(($frm < 0) ? undef : $frm);
+	    $predobj->primary_tag($exon_tag);
+	    $predobj->is_coding(1);
+	    # add to gene structure (should be done only when start and end
+	    # are set, in order to allow for proper expansion of the range)
+	    $gene->add_exon($predobj);		
+	    next;
+	}
+	if(/^\s*Internal .*(MZEF)/) {
+	    $self->analysis_method($1);
+	    next;
+	}
+	if(/^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
+	    # $seqname = $1; # this is too poor currently (file name truncated
+                             # to 10 chars) in order to be sensible enough
+	    $seqlen = $2;
+	    next;
+	}
+    }
+    # $gene->seq_id($seqname);
+    $self->_add_prediction($gene) if defined($gene);
+    $self->_predictions_parsed(1);
+}
+
+=head2 _prediction
+
+ Title   : _prediction()
+ Usage   : $gene = $obj->_prediction()
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _prediction {
+    my ($self) = @_;
+
+    return undef unless(exists($self->{'_preds'}) && @{$self->{'_preds'}});
+    return shift(@{$self->{'_preds'}});
+}
+
+=head2 _add_prediction
+
+ Title   : _add_prediction()
+ Usage   : $obj->_add_prediction($gene)
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _add_prediction {
+    my ($self, $gene) = @_;
+
+    if(! exists($self->{'_preds'})) {
+	$self->{'_preds'} = [];
+    }
+    push(@{$self->{'_preds'}}, $gene);
+}
+
+=head2 _predictions_parsed
+
+ Title   : _predictions_parsed
+ Usage   : $obj->_predictions_parsed
+ Function: internal
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _predictions_parsed {
+    my ($self, $val) = @_;
+
+    $self->{'_preds_parsed'} = $val if $val;
+    if(! exists($self->{'_preds_parsed'})) {
+	$self->{'_preds_parsed'} = 0;
+    }
+    return $self->{'_preds_parsed'};
+}
+
+
+1;