diff variant_effect_predictor/Bio/Tools/Est2Genome.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Tools/Est2Genome.pm	Thu Apr 11 06:29:17 2013 -0400
@@ -0,0 +1,259 @@
+# $Id: Est2Genome.pm,v 1.11 2002/12/05 13:46:36 heikki Exp $
+#
+# BioPerl module for Bio::Tools::Est2Genome
+#
+# Cared for by Jason Stajich <jason@bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Est2Genome - Parse est2genome output, makes simple Bio::SeqFeature::Generic objects
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Est2Genome;
+
+  my $featureiter = new Bio::Tools::Est2Genome(-file => 'output.est2genome');
+
+  # This is going to be fixed to use the SeqAnalysisI next_feature
+  # Method eventually when we have the objects to put the data in
+  # properly
+  while( my $f = $featureiter->parse_next_gene ) {
+   # process Bio::SeqFeature::Generic objects here
+  }
+
+=head1 DESCRIPTION
+
+This module is a parser for est2genome [EMBOSS] alignments of est/cdna
+sequence to genomic DNA.  This is generally accepted as the best
+program for predicting splice sites based on est/cdnas*.
+
+This module currently does not try pull out the ungapped alignments
+(Segment) but may in the future.
+
+
+* AFAIK
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason@bioperl.org
+
+Describe contact details here
+
+=head1 CONTRIBUTORS
+
+Additional contributors names and emails here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Est2Genome;
+use vars qw(@ISA);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Root::Root;
+use Bio::Tools::AnalysisResult;
+use Bio::SeqFeature::Gene::Exon;
+use Bio::SeqFeature::Gene::Intron;
+use Bio::SeqFeature::Gene::GeneStructure;
+use Bio::SeqFeature::SimilarityPair;
+
+@ISA = qw(Bio::Tools::AnalysisResult );
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Est2Genome();
+ Function: Builds a new Bio::Tools::Est2Genome object
+ Returns : an instance of Bio::Tools::Est2Genome
+ Args    : -file => 'output.est2genome' or
+           -fh   => \*EST2GENOMEOUTPUT
+           -genomefirst => 1  # genome was the first input (not standard)
+
+=cut
+
+sub _initialize_state {
+    my($self,@args) = @_;
+
+    # call the inherited method first
+    my $make = $self->SUPER::_initialize_state(@args);
+
+    my ($genome_is_first) = $self->_rearrange([qw(GENOMEFIRST)], @args);
+
+    delete($self->{'_genome_is_first'});
+    $self->{'_genome_is_first'} = $genome_is_first if(defined($genome_is_first));
+    $self->analysis_method("est2genome");
+}
+
+=head2 analysis_method
+
+ Usage     : $sim4->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /est2genome/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+#-------------
+    my ($self, $method) = @_;
+    if($method && ($method !~ /est2genome/i)) {
+	$self->throw("method $method not supported in " . ref($self));
+    }
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 parse_next_gene
+
+ Title   : parse_next_gene
+ Usage   : @gene = $est2genome_result->parse_next_gene;
+           foreach $exon (@exons) {
+               # do something
+           }
+
+ Function: Parses the next alignments of the est2genome result file and
+           returns the found exons as an array of
+           Bio::SeqFeature::SimilarityPair objects. Call
+           this method repeatedly until an empty array is returned to get the
+           results for all alignments.
+
+           The $exon->seq_id() attribute will be set to the identifier of the
+           respective sequence for both sequences.
+           The length is accessible via the seqlength()
+           attribute of $exon->query() and
+           $exon->est_hit().
+ Returns : An array (or array reference) of Bio::SeqFeature::SimilarityPair and Bio::SeqFeature::Generic objects
+ Args    : none
+
+
+=cut
+
+sub parse_next_gene {
+   my ($self) = @_;
+   my $seensegment = 0;
+   my @features;
+   my ($qstrand,$hstrand) = (1,1);
+   my $lasthseqname;
+   while( defined($_ = $self->_readline) ) {
+       if( /Note Best alignment is between (reversed|forward) est and (reversed|forward) genome, (but|and) splice\s+sites imply\s+(forward gene|REVERSED GENE)/) {
+	   if( $seensegment ) {
+	       $self->_pushback($_);
+	       return wantarray ? @features : \@features;
+	   }
+	   $hstrand = -1 if $1 eq 'reversed';
+	   $qstrand = -1 if $4 eq 'REVERSED GENE';
+	   $self->debug( "1=$1, 2=$2, 4=$4\n");
+       }
+       elsif( /^Exon/ ) {
+	   my ($name,$len,$score,$qstart,$qend,$qseqname,
+	       $hstart,$hend, $hseqname) = split;
+	   $lasthseqname = $hseqname;
+	   my $query = new Bio::SeqFeature::Similarity(-primary => $name,
+						       -source  => $self->analysis_method,
+						       -seq_id => $qseqname, # FIXME WHEN WE REDO THE GENERIC NAME CHANGE
+						       -start   => $qstart,
+						       -end     => $qend,
+						       -strand  => $qstrand,
+						       -score   => $score,
+						       -tag => {
+#							   'Location' => "$hstart..$hend",
+							   'Sequence' => "$hseqname",
+							   }
+						       );
+	   my $hit = new Bio::SeqFeature::Similarity(-primary => 'exon_hit',
+						     -source  => $self->analysis_method,
+						     -seq_id => $hseqname,
+						     -start   => $hstart,
+						     -end     => $hend,
+						     -strand  => $hstrand,
+						     -score   => $score,
+						     -tag => {
+#							 'Location' => "$qstart..$qend",
+							 'Sequence' => "$qseqname",
+							
+						     }
+						     );
+	   push @features, new Bio::SeqFeature::SimilarityPair
+	       (-query => $query,
+		-hit   => $hit,
+		-source => $self->analysis_method);
+       } elsif( /^([\-\+\?])(Intron)/) {
+	   my ($name,$len,$score,$qstart,$qend,$qseqname) = split;
+	   push @features, new Bio::SeqFeature::Generic(-primary => $2,
+							-source => $self->analysis_method,
+							-start => $qstart,
+							-end   => $qend,
+							-strand => $qstrand,
+							-score  => $score,
+							-seq_id => $qseqname,
+							-tag => {
+							    'Sequence' => $lasthseqname});
+       } elsif( /^Span/ ) {
+       } elsif( /^Segment/ ) {
+	   $seensegment = 1;
+       } elsif( /^\s+$/ ) { # do nothing
+       } else {
+	   $self->warn( "unknown line $_\n");
+       }
+   }
+   return undef unless( @features );
+   return wantarray ? @features : \@features;
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : $seqfeature = $obj->next_feature();
+ Function: Returns the next feature available in the analysis result, or
+           undef if there are no more features.
+ Example :
+ Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
+           more features.
+ Args    : none
+
+=cut
+
+sub next_feature {
+    my ($self) = shift;
+    $self->throw("We haven't really done this right, yet, use parse_next_gene");
+}
+
+
+1;