Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Tools/Genomewise.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Tools/Genomewise.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,219 @@ +# $Id: Genomewise.pm,v 1.1.2.1 2003/03/25 12:32:16 heikki Exp $ +# +# BioPerl module for Bio::Tools::Genomewise +# +# Copyright Jason Stajich <jason@bioperl.org> +# +# You may distribute this module under the same terms as perl itself +# +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Tools::Genomewise - Results of one Genomewise run + +=head1 SYNOPSIS + + use Bio::Tools::Genomewise; + my $gw = Bio::Tools::Genomewise(-file=>"genomewise.out"); + + while (my $gene = $gw->next_prediction){ + my @transcripts = $gw->transcripts; + foreach my $t(@transcripts){ + my @exons = $t->exons; + foreach my $e(@exons){ + print $e->start." ".$e->end."\n"; + } + } + } + +=head1 DESCRIPTION + +This is the parser for the output of Genewise. It takes either a file +handle or a file name and returns a +Bio::SeqFeature::Gene::GeneStructure object. You will need to specify +the proper target sequence id on the object with the +$feature-E<gt>seq_id($seqid). + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. Bug reports can be submitted via email +or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Fugu Team + + Email: fugui@worf.fugu-sg.org + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::Tools::Genomewise; +use vars qw(@ISA $Srctag); +use strict; + +use Bio::Tools::Genewise; +use Bio::Tools::AnalysisResult; +use Bio::SeqFeature::Generic; +use Bio::SeqFeature::Gene::Exon; +use Bio::SeqFeature::FeaturePair; +use Bio::SeqFeature::Gene::Transcript; +use Bio::SeqFeature::Gene::GeneStructure; + +@ISA = qw(Bio::Tools::Genewise); + +$Srctag = 'genomewise'; + +=head2 new + + Title : new + Usage : $obj->new(-file=>"genewise.out"); + $obj->new(-fh=>\*GW); + Function: Constructor for genomewise wrapper. Takes either a file or filehandle + Example : + Returns : L<Bio::Tools::Genomewise> + +=cut + +sub new { + my($class,@args) = @_; + my $self = $class->SUPER::new(@args); + return $self; +} + +=head2 _get_strand + + Title : _get_strand + Usage : $obj->_get_strand + Function: takes start and end values, swap them if start>end and returns end + Example : + Returns :$start,$end,$strand + +=cut + +=head2 score + + Title : score + Usage : $obj->score + Function: get/set for score info + Example : + Returns : a score value + +=cut + +=head2 _prot_id + + Title : _prot_id + Usage : $obj->_prot_id + Function: get/set for protein id + Example : + Returns :a protein id + +=cut + +=head2 _target_id + + Title : _target_id + Usage : $obj->_target_id + Function: get/set for genomic sequence id + Example : + Returns :a target id + +=cut + + +=head2 next_prediction + + Title : next_prediction + Usage : while($gene = $genewise->next_prediction()) { + # do something + } + Function: Returns the gene structure prediction of the Genomewise result + file. Call this method repeatedly until FALSE is returned. + + Example : + Returns : a Bio::SeqFeature::Gene::GeneStructure object + Args : + +=cut + + +sub next_prediction { + my ($self) = @_; + + my $genes; + while ($_ = $self->_readline) { + $self->debug( $_ ) if( $self->verbose > 0); + last if( /^\/\//); + + if( /^Gene\s+\d+\s*$/ ) { + $genes = new Bio::SeqFeature::Gene::GeneStructure + (-source => $Srctag, + -seq_id => $self->_target_id, # if this had been specified + ); + $_ = $self->_readline; + $self->debug( $_ ) if( $self->verbose > 0); + + unless ( /^Gene\s+(\d+)\s+(\d+)\s*$/ ) { + $self->warn("Unparseable genomewise output"); + last; + } + my $transcript = new Bio::SeqFeature::Gene::Transcript + (-source => $Srctag, + -seq_id => $self->_target_id, # if this had been specified + -start => $1, + -end => $2, + ); + my $nbr = 1; + while( $_ = $self->_readline ) { + $self->debug( $_ ) if( $self->verbose > 0); + + unless( m/^\s+Exon\s+(\d+)\s+(\d+)\s+phase\s+(\d+)/ ){ + $self->_pushback($_); + last; + } + my ($e_start,$e_end,$phase,$e_strand) = ($1,$2,$3); + + ($e_start,$e_end,$e_strand) = $self->_get_strand($e_start, + $e_end); + $transcript->strand($e_strand) unless $transcript->strand != 0; + + my $exon = new Bio::SeqFeature::Gene::Exon + (-seq_id=>$self->_target_id, + -source => $Srctag, + -start=>$e_start, + -end=>$e_end, + -frame => $phase, + -strand=>$e_strand); + $exon->add_tag_value("Exon",$nbr++); + $exon->add_tag_value('phase',$phase); + $transcript->add_exon($exon); + } + $genes->add_transcript($transcript); + last; # only process a single gene at a time + } + } + return $genes; +} +1;