Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Tools/Genewise.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Tools/Genewise.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,294 @@ +# $Id: Genewise.pm,v 1.10 2002/12/18 01:54:51 jason Exp $ +# +# BioPerl module for Bio::Tools::Genewise +# +# Copyright Fugu Team +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Tools::Genewise - Results of one Genewise run + +=head1 SYNOPSIS + + use Bio::Tools::Genewise; + my $gw = Bio::Tools::Genewise(-file=>"genewise.out"); + + while (my $gene = $gw->next_prediction){ + my @transcripts = $gene->transcripts; + foreach my $t(@transcripts){ + my @exons = $t->exons; + foreach my $e(@exons){ + print $e->start." ".$e->end."\n"; + } + } + } + +=head1 DESCRIPTION + +This is the parser for the output of Genewise. It takes either a file handle or +a file name and returns a Bio::SeqFeature::Gene::GeneStructure object. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. Bug reports can be submitted via email +or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Fugu Team + + Email: fugui@worf.fugu-sg.org + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::Tools::Genewise; +use vars qw(@ISA $Srctag); +use strict; +use Symbol; + +use Bio::Root::Root; +use Bio::Root::IO; +use Bio::Tools::AnalysisResult; +use Bio::SeqFeature::Generic; +use Bio::SeqFeature::Gene::Exon; +use Bio::SeqFeature::FeaturePair; +use Bio::SeqFeature::Gene::Transcript; +use Bio::SeqFeature::Gene::GeneStructure; + +@ISA = qw(Bio::Root::Root Bio::Root::IO); +$Srctag = 'genewise'; + +=head2 new + + Title : new + Usage : $obj->new(-file=>"genewise.out"); + $obj->new(-fh=>\*GW); + Function: Constructor for genewise wrapper. Takes either a file or filehandle + Example : + Returns : L<Bio::Tools::Genewise> + +=cut + +sub new { + my($class,@args) = @_; + my $self = $class->SUPER::new(@args); + $self->_initialize_io(@args); + return $self; +} + +=head2 _get_strand + + Title : _get_strand + Usage : $obj->_get_strand + Function: takes start and end values, swap them if start>end and returns end + Example : + Returns :$start,$end,$strand + +=cut + +sub _get_strand { + my ($self,$start,$end) = @_; + $start || $self->throw("Need a start"); + $end || $self->throw("Need an end"); + my $strand; + if ($start > $end) { + my $tmp = $start; + $start = $end; + $end = $tmp; + $strand = -1; + } + else { + $strand = 1; + } + return ($start,$end,$strand); +} + +=head2 score + + Title : score + Usage : $obj->score + Function: get/set for score info + Example : + Returns : a score value + +=cut + +sub _score { + my ($self,$val) = @_; + if($val){ + $self->{'_score'} = $val; + } + return $self->{'_score'}; +} + +=head2 _prot_id + + Title : _prot_id + Usage : $obj->_prot_id + Function: get/set for protein id + Example : + Returns :a protein id + +=cut + +sub _prot_id { + my ($self,$val) = @_; + if($val){ + $self->{'_prot_id'} = $val; + } + return $self->{'_prot_id'}; +} + +=head2 _target_id + + Title : _target_id + Usage : $obj->_target_id + Function: get/set for genomic sequence id + Example : + Returns :a target id + +=cut + +sub _target_id { + my ($self,$val) = @_; + if($val){ + $self->{'_target_id'} = $val; + } + return $self->{'_target_id'}; +} + + +=head2 next_prediction + + Title : next_prediction + Usage : while($gene = $genewise->next_prediction()) { + # do something + } + Function: Returns the gene structure prediction of the Genewise result + file. Call this method repeatedly until FALSE is returned. + + Example : + Returns : a Bio::SeqFeature::Gene::GeneStructure object + Args : + +=cut + + +sub next_prediction { + my ($self) = @_; + + my $genes = new Bio::SeqFeature::Gene::GeneStructure(-source => $Srctag); + my $transcript = new Bio::SeqFeature::Gene::Transcript(-source => $Srctag); + + local ($/) = "//"; + my $score; + my $prot_id; + my $target_id; + while ( defined($_ = $self->_readline) ) { + $self->debug( $_ ) if( $self->verbose > 0); + ($score) = $_=~m/Score\s+(\d+[\.][\d]+)/; + $self->_score($score) unless defined $self->_score; + ($prot_id) = $_=~m/Query protein:\s+(\S+)/; + $self->_prot_id($prot_id) unless defined $self->_prot_id; + ($target_id) = $_=~m/Target Sequence\s+(\S+)/; + $self->_target_id($target_id) unless defined $self->_target_id; + next unless /Gene\s+\d+\n/; + + #grab exon + supporting feature info + my @exons; + + unless ( @exons = $_ =~ m/(Exon .+\s+Supporting .+)/g ) { + @exons = $_ =~ m/(Exon .+\s+)/g; + + } + my $nbr = 1; + + #loop through each exon-supporting feature pair + foreach my $e (@exons){ + my ($e_start,$e_end,$phase) = $e =~ m/Exon\s+(\d+)\s+(\d+)\s+phase\s+(\d+)/; + my $e_strand; + ($e_start,$e_end,$e_strand) = $self->_get_strand($e_start,$e_end); + $transcript->strand($e_strand) unless $transcript->strand != 0; + + my $exon = new Bio::SeqFeature::Gene::Exon + (-seq_id=>$self->_target_id, + -source => $Srctag, + -start=>$e_start, + -end=>$e_end, + #-frame => $phase, + -strand=>$e_strand); + $exon->add_tag_value('phase',$phase); + if( $self->_prot_id ) { + $exon->add_tag_value('Sequence',"Protein:".$self->_prot_id); + } + $exon->add_tag_value("Exon",$nbr++); + + if( $e =~ m/Supporting\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) { + my ($geno_start,$geno_end, + $prot_start, + $prot_end) = ($1,$2,$3,$4); + + my $prot_strand; + ($prot_start,$prot_end, + $prot_strand) = $self->_get_strand($prot_start,$prot_end); + + my $pf = new Bio::SeqFeature::Generic + ( -start => $prot_start, + -end => $prot_end, + -seq_id => $self->_prot_id, + -score => $self->_score, + -strand => $prot_strand, + -source => $Srctag, + -primary=> 'supporting_protein_feature', + ); + my $geno_strand; + ($geno_start,$geno_end, + $geno_strand) = $self->_get_strand($geno_start,$geno_end); + my $gf = new Bio::SeqFeature::Generic + ( -start => $geno_start, + -end => $geno_end, + -seq_id => $self->_target_id, + -score => $self->_score, + -strand => $geno_strand, + -source => $Srctag, + -primary => 'supporting_genomic_feature', + ); + my $fp = new Bio::SeqFeature::FeaturePair(-feature1=>$gf, + -feature2=>$pf); + + $exon->add_tag_value( 'supporting_feature' => $fp ); + } + $transcript->add_exon($exon); + } + $transcript->seq_id($self->_target_id); + $genes->add_transcript($transcript); + $genes->seq_id($self->_target_id); + return $genes; + } +} +1;