Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/Genewise.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: Genewise.pm,v 1.10 2002/12/18 01:54:51 jason Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::Tools::Genewise | |
| 4 # | |
| 5 # Copyright Fugu Team | |
| 6 # | |
| 7 # You may distribute this module under the same terms as perl itself | |
| 8 | |
| 9 # POD documentation - main docs before the code | |
| 10 | |
| 11 =head1 NAME | |
| 12 | |
| 13 Bio::Tools::Genewise - Results of one Genewise run | |
| 14 | |
| 15 =head1 SYNOPSIS | |
| 16 | |
| 17 use Bio::Tools::Genewise; | |
| 18 my $gw = Bio::Tools::Genewise(-file=>"genewise.out"); | |
| 19 | |
| 20 while (my $gene = $gw->next_prediction){ | |
| 21 my @transcripts = $gene->transcripts; | |
| 22 foreach my $t(@transcripts){ | |
| 23 my @exons = $t->exons; | |
| 24 foreach my $e(@exons){ | |
| 25 print $e->start." ".$e->end."\n"; | |
| 26 } | |
| 27 } | |
| 28 } | |
| 29 | |
| 30 =head1 DESCRIPTION | |
| 31 | |
| 32 This is the parser for the output of Genewise. It takes either a file handle or | |
| 33 a file name and returns a Bio::SeqFeature::Gene::GeneStructure object. | |
| 34 | |
| 35 =head1 FEEDBACK | |
| 36 | |
| 37 =head2 Mailing Lists | |
| 38 | |
| 39 User feedback is an integral part of the evolution of this and other | |
| 40 Bioperl modules. Send your comments and suggestions preferably to one | |
| 41 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 42 | |
| 43 bioperl-l@bioperl.org - General discussion | |
| 44 http://bio.perl.org/MailList.html - About the mailing lists | |
| 45 | |
| 46 =head2 Reporting Bugs | |
| 47 | |
| 48 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 49 the bugs and their resolution. Bug reports can be submitted via email | |
| 50 or the web: | |
| 51 | |
| 52 bioperl-bugs@bio.perl.org | |
| 53 http://bugzilla.bioperl.org/ | |
| 54 | |
| 55 =head1 AUTHOR - Fugu Team | |
| 56 | |
| 57 Email: fugui@worf.fugu-sg.org | |
| 58 | |
| 59 =head1 APPENDIX | |
| 60 | |
| 61 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
| 62 | |
| 63 =cut | |
| 64 | |
| 65 | |
| 66 # Let the code begin... | |
| 67 | |
| 68 | |
| 69 package Bio::Tools::Genewise; | |
| 70 use vars qw(@ISA $Srctag); | |
| 71 use strict; | |
| 72 use Symbol; | |
| 73 | |
| 74 use Bio::Root::Root; | |
| 75 use Bio::Root::IO; | |
| 76 use Bio::Tools::AnalysisResult; | |
| 77 use Bio::SeqFeature::Generic; | |
| 78 use Bio::SeqFeature::Gene::Exon; | |
| 79 use Bio::SeqFeature::FeaturePair; | |
| 80 use Bio::SeqFeature::Gene::Transcript; | |
| 81 use Bio::SeqFeature::Gene::GeneStructure; | |
| 82 | |
| 83 @ISA = qw(Bio::Root::Root Bio::Root::IO); | |
| 84 $Srctag = 'genewise'; | |
| 85 | |
| 86 =head2 new | |
| 87 | |
| 88 Title : new | |
| 89 Usage : $obj->new(-file=>"genewise.out"); | |
| 90 $obj->new(-fh=>\*GW); | |
| 91 Function: Constructor for genewise wrapper. Takes either a file or filehandle | |
| 92 Example : | |
| 93 Returns : L<Bio::Tools::Genewise> | |
| 94 | |
| 95 =cut | |
| 96 | |
| 97 sub new { | |
| 98 my($class,@args) = @_; | |
| 99 my $self = $class->SUPER::new(@args); | |
| 100 $self->_initialize_io(@args); | |
| 101 return $self; | |
| 102 } | |
| 103 | |
| 104 =head2 _get_strand | |
| 105 | |
| 106 Title : _get_strand | |
| 107 Usage : $obj->_get_strand | |
| 108 Function: takes start and end values, swap them if start>end and returns end | |
| 109 Example : | |
| 110 Returns :$start,$end,$strand | |
| 111 | |
| 112 =cut | |
| 113 | |
| 114 sub _get_strand { | |
| 115 my ($self,$start,$end) = @_; | |
| 116 $start || $self->throw("Need a start"); | |
| 117 $end || $self->throw("Need an end"); | |
| 118 my $strand; | |
| 119 if ($start > $end) { | |
| 120 my $tmp = $start; | |
| 121 $start = $end; | |
| 122 $end = $tmp; | |
| 123 $strand = -1; | |
| 124 } | |
| 125 else { | |
| 126 $strand = 1; | |
| 127 } | |
| 128 return ($start,$end,$strand); | |
| 129 } | |
| 130 | |
| 131 =head2 score | |
| 132 | |
| 133 Title : score | |
| 134 Usage : $obj->score | |
| 135 Function: get/set for score info | |
| 136 Example : | |
| 137 Returns : a score value | |
| 138 | |
| 139 =cut | |
| 140 | |
| 141 sub _score { | |
| 142 my ($self,$val) = @_; | |
| 143 if($val){ | |
| 144 $self->{'_score'} = $val; | |
| 145 } | |
| 146 return $self->{'_score'}; | |
| 147 } | |
| 148 | |
| 149 =head2 _prot_id | |
| 150 | |
| 151 Title : _prot_id | |
| 152 Usage : $obj->_prot_id | |
| 153 Function: get/set for protein id | |
| 154 Example : | |
| 155 Returns :a protein id | |
| 156 | |
| 157 =cut | |
| 158 | |
| 159 sub _prot_id { | |
| 160 my ($self,$val) = @_; | |
| 161 if($val){ | |
| 162 $self->{'_prot_id'} = $val; | |
| 163 } | |
| 164 return $self->{'_prot_id'}; | |
| 165 } | |
| 166 | |
| 167 =head2 _target_id | |
| 168 | |
| 169 Title : _target_id | |
| 170 Usage : $obj->_target_id | |
| 171 Function: get/set for genomic sequence id | |
| 172 Example : | |
| 173 Returns :a target id | |
| 174 | |
| 175 =cut | |
| 176 | |
| 177 sub _target_id { | |
| 178 my ($self,$val) = @_; | |
| 179 if($val){ | |
| 180 $self->{'_target_id'} = $val; | |
| 181 } | |
| 182 return $self->{'_target_id'}; | |
| 183 } | |
| 184 | |
| 185 | |
| 186 =head2 next_prediction | |
| 187 | |
| 188 Title : next_prediction | |
| 189 Usage : while($gene = $genewise->next_prediction()) { | |
| 190 # do something | |
| 191 } | |
| 192 Function: Returns the gene structure prediction of the Genewise result | |
| 193 file. Call this method repeatedly until FALSE is returned. | |
| 194 | |
| 195 Example : | |
| 196 Returns : a Bio::SeqFeature::Gene::GeneStructure object | |
| 197 Args : | |
| 198 | |
| 199 =cut | |
| 200 | |
| 201 | |
| 202 sub next_prediction { | |
| 203 my ($self) = @_; | |
| 204 | |
| 205 my $genes = new Bio::SeqFeature::Gene::GeneStructure(-source => $Srctag); | |
| 206 my $transcript = new Bio::SeqFeature::Gene::Transcript(-source => $Srctag); | |
| 207 | |
| 208 local ($/) = "//"; | |
| 209 my $score; | |
| 210 my $prot_id; | |
| 211 my $target_id; | |
| 212 while ( defined($_ = $self->_readline) ) { | |
| 213 $self->debug( $_ ) if( $self->verbose > 0); | |
| 214 ($score) = $_=~m/Score\s+(\d+[\.][\d]+)/; | |
| 215 $self->_score($score) unless defined $self->_score; | |
| 216 ($prot_id) = $_=~m/Query protein:\s+(\S+)/; | |
| 217 $self->_prot_id($prot_id) unless defined $self->_prot_id; | |
| 218 ($target_id) = $_=~m/Target Sequence\s+(\S+)/; | |
| 219 $self->_target_id($target_id) unless defined $self->_target_id; | |
| 220 next unless /Gene\s+\d+\n/; | |
| 221 | |
| 222 #grab exon + supporting feature info | |
| 223 my @exons; | |
| 224 | |
| 225 unless ( @exons = $_ =~ m/(Exon .+\s+Supporting .+)/g ) { | |
| 226 @exons = $_ =~ m/(Exon .+\s+)/g; | |
| 227 | |
| 228 } | |
| 229 my $nbr = 1; | |
| 230 | |
| 231 #loop through each exon-supporting feature pair | |
| 232 foreach my $e (@exons){ | |
| 233 my ($e_start,$e_end,$phase) = $e =~ m/Exon\s+(\d+)\s+(\d+)\s+phase\s+(\d+)/; | |
| 234 my $e_strand; | |
| 235 ($e_start,$e_end,$e_strand) = $self->_get_strand($e_start,$e_end); | |
| 236 $transcript->strand($e_strand) unless $transcript->strand != 0; | |
| 237 | |
| 238 my $exon = new Bio::SeqFeature::Gene::Exon | |
| 239 (-seq_id=>$self->_target_id, | |
| 240 -source => $Srctag, | |
| 241 -start=>$e_start, | |
| 242 -end=>$e_end, | |
| 243 #-frame => $phase, | |
| 244 -strand=>$e_strand); | |
| 245 $exon->add_tag_value('phase',$phase); | |
| 246 if( $self->_prot_id ) { | |
| 247 $exon->add_tag_value('Sequence',"Protein:".$self->_prot_id); | |
| 248 } | |
| 249 $exon->add_tag_value("Exon",$nbr++); | |
| 250 | |
| 251 if( $e =~ m/Supporting\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) { | |
| 252 my ($geno_start,$geno_end, | |
| 253 $prot_start, | |
| 254 $prot_end) = ($1,$2,$3,$4); | |
| 255 | |
| 256 my $prot_strand; | |
| 257 ($prot_start,$prot_end, | |
| 258 $prot_strand) = $self->_get_strand($prot_start,$prot_end); | |
| 259 | |
| 260 my $pf = new Bio::SeqFeature::Generic | |
| 261 ( -start => $prot_start, | |
| 262 -end => $prot_end, | |
| 263 -seq_id => $self->_prot_id, | |
| 264 -score => $self->_score, | |
| 265 -strand => $prot_strand, | |
| 266 -source => $Srctag, | |
| 267 -primary=> 'supporting_protein_feature', | |
| 268 ); | |
| 269 my $geno_strand; | |
| 270 ($geno_start,$geno_end, | |
| 271 $geno_strand) = $self->_get_strand($geno_start,$geno_end); | |
| 272 my $gf = new Bio::SeqFeature::Generic | |
| 273 ( -start => $geno_start, | |
| 274 -end => $geno_end, | |
| 275 -seq_id => $self->_target_id, | |
| 276 -score => $self->_score, | |
| 277 -strand => $geno_strand, | |
| 278 -source => $Srctag, | |
| 279 -primary => 'supporting_genomic_feature', | |
| 280 ); | |
| 281 my $fp = new Bio::SeqFeature::FeaturePair(-feature1=>$gf, | |
| 282 -feature2=>$pf); | |
| 283 | |
| 284 $exon->add_tag_value( 'supporting_feature' => $fp ); | |
| 285 } | |
| 286 $transcript->add_exon($exon); | |
| 287 } | |
| 288 $transcript->seq_id($self->_target_id); | |
| 289 $genes->add_transcript($transcript); | |
| 290 $genes->seq_id($self->_target_id); | |
| 291 return $genes; | |
| 292 } | |
| 293 } | |
| 294 1; |
