0
|
1 # $Id: Genewise.pm,v 1.10 2002/12/18 01:54:51 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Tools::Genewise
|
|
4 #
|
|
5 # Copyright Fugu Team
|
|
6 #
|
|
7 # You may distribute this module under the same terms as perl itself
|
|
8
|
|
9 # POD documentation - main docs before the code
|
|
10
|
|
11 =head1 NAME
|
|
12
|
|
13 Bio::Tools::Genewise - Results of one Genewise run
|
|
14
|
|
15 =head1 SYNOPSIS
|
|
16
|
|
17 use Bio::Tools::Genewise;
|
|
18 my $gw = Bio::Tools::Genewise->new(-file=>"genewise.out");
|
|
19
|
|
20 while (my $gene = $gw->next_prediction){
|
|
21 my @transcripts = $gene->transcripts;
|
|
22 foreach my $t(@transcripts){
|
|
23 my @exons = $t->exons;
|
|
24 foreach my $e(@exons){
|
|
25 print $e->start." ".$e->end."\n";
|
|
26 }
|
|
27 }
|
|
28 }
|
|
29
|
|
30 =head1 DESCRIPTION
|
|
31
|
|
32 This is the parser for the output of Genewise. It takes either a file handle or
|
|
33 a file name and returns a Bio::SeqFeature::Gene::GeneStructure object.
|
|
34
|
|
35 =head1 FEEDBACK
|
|
36
|
|
37 =head2 Mailing Lists
|
|
38
|
|
39 User feedback is an integral part of the evolution of this and other
|
|
40 Bioperl modules. Send your comments and suggestions preferably to one
|
|
41 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
42
|
|
43 bioperl-l@bioperl.org - General discussion
|
|
44 http://bio.perl.org/MailList.html - About the mailing lists
|
|
45
|
|
46 =head2 Reporting Bugs
|
|
47
|
|
48 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
49 of the bugs and their resolution. Bug reports can be submitted via email
|
|
50 or the web:
|
|
51
|
|
52 bioperl-bugs@bio.perl.org
|
|
53 http://bugzilla.bioperl.org/
|
|
54
|
|
55 =head1 AUTHOR - Fugu Team
|
|
56
|
|
57 Email: fugui@worf.fugu-sg.org
|
|
58
|
|
59 =head1 APPENDIX
|
|
60
|
|
61 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
62
|
|
63 =cut
|
|
64
|
|
65
|
|
66 # Let the code begin...
|
|
67
|
|
68
|
|
69 package Bio::Tools::Genewise;
|
|
70 use vars qw(@ISA $Srctag);
|
|
71 use strict;
|
|
72 use Symbol;
|
|
73
|
|
74 use Bio::Root::Root;
|
|
75 use Bio::Root::IO;
|
|
76 use Bio::Tools::AnalysisResult;
|
|
77 use Bio::SeqFeature::Generic;
|
|
78 use Bio::SeqFeature::Gene::Exon;
|
|
79 use Bio::SeqFeature::FeaturePair;
|
|
80 use Bio::SeqFeature::Gene::Transcript;
|
|
81 use Bio::SeqFeature::Gene::GeneStructure;
|
|
82
|
|
83 @ISA = qw(Bio::Root::Root Bio::Root::IO);
|
|
84 $Srctag = 'genewise';
|
|
85
|
|
86 =head2 new
|
|
87
|
|
88 Title : new
|
|
89 Usage : $obj->new(-file=>"genewise.out");
|
|
90 $obj->new(-fh=>\*GW);
|
|
91 Function: Constructor for genewise wrapper. Takes either a file or filehandle
|
|
92 Example :
|
|
93 Returns : L<Bio::Tools::Genewise>
|
|
94
|
|
95 =cut
|
|
96
|
|
# Constructor: builds the object through the parent class constructor and
# then hands the same arguments (e.g. -file or -fh) to _initialize_io so the
# input stream is set up. Returns the new object.
sub new {
    my ($pkg, @params) = @_;

    my $parser = $pkg->SUPER::new(@params);
    $parser->_initialize_io(@params);

    return $parser;
}
|
|
103
|
|
104 =head2 _get_strand
|
|
105
|
|
106 Title : _get_strand
|
|
107 Usage : $obj->_get_strand
|
|
108 Function: takes start and end values, swaps them if start>end, and returns start, end and strand (-1 if swapped, 1 otherwise)
|
|
109 Example :
|
|
110 Returns :$start,$end,$strand
|
|
111
|
|
112 =cut
|
|
113
|
|
# Normalise a coordinate pair so that start <= end and report the strand.
#
# Args    : $start, $end - both must be true values, otherwise throw() is
#           called on the object.
# Returns : list ($start, $end, $strand); the coordinates are swapped and the
#           strand is -1 when start > end, otherwise they are returned as
#           given with strand 1.
sub _get_strand {
    my ($self, $start, $end) = @_;

    $self->throw("Need a start") unless $start;
    $self->throw("Need an end")  unless $end;

    # Reverse-ordered coordinates: hand them back swapped, flagged as minus.
    return ($end, $start, -1) if $start > $end;

    return ($start, $end, 1);
}
|
|
130
|
|
131 =head2 _score
|
|
132
|
|
133 Title : _score
|
|
134 Usage : $obj->_score
|
|
135 Function: get/set for score info
|
|
136 Example :
|
|
137 Returns : a score value
|
|
138
|
|
139 =cut
|
|
140
|
|
# Get/set accessor for the score stored under the '_score' key.
#
# Args    : optional new value; it is stored whenever it is defined, so a
#           score of 0 is kept (a plain truth test would discard it).
#           Passing undef or no argument leaves the stored value untouched.
# Returns : the currently stored score, or undef if none has been set.
sub _score {
    my ($self, $val) = @_;
    if (defined $val) {
        $self->{'_score'} = $val;
    }
    return $self->{'_score'};
}
|
|
148
|
|
149 =head2 _prot_id
|
|
150
|
|
151 Title : _prot_id
|
|
152 Usage : $obj->_prot_id
|
|
153 Function: get/set for protein id
|
|
154 Example :
|
|
155 Returns :a protein id
|
|
156
|
|
157 =cut
|
|
158
|
|
# Get/set accessor for the protein id stored under the '_prot_id' key.
#
# Args    : optional new value; it is stored whenever it is defined, so ids
#           that are false in Perl (such as "0") are kept. Passing undef or
#           no argument leaves the stored value untouched.
# Returns : the currently stored protein id, or undef if none has been set.
sub _prot_id {
    my ($self, $val) = @_;
    if (defined $val) {
        $self->{'_prot_id'} = $val;
    }
    return $self->{'_prot_id'};
}
|
|
166
|
|
167 =head2 _target_id
|
|
168
|
|
169 Title : _target_id
|
|
170 Usage : $obj->_target_id
|
|
171 Function: get/set for genomic sequence id
|
|
172 Example :
|
|
173 Returns :a target id
|
|
174
|
|
175 =cut
|
|
176
|
|
# Get/set accessor for the genomic (target) sequence id stored under the
# '_target_id' key.
#
# Args    : optional new value; it is stored whenever it is defined, so ids
#           that are false in Perl (such as "0") are kept. Passing undef or
#           no argument leaves the stored value untouched.
# Returns : the currently stored target id, or undef if none has been set.
sub _target_id {
    my ($self, $val) = @_;
    if (defined $val) {
        $self->{'_target_id'} = $val;
    }
    return $self->{'_target_id'};
}
|
|
184
|
|
185
|
|
186 =head2 next_prediction
|
|
187
|
|
188 Title : next_prediction
|
|
189 Usage : while($gene = $genewise->next_prediction()) {
|
|
190 # do something
|
|
191 }
|
|
192 Function: Returns the gene structure prediction of the Genewise result
|
|
193 file. Call this method repeatedly until FALSE is returned.
|
|
194
|
|
195 Example :
|
|
196 Returns : a Bio::SeqFeature::Gene::GeneStructure object
|
|
197 Args :
|
|
198
|
|
199 =cut
|
|
200
|
|
201
|
|
# Return the next gene prediction found in the Genewise report.
#
# The input is consumed in records separated by "//". Score, query protein
# id and target sequence id are remembered on the object whenever a record
# carries them, so that later records (which hold the "Gene" section) can
# use them. The first record containing a "Gene <n>" line is turned into a
# gene structure holding one transcript with one exon per "Exon" line.
#
# Returns : a Bio::SeqFeature::Gene::GeneStructure object, or nothing
#           (undef / empty list) once the input is exhausted.
# Throws  : via _get_strand ("Need a start" / "Need an end") when an exon or
#           supporting-feature line does not yield usable coordinates.
sub next_prediction {
    my ($self) = @_;

    # NOTE(review): relies on _readline honouring $/ - confirm against
    # Bio::Root::IO.
    local $/ = "//";

    # A lexical record is used instead of $_ so that the caller's $_ is not
    # overwritten by this method.
    while ( defined( my $record = $self->_readline ) ) {
        $self->debug($record) if $self->verbose > 0;

        # Refresh the header values whenever a record provides them; keeping
        # only the first value ever seen would attach stale data to later
        # results in the same stream. Negative and integer scores are
        # accepted as well as positive decimals.
        my ($score) = $record =~ m/Score\s+(-?\d+(?:\.\d+)?)/;
        $self->_score($score) if defined $score;

        my ($prot_id) = $record =~ m/Query protein:\s+(\S+)/;
        $self->_prot_id($prot_id) if defined $prot_id;

        my ($target_id) = $record =~ m/Target Sequence\s+(\S+)/;
        $self->_target_id($target_id) if defined $target_id;

        next unless $record =~ m/Gene\s+\d+\n/;

        my $genes =
            Bio::SeqFeature::Gene::GeneStructure->new(-source => $Srctag);
        my $transcript =
            Bio::SeqFeature::Gene::Transcript->new(-source => $Srctag);

        # Grab exon + supporting feature info; fall back to bare exon lines
        # when no exon is followed by a "Supporting" line.
        my @exons = $record =~ m/(Exon .+\s+Supporting .+)/g;
        @exons = $record =~ m/(Exon .+\s+)/g unless @exons;

        my $nbr = 1;

        # Loop through each exon / supporting feature pair.
        foreach my $e (@exons) {
            my ($e_start, $e_end, $phase) =
                $e =~ m/Exon\s+(\d+)\s+(\d+)\s+phase\s+(\d+)/;
            my $e_strand;
            ($e_start, $e_end, $e_strand) =
                $self->_get_strand($e_start, $e_end);

            # The transcript takes the strand of its first exon; a plain
            # truth test avoids a numeric comparison against undef.
            $transcript->strand($e_strand) unless $transcript->strand;

            my $exon = Bio::SeqFeature::Gene::Exon->new(
                -seq_id => $self->_target_id,
                -source => $Srctag,
                -start  => $e_start,
                -end    => $e_end,
                #-frame => $phase,
                -strand => $e_strand,
            );
            $exon->add_tag_value('phase', $phase);
            if ( $self->_prot_id ) {
                $exon->add_tag_value('Sequence', "Protein:" . $self->_prot_id);
            }
            $exon->add_tag_value("Exon", $nbr++);

            if ( $e =~ m/Supporting\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/ ) {
                my ($geno_start, $geno_end, $prot_start, $prot_end) =
                    ($1, $2, $3, $4);

                my $prot_strand;
                ($prot_start, $prot_end, $prot_strand) =
                    $self->_get_strand($prot_start, $prot_end);

                my $pf = Bio::SeqFeature::Generic->new(
                    -start   => $prot_start,
                    -end     => $prot_end,
                    -seq_id  => $self->_prot_id,
                    -score   => $self->_score,
                    -strand  => $prot_strand,
                    -source  => $Srctag,
                    -primary => 'supporting_protein_feature',
                );

                my $geno_strand;
                ($geno_start, $geno_end, $geno_strand) =
                    $self->_get_strand($geno_start, $geno_end);

                my $gf = Bio::SeqFeature::Generic->new(
                    -start   => $geno_start,
                    -end     => $geno_end,
                    -seq_id  => $self->_target_id,
                    -score   => $self->_score,
                    -strand  => $geno_strand,
                    -source  => $Srctag,
                    -primary => 'supporting_genomic_feature',
                );

                my $fp = Bio::SeqFeature::FeaturePair->new(
                    -feature1 => $gf,
                    -feature2 => $pf,
                );

                $exon->add_tag_value( 'supporting_feature' => $fp );
            }
            $transcript->add_exon($exon);
        }

        $transcript->seq_id($self->_target_id);
        $genes->add_transcript($transcript);
        $genes->seq_id($self->_target_id);
        return $genes;
    }

    # No further "Gene" record: signal the end of the predictions.
    return;
}
|
|
294 1;
|