0
|
1 # $Id: Est2Genome.pm,v 1.11 2002/12/05 13:46:36 heikki Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Tools::Est2Genome
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Tools::Est2Genome - Parse est2genome output, makes simple Bio::SeqFeature::Generic objects
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use Bio::Tools::Est2Genome;
|
|
20
|
|
21 my $featureiter = new Bio::Tools::Est2Genome(-file => 'output.est2genome');
|
|
22
|
|
23 # This is going to be fixed to use the SeqAnalysisI next_feature
|
|
24 # Method eventually when we have the objects to put the data in
|
|
25 # properly
|
|
26 while( my $f = $featureiter->parse_next_gene ) {
|
|
27 # process Bio::SeqFeature::Generic objects here
|
|
28 }
|
|
29
|
|
30 =head1 DESCRIPTION
|
|
31
|
|
32 This module is a parser for est2genome [EMBOSS] alignments of est/cdna
|
|
33 sequence to genomic DNA. This is generally accepted as the best
|
|
34 program for predicting splice sites based on est/cdnas*.
|
|
35
|
|
36 This module currently does not try pull out the ungapped alignments
|
|
37 (Segment) but may in the future.
|
|
38
|
|
39
|
|
40 * AFAIK
|
|
41
|
|
42 =head1 FEEDBACK
|
|
43
|
|
44 =head2 Mailing Lists
|
|
45
|
|
46 User feedback is an integral part of the evolution of this and other
|
|
47 Bioperl modules. Send your comments and suggestions preferably to
|
|
48 the Bioperl mailing list. Your participation is much appreciated.
|
|
49
|
|
50 bioperl-l@bioperl.org - General discussion
|
|
51 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
52
|
|
53 =head2 Reporting Bugs
|
|
54
|
|
55 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
56 of the bugs and their resolution. Bug reports can be submitted via
|
|
57 email or the web:
|
|
58
|
|
59 bioperl-bugs@bioperl.org
|
|
60 http://bugzilla.bioperl.org/
|
|
61
|
|
62 =head1 AUTHOR - Jason Stajich
|
|
63
|
|
64 Email jason@bioperl.org
|
|
65
|
|
66 Describe contact details here
|
|
67
|
|
68 =head1 CONTRIBUTORS
|
|
69
|
|
70 Additional contributors names and emails here
|
|
71
|
|
72 =head1 APPENDIX
|
|
73
|
|
74 The rest of the documentation details each of the object methods.
|
|
75 Internal methods are usually preceded with a _
|
|
76
|
|
77 =cut
|
|
78
|
|
79
|
|
80 # Let the code begin...
|
|
81
|
|
82
|
|
83 package Bio::Tools::Est2Genome;
|
|
84 use vars qw(@ISA);
|
|
85 use strict;
|
|
86
|
|
87 # Object preamble - inherits from Bio::Root::Root
|
|
88
|
|
89 use Bio::Root::Root;
|
|
90 use Bio::Tools::AnalysisResult;
|
|
91 use Bio::SeqFeature::Gene::Exon;
|
|
92 use Bio::SeqFeature::Gene::Intron;
|
|
93 use Bio::SeqFeature::Gene::GeneStructure;
|
|
94 use Bio::SeqFeature::SimilarityPair;
|
|
95
|
|
96 @ISA = qw(Bio::Tools::AnalysisResult );
|
|
97
|
|
98 =head2 new
|
|
99
|
|
100 Title : new
|
|
101 Usage : my $obj = new Bio::Tools::Est2Genome();
|
|
102 Function: Builds a new Bio::Tools::Est2Genome object
|
|
103 Returns : an instance of Bio::Tools::Est2Genome
|
|
104 Args : -file => 'output.est2genome' or
|
|
105 -fh => \*EST2GENOMEOUTPUT
|
|
106 -genomefirst => 1 # genome was the first input (not standard)
|
|
107
|
|
108 =cut
|
|
109
|
|
110 sub _initialize_state {
|
|
111 my($self,@args) = @_;
|
|
112
|
|
113 # call the inherited method first
|
|
114 my $make = $self->SUPER::_initialize_state(@args);
|
|
115
|
|
116 my ($genome_is_first) = $self->_rearrange([qw(GENOMEFIRST)], @args);
|
|
117
|
|
118 delete($self->{'_genome_is_first'});
|
|
119 $self->{'_genome_is_first'} = $genome_is_first if(defined($genome_is_first));
|
|
120 $self->analysis_method("est2genome");
|
|
121 }
|
|
122
|
|
123 =head2 analysis_method
|
|
124
|
|
125 Usage : $sim4->analysis_method();
|
|
126 Purpose : Inherited method. Overridden to ensure that the name matches
|
|
127 /est2genome/i.
|
|
128 Returns : String
|
|
129 Argument : n/a
|
|
130
|
|
131 =cut
|
|
132
|
|
133 #-------------
|
|
134 sub analysis_method {
|
|
135 #-------------
|
|
136 my ($self, $method) = @_;
|
|
137 if($method && ($method !~ /est2genome/i)) {
|
|
138 $self->throw("method $method not supported in " . ref($self));
|
|
139 }
|
|
140 return $self->SUPER::analysis_method($method);
|
|
141 }
|
|
142
|
|
143 =head2 parse_next_gene
|
|
144
|
|
145 Title : parse_next_gene
|
|
146 Usage : @gene = $est2genome_result->parse_next_gene;
|
|
147 foreach $exon (@exons) {
|
|
148 # do something
|
|
149 }
|
|
150
|
|
151 Function: Parses the next alignments of the est2genome result file and
|
|
152 returns the found exons as an array of
|
|
153 Bio::SeqFeature::SimilarityPair objects. Call
|
|
154 this method repeatedly until an empty array is returned to get the
|
|
155 results for all alignments.
|
|
156
|
|
157 The $exon->seq_id() attribute will be set to the identifier of the
|
|
158 respective sequence for both sequences.
|
|
159 The length is accessible via the seqlength()
|
|
160 attribute of $exon->query() and
|
|
161 $exon->est_hit().
|
|
162 Returns : An array (or array reference) of Bio::SeqFeature::SimilarityPair and Bio::SeqFeature::Generic objects
|
|
163 Args : none
|
|
164
|
|
165
|
|
166 =cut
|
|
167
|
|
168 sub parse_next_gene {
|
|
169 my ($self) = @_;
|
|
170 my $seensegment = 0;
|
|
171 my @features;
|
|
172 my ($qstrand,$hstrand) = (1,1);
|
|
173 my $lasthseqname;
|
|
174 while( defined($_ = $self->_readline) ) {
|
|
175 if( /Note Best alignment is between (reversed|forward) est and (reversed|forward) genome, (but|and) splice\s+sites imply\s+(forward gene|REVERSED GENE)/) {
|
|
176 if( $seensegment ) {
|
|
177 $self->_pushback($_);
|
|
178 return wantarray ? @features : \@features;
|
|
179 }
|
|
180 $hstrand = -1 if $1 eq 'reversed';
|
|
181 $qstrand = -1 if $4 eq 'REVERSED GENE';
|
|
182 $self->debug( "1=$1, 2=$2, 4=$4\n");
|
|
183 }
|
|
184 elsif( /^Exon/ ) {
|
|
185 my ($name,$len,$score,$qstart,$qend,$qseqname,
|
|
186 $hstart,$hend, $hseqname) = split;
|
|
187 $lasthseqname = $hseqname;
|
|
188 my $query = new Bio::SeqFeature::Similarity(-primary => $name,
|
|
189 -source => $self->analysis_method,
|
|
190 -seq_id => $qseqname, # FIXME WHEN WE REDO THE GENERIC NAME CHANGE
|
|
191 -start => $qstart,
|
|
192 -end => $qend,
|
|
193 -strand => $qstrand,
|
|
194 -score => $score,
|
|
195 -tag => {
|
|
196 # 'Location' => "$hstart..$hend",
|
|
197 'Sequence' => "$hseqname",
|
|
198 }
|
|
199 );
|
|
200 my $hit = new Bio::SeqFeature::Similarity(-primary => 'exon_hit',
|
|
201 -source => $self->analysis_method,
|
|
202 -seq_id => $hseqname,
|
|
203 -start => $hstart,
|
|
204 -end => $hend,
|
|
205 -strand => $hstrand,
|
|
206 -score => $score,
|
|
207 -tag => {
|
|
208 # 'Location' => "$qstart..$qend",
|
|
209 'Sequence' => "$qseqname",
|
|
210
|
|
211 }
|
|
212 );
|
|
213 push @features, new Bio::SeqFeature::SimilarityPair
|
|
214 (-query => $query,
|
|
215 -hit => $hit,
|
|
216 -source => $self->analysis_method);
|
|
217 } elsif( /^([\-\+\?])(Intron)/) {
|
|
218 my ($name,$len,$score,$qstart,$qend,$qseqname) = split;
|
|
219 push @features, new Bio::SeqFeature::Generic(-primary => $2,
|
|
220 -source => $self->analysis_method,
|
|
221 -start => $qstart,
|
|
222 -end => $qend,
|
|
223 -strand => $qstrand,
|
|
224 -score => $score,
|
|
225 -seq_id => $qseqname,
|
|
226 -tag => {
|
|
227 'Sequence' => $lasthseqname});
|
|
228 } elsif( /^Span/ ) {
|
|
229 } elsif( /^Segment/ ) {
|
|
230 $seensegment = 1;
|
|
231 } elsif( /^\s+$/ ) { # do nothing
|
|
232 } else {
|
|
233 $self->warn( "unknown line $_\n");
|
|
234 }
|
|
235 }
|
|
236 return undef unless( @features );
|
|
237 return wantarray ? @features : \@features;
|
|
238 }
|
|
239
|
|
240 =head2 next_feature
|
|
241
|
|
242 Title : next_feature
|
|
243 Usage : $seqfeature = $obj->next_feature();
|
|
244 Function: Returns the next feature available in the analysis result, or
|
|
245 undef if there are no more features.
|
|
246 Example :
|
|
247 Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
|
|
248 more features.
|
|
249 Args : none
|
|
250
|
|
251 =cut
|
|
252
|
|
253 sub next_feature {
|
|
254 my ($self) = shift;
|
|
255 $self->throw("We haven't really done this right, yet, use parse_next_gene");
|
|
256 }
|
|
257
|
|
258
|
|
259 1;
|