Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/Tools/Est2Genome.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: Est2Genome.pm,v 1.11 2002/12/05 13:46:36 heikki Exp $ | |
2 # | |
3 # BioPerl module for Bio::Tools::Est2Genome | |
4 # | |
5 # Cared for by Jason Stajich <jason@bioperl.org> | |
6 # | |
7 # Copyright Jason Stajich | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Tools::Est2Genome - Parse est2genome output, makes simple Bio::SeqFeature::Generic objects | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use Bio::Tools::Est2Genome; | |
20 | |
21 my $featureiter = new Bio::Tools::Est2Genome(-file => 'output.est2genome'); | |
22 | |
23 # This is going to be fixed to use the SeqAnalysisI next_feature | |
24 # Method eventually when we have the objects to put the data in | |
25 # properly | |
26 while( my $f = $featureiter->parse_next_gene ) { | |
27 # process Bio::SeqFeature::Generic objects here | |
28 } | |
29 | |
30 =head1 DESCRIPTION | |
31 | |
32 This module is a parser for est2genome [EMBOSS] alignments of est/cdna | |
33 sequence to genomic DNA. This is generally accepted as the best | |
34 program for predicting splice sites based on est/cdnas*. | |
35 | |
36 This module currently does not try pull out the ungapped alignments | |
37 (Segment) but may in the future. | |
38 | |
39 | |
40 * AFAIK | |
41 | |
42 =head1 FEEDBACK | |
43 | |
44 =head2 Mailing Lists | |
45 | |
46 User feedback is an integral part of the evolution of this and other | |
47 Bioperl modules. Send your comments and suggestions preferably to | |
48 the Bioperl mailing list. Your participation is much appreciated. | |
49 | |
50 bioperl-l@bioperl.org - General discussion | |
51 http://bioperl.org/MailList.shtml - About the mailing lists | |
52 | |
53 =head2 Reporting Bugs | |
54 | |
55 Report bugs to the Bioperl bug tracking system to help us keep track | |
56 of the bugs and their resolution. Bug reports can be submitted via | |
57 email or the web: | |
58 | |
59 bioperl-bugs@bioperl.org | |
60 http://bugzilla.bioperl.org/ | |
61 | |
62 =head1 AUTHOR - Jason Stajich | |
63 | |
64 Email jason@bioperl.org | |
65 | |
66 Describe contact details here | |
67 | |
68 =head1 CONTRIBUTORS | |
69 | |
70 Additional contributors names and emails here | |
71 | |
72 =head1 APPENDIX | |
73 | |
74 The rest of the documentation details each of the object methods. | |
75 Internal methods are usually preceded with a _ | |
76 | |
77 =cut | |
78 | |
79 | |
80 # Let the code begin... | |
81 | |
82 | |
83 package Bio::Tools::Est2Genome; | |
84 use vars qw(@ISA); | |
85 use strict; | |
86 | |
87 # Object preamble - inherits from Bio::Root::Root | |
88 | |
89 use Bio::Root::Root; | |
90 use Bio::Tools::AnalysisResult; | |
91 use Bio::SeqFeature::Gene::Exon; | |
92 use Bio::SeqFeature::Gene::Intron; | |
93 use Bio::SeqFeature::Gene::GeneStructure; | |
94 use Bio::SeqFeature::SimilarityPair; | |
95 | |
96 @ISA = qw(Bio::Tools::AnalysisResult ); | |
97 | |
98 =head2 new | |
99 | |
100 Title : new | |
101 Usage : my $obj = new Bio::Tools::Est2Genome(); | |
102 Function: Builds a new Bio::Tools::Est2Genome object | |
103 Returns : an instance of Bio::Tools::Est2Genome | |
104 Args : -file => 'output.est2genome' or | |
105 -fh => \*EST2GENOMEOUTPUT | |
106 -genomefirst => 1 # genome was the first input (not standard) | |
107 | |
108 =cut | |
109 | |
110 sub _initialize_state { | |
111 my($self,@args) = @_; | |
112 | |
113 # call the inherited method first | |
114 my $make = $self->SUPER::_initialize_state(@args); | |
115 | |
116 my ($genome_is_first) = $self->_rearrange([qw(GENOMEFIRST)], @args); | |
117 | |
118 delete($self->{'_genome_is_first'}); | |
119 $self->{'_genome_is_first'} = $genome_is_first if(defined($genome_is_first)); | |
120 $self->analysis_method("est2genome"); | |
121 } | |
122 | |
123 =head2 analysis_method | |
124 | |
125 Usage : $sim4->analysis_method(); | |
126 Purpose : Inherited method. Overridden to ensure that the name matches | |
127 /est2genome/i. | |
128 Returns : String | |
129 Argument : n/a | |
130 | |
131 =cut | |
132 | |
133 #------------- | |
134 sub analysis_method { | |
135 #------------- | |
136 my ($self, $method) = @_; | |
137 if($method && ($method !~ /est2genome/i)) { | |
138 $self->throw("method $method not supported in " . ref($self)); | |
139 } | |
140 return $self->SUPER::analysis_method($method); | |
141 } | |
142 | |
143 =head2 parse_next_gene | |
144 | |
145 Title : parse_next_gene | |
146 Usage : @gene = $est2genome_result->parse_next_gene; | |
147 foreach $exon (@exons) { | |
148 # do something | |
149 } | |
150 | |
151 Function: Parses the next alignments of the est2genome result file and | |
152 returns the found exons as an array of | |
153 Bio::SeqFeature::SimilarityPair objects. Call | |
154 this method repeatedly until an empty array is returned to get the | |
155 results for all alignments. | |
156 | |
157 The $exon->seq_id() attribute will be set to the identifier of the | |
158 respective sequence for both sequences. | |
159 The length is accessible via the seqlength() | |
160 attribute of $exon->query() and | |
161 $exon->est_hit(). | |
162 Returns : An array (or array reference) of Bio::SeqFeature::SimilarityPair and Bio::SeqFeature::Generic objects | |
163 Args : none | |
164 | |
165 | |
166 =cut | |
167 | |
168 sub parse_next_gene { | |
169 my ($self) = @_; | |
170 my $seensegment = 0; | |
171 my @features; | |
172 my ($qstrand,$hstrand) = (1,1); | |
173 my $lasthseqname; | |
174 while( defined($_ = $self->_readline) ) { | |
175 if( /Note Best alignment is between (reversed|forward) est and (reversed|forward) genome, (but|and) splice\s+sites imply\s+(forward gene|REVERSED GENE)/) { | |
176 if( $seensegment ) { | |
177 $self->_pushback($_); | |
178 return wantarray ? @features : \@features; | |
179 } | |
180 $hstrand = -1 if $1 eq 'reversed'; | |
181 $qstrand = -1 if $4 eq 'REVERSED GENE'; | |
182 $self->debug( "1=$1, 2=$2, 4=$4\n"); | |
183 } | |
184 elsif( /^Exon/ ) { | |
185 my ($name,$len,$score,$qstart,$qend,$qseqname, | |
186 $hstart,$hend, $hseqname) = split; | |
187 $lasthseqname = $hseqname; | |
188 my $query = new Bio::SeqFeature::Similarity(-primary => $name, | |
189 -source => $self->analysis_method, | |
190 -seq_id => $qseqname, # FIXME WHEN WE REDO THE GENERIC NAME CHANGE | |
191 -start => $qstart, | |
192 -end => $qend, | |
193 -strand => $qstrand, | |
194 -score => $score, | |
195 -tag => { | |
196 # 'Location' => "$hstart..$hend", | |
197 'Sequence' => "$hseqname", | |
198 } | |
199 ); | |
200 my $hit = new Bio::SeqFeature::Similarity(-primary => 'exon_hit', | |
201 -source => $self->analysis_method, | |
202 -seq_id => $hseqname, | |
203 -start => $hstart, | |
204 -end => $hend, | |
205 -strand => $hstrand, | |
206 -score => $score, | |
207 -tag => { | |
208 # 'Location' => "$qstart..$qend", | |
209 'Sequence' => "$qseqname", | |
210 | |
211 } | |
212 ); | |
213 push @features, new Bio::SeqFeature::SimilarityPair | |
214 (-query => $query, | |
215 -hit => $hit, | |
216 -source => $self->analysis_method); | |
217 } elsif( /^([\-\+\?])(Intron)/) { | |
218 my ($name,$len,$score,$qstart,$qend,$qseqname) = split; | |
219 push @features, new Bio::SeqFeature::Generic(-primary => $2, | |
220 -source => $self->analysis_method, | |
221 -start => $qstart, | |
222 -end => $qend, | |
223 -strand => $qstrand, | |
224 -score => $score, | |
225 -seq_id => $qseqname, | |
226 -tag => { | |
227 'Sequence' => $lasthseqname}); | |
228 } elsif( /^Span/ ) { | |
229 } elsif( /^Segment/ ) { | |
230 $seensegment = 1; | |
231 } elsif( /^\s+$/ ) { # do nothing | |
232 } else { | |
233 $self->warn( "unknown line $_\n"); | |
234 } | |
235 } | |
236 return undef unless( @features ); | |
237 return wantarray ? @features : \@features; | |
238 } | |
239 | |
240 =head2 next_feature | |
241 | |
242 Title : next_feature | |
243 Usage : $seqfeature = $obj->next_feature(); | |
244 Function: Returns the next feature available in the analysis result, or | |
245 undef if there are no more features. | |
246 Example : | |
247 Returns : A Bio::SeqFeatureI implementing object, or undef if there are no | |
248 more features. | |
249 Args : none | |
250 | |
251 =cut | |
252 | |
253 sub next_feature { | |
254 my ($self) = shift; | |
255 $self->throw("We haven't really done this right, yet, use parse_next_gene"); | |
256 } | |
257 | |
258 | |
259 1; |