Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/BPlite/Sbjct.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: Sbjct.pm,v 1.23.2.1 2003/02/20 00:39:03 jason Exp $ | |
2 ############################################################################### | |
3 # Bio::Tools::BPlite::Sbjct | |
4 ############################################################################### | |
5 # | |
6 # The original BPlite.pm module has been written by Ian Korf ! | |
7 # see http://sapiens.wustl.edu/~ikorf | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # | |
11 # BioPerl module for Bio::Tools::BPlite::Sbjct | |
12 # | |
13 # Cared for by Peter Schattner <schattner@alum.mit.edu> | |
14 # | |
15 # Copyright Peter Schattner | |
16 # | |
17 # You may distribute this module under the same terms as perl itself | |
18 | |
19 # POD documentation - main docs before the code | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::Tools::BPlite::Sbjct - A Blast Subject (database search Hit) | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 use Bio::Tools::BPlite | |
28 my $report = new Bio::Tools::BPlite(-fh=>\*STDIN); | |
29 while(my $sbjct = $report->nextSbjct) { | |
30 $sbjct->name; # access to the hit name | |
31 "$sbjct"; # overloaded to return name | |
32 $sbjct->nextHSP; # gets the next HSP from the sbjct | |
33 while(my $hsp = $sbjct->nextHSP) { | |
34 # canonical form is again a while loop | |
35 } | |
36 | |
37 =head1 DESCRIPTION | |
38 | |
39 See L<Bio::Tools::BPlite> for a more detailed information about the | |
40 BPlite BLAST parsing objects. | |
41 | |
42 The original BPlite.pm module has been written by Ian Korf! | |
43 See http://sapiens.wustl.edu/~ikorf | |
44 | |
45 The Sbjct object encapsulates a Hit in a Blast database | |
46 search. The Subjects are the "Hits" for a particular query. A | |
47 Subject may be made up of multiple High Scoring Pairs (HSP) which are | |
48 accessed through the nextHSP method. | |
49 | |
50 If you are searching for the P-value or percent identity that is | |
51 specific to each HSP and you will need to use the nextHSP method to | |
52 get access to that data. | |
53 | |
54 =head1 FEEDBACK | |
55 | |
56 =head2 Mailing Lists | |
57 | |
58 User feedback is an integral part of the evolution of this and other | |
59 Bioperl modules. Send your comments and suggestions preferably to | |
60 the Bioperl mailing list. Your participation is much appreciated. | |
61 | |
62 bioperl-l@bioperl.org - General discussion | |
63 http://bioperl.org/MailList.shtml - About the mailing lists | |
64 | |
65 =head2 Reporting Bugs | |
66 | |
67 Report bugs to the Bioperl bug tracking system to help us keep track | |
68 of the bugs and their resolution. Bug reports can be submitted via | |
69 email or the web: | |
70 | |
71 bioperl-bugs@bioperl.org | |
72 http://bugzilla.bioperl.org/ | |
73 | |
74 =head1 AUTHOR - Peter Schattner | |
75 | |
76 Email: schattner@alum.mit.edu | |
77 | |
78 =head1 CONTRIBUTORS | |
79 | |
80 Jason Stajich, jason@bioperl.org | |
81 | |
82 =head1 APPENDIX | |
83 | |
84 The rest of the documentation details each of the object methods. | |
85 Internal methods are usually preceded with a _ | |
86 | |
87 =cut | |
88 | |
89 # Let the code begin... | |
90 | |
91 package Bio::Tools::BPlite::Sbjct; | |
92 | |
93 use strict; | |
94 | |
95 use Bio::Root::Root; # root object to inherit from | |
96 use Bio::Tools::BPlite::HSP; # we want to use HSP | |
97 #use overload '""' => 'name'; | |
98 use vars qw(@ISA); | |
99 | |
100 @ISA = qw(Bio::Root::Root); | |
101 | |
102 sub new { | |
103 my ($class, @args) = @_; | |
104 my $self = $class->SUPER::new(@args); | |
105 | |
106 ($self->{'NAME'},$self->{'LENGTH'}, | |
107 $self->{'PARENT'}) = | |
108 $self->_rearrange([qw(NAME | |
109 LENGTH | |
110 PARENT | |
111 )],@args); | |
112 $self->report_type($self->{'PARENT'}->{'BLAST_TYPE'} || 'UNKNOWN'); | |
113 $self->{'HSP_ALL_PARSED'} = 0; | |
114 | |
115 return $self; | |
116 } | |
117 | |
118 =head2 name | |
119 | |
120 Title : name | |
121 Usage : $name = $obj->name(); | |
122 Function : returns the name of the Sbjct | |
123 Example : | |
124 Returns : name of the Sbjct | |
125 Args : | |
126 | |
127 =cut | |
128 | |
129 sub name {shift->{'NAME'}} | |
130 | |
131 =head2 report_type | |
132 | |
133 Title : report_type | |
134 Usage : $type = $sbjct->report_type() | |
135 Function : Returns the type of report from which this hit was obtained. | |
136 This usually pertains only to BLAST and friends reports, for which | |
137 the report type denotes what type of sequence was aligned against | |
138 what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated dna-prt, | |
139 TBLASTN prt-translated dna, TBLASTX translated dna-translated dna). | |
140 Example : | |
141 Returns : A string (BLASTN, BLASTP, BLASTX, TBLASTN, TBLASTX, UNKNOWN) | |
142 Args : a string on set (you should know what you are doing) | |
143 | |
144 =cut | |
145 | |
146 sub report_type { | |
147 my ($self, $rpt) = @_; | |
148 if($rpt) { | |
149 $self->{'_report_type'} = $rpt; | |
150 } | |
151 return $self->{'_report_type'}; | |
152 } | |
153 | |
154 =head2 nextFeaturePair | |
155 | |
156 Title : nextFeaturePair | |
157 Usage : $name = $obj->nextFeaturePair(); | |
158 Function : same as the nextHSP function | |
159 Example : | |
160 Returns : next FeaturePair | |
161 Args : | |
162 | |
163 =cut | |
164 | |
165 sub nextFeaturePair {shift->nextHSP}; # just another name | |
166 | |
167 =head2 nextHSP | |
168 | |
169 Title : nextHSP | |
170 Usage : $hsp = $obj->nextHSP(); | |
171 Function : returns the next available High Scoring Pair | |
172 Example : | |
173 Returns : Bio::Tools::HSP or null if finished | |
174 Args : | |
175 | |
176 =cut | |
177 | |
178 sub nextHSP { | |
179 my ($self) = @_; | |
180 return undef if $self->{'HSP_ALL_PARSED'}; | |
181 | |
182 ############################ | |
183 # get and parse scorelines # | |
184 ############################ | |
185 my ($qframe, $sframe); | |
186 my $scoreline = $self->_readline(); | |
187 my $nextline = $self->_readline(); | |
188 return undef if not defined $nextline; | |
189 $scoreline .= $nextline; | |
190 my ($score, $bits); | |
191 if ($scoreline =~ /\d bits\)/) { | |
192 ($score, $bits) = $scoreline =~ | |
193 /Score = (\d+) \((\S+) bits\)/; # WU-BLAST | |
194 } | |
195 else { | |
196 ($bits, $score) = $scoreline =~ | |
197 /Score =\s+(\S+) bits \((\d+)/; # NCBI-BLAST | |
198 } | |
199 | |
200 my ($match, $hsplength) = ($scoreline =~ /Identities = (\d+)\/(\d+)/); | |
201 my ($positive) = ($scoreline =~ /Positives = (\d+)/); | |
202 my ($gaps) = ($scoreline =~ /Gaps = (\d+)/); | |
203 if($self->report_type() eq 'TBLASTX') { | |
204 ($qframe, $sframe) = $scoreline =~ /Frame =\s+([+-]\d)\s+\/\s+([+-]\d)/; | |
205 } elsif ($self->report_type() eq 'TBLASTN') { | |
206 ($sframe) = $scoreline =~ /Frame =\s+([+-]\d)/; | |
207 } else { | |
208 ($qframe) = $scoreline =~ /Frame =\s+([+-]\d)/; | |
209 } | |
210 $positive = $match if not defined $positive; | |
211 $gaps = '0' if not defined $gaps; | |
212 my ($p) = ($scoreline =~ /[Sum ]*P[\(\d+\)]* = (\S+)/); | |
213 unless (defined $p) {(undef, $p) = $scoreline =~ /Expect(\(\d+\))? =\s+(\S+)/} | |
214 my ($exp) = ($scoreline =~ /Expect(?:\(\d+\))? =\s+([^\s,]+)/); | |
215 $exp = -1 unless( defined $exp ); | |
216 | |
217 $self->throw("Unable to parse '$scoreline'") unless defined $score; | |
218 | |
219 ####################### | |
220 # get alignment lines # | |
221 ####################### | |
222 my (@hspline); | |
223 while( defined($_ = $self->_readline()) ) { | |
224 if ($_ =~ /^WARNING:|^NOTE:/) { | |
225 while(defined($_ = $self->_readline())) {last if $_ !~ /\S/} | |
226 } | |
227 elsif ($_ !~ /\S/) {next} | |
228 elsif ($_ =~ /Strand HSP/) {next} # WU-BLAST non-data | |
229 elsif ($_ =~ /^\s*Strand/) {next} # NCBI-BLAST non-data | |
230 elsif ($_ =~ /^\s*Score/) {$self->_pushback($_); last} | |
231 | |
232 elsif ($_ =~ /^>|^Histogram|^Searching|^Parameters|^\s+Database:|^CPU\stime|^\s*Lambda/) | |
233 { | |
234 #ps 5/28/01 | |
235 # elsif ($_ =~ /^>|^Parameters|^\s+Database:|^CPU\stime/) { | |
236 $self->_pushback($_); | |
237 | |
238 $self->{'HSP_ALL_PARSED'} = 1; | |
239 last; | |
240 } | |
241 elsif( $_ =~ /^\s*Frame/ ) { | |
242 if ($self->report_type() eq 'TBLASTX') { | |
243 ($qframe, $sframe) = $_ =~ /Frame = ([\+-]\d)\s+\/\s+([\+-]\d)/; | |
244 } elsif ($self->report_type() eq 'TBLASTN') { | |
245 ($sframe) = $_ =~ /Frame = ([\+-]\d)/; | |
246 } else { | |
247 ($qframe) = $_ =~ /Frame = ([\+-]\d)/; | |
248 } | |
249 } | |
250 else { | |
251 push @hspline, $_; # store the query line | |
252 $nextline = $self->_readline(); | |
253 # Skip "pattern" line when parsing PHIBLAST reports, otherwise store the alignment line | |
254 my $l1 = ($nextline =~ /^\s*pattern/) ? $self->_readline() : $nextline; | |
255 push @hspline, $l1; # store the alignment line | |
256 my $l2 = $self->_readline(); push @hspline, $l2; # grab/store the sbjct line | |
257 } | |
258 } | |
259 | |
260 ######################### | |
261 # parse alignment lines # | |
262 ######################### | |
263 my ($ql, $sl, $as) = ("", "", ""); | |
264 my ($qb, $qe, $sb, $se) = (0,0,0,0); | |
265 my (@QL, @SL, @AS); # for better memory management | |
266 | |
267 for(my $i=0;$i<@hspline;$i+=3) { | |
268 # warn $hspline[$i], $hspline[$i+2]; | |
269 $hspline[$i] =~ /^(?:Query|Trans):\s+(\d+)\s*([\D\S]+)\s+(\d+)/; | |
270 $ql = $2; $qb = $1 unless $qb; $qe = $3; | |
271 | |
272 my $offset = index($hspline[$i], $ql); | |
273 $as = substr($hspline[$i+1], $offset, CORE::length($ql)); | |
274 | |
275 $hspline[$i+2] =~ /^Sbjct:\s+(\d+)\s*([\D\S]+)\s+(\d+)/; | |
276 $sl = $2; $sb = $1 unless $sb; $se = $3; | |
277 | |
278 push @QL, $ql; push @SL, $sl; push @AS, $as; | |
279 } | |
280 | |
281 ################## | |
282 # the HSP object # | |
283 ################## | |
284 $ql = join("", @QL); | |
285 $sl = join("", @SL); | |
286 $as = join("", @AS); | |
287 # Query name and length are not in the report for a bl2seq report so {'PARENT'}->query and | |
288 # {'PARENT'}->qlength will not be available. | |
289 my ($qname, $qlength) = ('unknown','unknown'); | |
290 if ($self->{'PARENT'}->can('query')) { | |
291 $qname = $self->{'PARENT'}->query; | |
292 $qlength = $self->{'PARENT'}->qlength; | |
293 } | |
294 | |
295 my $hsp = new Bio::Tools::BPlite::HSP | |
296 ('-score' => $score, | |
297 '-bits' => $bits, | |
298 '-match' => $match, | |
299 '-positive' => $positive, | |
300 '-gaps' => $gaps, | |
301 '-hsplength' => $hsplength, | |
302 '-p' => $p, | |
303 '-exp' => $exp, | |
304 '-queryBegin' => $qb, | |
305 '-queryEnd' => $qe, | |
306 '-sbjctBegin' => $sb, | |
307 '-sbjctEnd' => $se, | |
308 '-querySeq' => $ql, | |
309 '-sbjctSeq' => $sl, | |
310 '-homologySeq'=> $as, | |
311 '-queryName' => $qname, | |
312 # '-queryName'=>$self->{'PARENT'}->query, | |
313 '-sbjctName' => $self->{'NAME'}, | |
314 '-queryLength'=> $qlength, | |
315 # '-queryLength'=>$self->{'PARENT'}->qlength, | |
316 '-sbjctLength'=> $self->{'LENGTH'}, | |
317 '-queryFrame' => $qframe, | |
318 '-sbjctFrame' => $sframe, | |
319 '-blastType' => $self->report_type()); | |
320 return $hsp; | |
321 } | |
322 | |
323 =head2 _readline | |
324 | |
325 Title : _readline | |
326 Usage : $obj->_readline | |
327 Function: Reads a line of input. | |
328 | |
329 Note that this method implicitely uses the value of $/ that is | |
330 in effect when called. | |
331 | |
332 Note also that the current implementation does not handle pushed | |
333 back input correctly unless the pushed back input ends with the | |
334 value of $/. | |
335 Example : | |
336 Returns : | |
337 | |
338 =cut | |
339 | |
340 sub _readline{ | |
341 my ($self) = @_; | |
342 return $self->{'PARENT'}->_readline(); | |
343 } | |
344 | |
345 =head2 _pushback | |
346 | |
347 Title : _pushback | |
348 Usage : $obj->_pushback($newvalue) | |
349 Function: puts a line previously read with _readline back into a buffer | |
350 Example : | |
351 Returns : | |
352 Args : newvalue | |
353 | |
354 =cut | |
355 | |
356 sub _pushback { | |
357 my ($self, $arg) = @_; | |
358 return $self->{'PARENT'}->_pushback($arg); | |
359 } | |
360 | |
361 1; |