comparison variant_effect_predictor/Bio/Tools/BPlite/Sbjct.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: Sbjct.pm,v 1.23.2.1 2003/02/20 00:39:03 jason Exp $
2 ###############################################################################
3 # Bio::Tools::BPlite::Sbjct
4 ###############################################################################
5 #
6 # The original BPlite.pm module has been written by Ian Korf !
7 # see http://sapiens.wustl.edu/~ikorf
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # BioPerl module for Bio::Tools::BPlite::Sbjct
12 #
13 # Cared for by Peter Schattner <schattner@alum.mit.edu>
14 #
15 # Copyright Peter Schattner
16 #
17 # You may distribute this module under the same terms as perl itself
18
19 # POD documentation - main docs before the code
20
21 =head1 NAME
22
23 Bio::Tools::BPlite::Sbjct - A Blast Subject (database search Hit)
24
25 =head1 SYNOPSIS
26
27 use Bio::Tools::BPlite
28 my $report = new Bio::Tools::BPlite(-fh=>\*STDIN);
29 while(my $sbjct = $report->nextSbjct) {
30 $sbjct->name; # access to the hit name
31 "$sbjct"; # overloaded to return name
32 $sbjct->nextHSP; # gets the next HSP from the sbjct
33 while(my $hsp = $sbjct->nextHSP) {
34 # canonical form is again a while loop
35 }
36
37 =head1 DESCRIPTION
38
39 See L<Bio::Tools::BPlite> for a more detailed information about the
40 BPlite BLAST parsing objects.
41
42 The original BPlite.pm module has been written by Ian Korf!
43 See http://sapiens.wustl.edu/~ikorf
44
45 The Sbjct object encapsulates a Hit in a Blast database
46 search. The Subjects are the "Hits" for a particular query. A
47 Subject may be made up of multiple High Scoring Pairs (HSP) which are
48 accessed through the nextHSP method.
49
50 If you are searching for the P-value or percent identity that is
51 specific to each HSP and you will need to use the nextHSP method to
52 get access to that data.
53
54 =head1 FEEDBACK
55
56 =head2 Mailing Lists
57
58 User feedback is an integral part of the evolution of this and other
59 Bioperl modules. Send your comments and suggestions preferably to
60 the Bioperl mailing list. Your participation is much appreciated.
61
62 bioperl-l@bioperl.org - General discussion
63 http://bioperl.org/MailList.shtml - About the mailing lists
64
65 =head2 Reporting Bugs
66
67 Report bugs to the Bioperl bug tracking system to help us keep track
68 of the bugs and their resolution. Bug reports can be submitted via
69 email or the web:
70
71 bioperl-bugs@bioperl.org
72 http://bugzilla.bioperl.org/
73
74 =head1 AUTHOR - Peter Schattner
75
76 Email: schattner@alum.mit.edu
77
78 =head1 CONTRIBUTORS
79
80 Jason Stajich, jason@bioperl.org
81
82 =head1 APPENDIX
83
84 The rest of the documentation details each of the object methods.
85 Internal methods are usually preceded with a _
86
87 =cut
88
89 # Let the code begin...
90
91 package Bio::Tools::BPlite::Sbjct;
92
93 use strict;
94
95 use Bio::Root::Root; # root object to inherit from
96 use Bio::Tools::BPlite::HSP; # we want to use HSP
97 #use overload '""' => 'name';
98 use vars qw(@ISA);
99
100 @ISA = qw(Bio::Root::Root);
101
102 sub new {
103 my ($class, @args) = @_;
104 my $self = $class->SUPER::new(@args);
105
106 ($self->{'NAME'},$self->{'LENGTH'},
107 $self->{'PARENT'}) =
108 $self->_rearrange([qw(NAME
109 LENGTH
110 PARENT
111 )],@args);
112 $self->report_type($self->{'PARENT'}->{'BLAST_TYPE'} || 'UNKNOWN');
113 $self->{'HSP_ALL_PARSED'} = 0;
114
115 return $self;
116 }
117
118 =head2 name
119
120 Title : name
121 Usage : $name = $obj->name();
122 Function : returns the name of the Sbjct
123 Example :
124 Returns : name of the Sbjct
125 Args :
126
127 =cut
128
129 sub name {shift->{'NAME'}}
130
131 =head2 report_type
132
133 Title : report_type
134 Usage : $type = $sbjct->report_type()
135 Function : Returns the type of report from which this hit was obtained.
136 This usually pertains only to BLAST and friends reports, for which
137 the report type denotes what type of sequence was aligned against
138 what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated dna-prt,
139 TBLASTN prt-translated dna, TBLASTX translated dna-translated dna).
140 Example :
141 Returns : A string (BLASTN, BLASTP, BLASTX, TBLASTN, TBLASTX, UNKNOWN)
142 Args : a string on set (you should know what you are doing)
143
144 =cut
145
146 sub report_type {
147 my ($self, $rpt) = @_;
148 if($rpt) {
149 $self->{'_report_type'} = $rpt;
150 }
151 return $self->{'_report_type'};
152 }
153
154 =head2 nextFeaturePair
155
156 Title : nextFeaturePair
157 Usage : $name = $obj->nextFeaturePair();
158 Function : same as the nextHSP function
159 Example :
160 Returns : next FeaturePair
161 Args :
162
163 =cut
164
165 sub nextFeaturePair {shift->nextHSP}; # just another name
166
167 =head2 nextHSP
168
169 Title : nextHSP
170 Usage : $hsp = $obj->nextHSP();
171 Function : returns the next available High Scoring Pair
172 Example :
173 Returns : Bio::Tools::HSP or null if finished
174 Args :
175
176 =cut
177
178 sub nextHSP {
179 my ($self) = @_;
180 return undef if $self->{'HSP_ALL_PARSED'};
181
182 ############################
183 # get and parse scorelines #
184 ############################
185 my ($qframe, $sframe);
186 my $scoreline = $self->_readline();
187 my $nextline = $self->_readline();
188 return undef if not defined $nextline;
189 $scoreline .= $nextline;
190 my ($score, $bits);
191 if ($scoreline =~ /\d bits\)/) {
192 ($score, $bits) = $scoreline =~
193 /Score = (\d+) \((\S+) bits\)/; # WU-BLAST
194 }
195 else {
196 ($bits, $score) = $scoreline =~
197 /Score =\s+(\S+) bits \((\d+)/; # NCBI-BLAST
198 }
199
200 my ($match, $hsplength) = ($scoreline =~ /Identities = (\d+)\/(\d+)/);
201 my ($positive) = ($scoreline =~ /Positives = (\d+)/);
202 my ($gaps) = ($scoreline =~ /Gaps = (\d+)/);
203 if($self->report_type() eq 'TBLASTX') {
204 ($qframe, $sframe) = $scoreline =~ /Frame =\s+([+-]\d)\s+\/\s+([+-]\d)/;
205 } elsif ($self->report_type() eq 'TBLASTN') {
206 ($sframe) = $scoreline =~ /Frame =\s+([+-]\d)/;
207 } else {
208 ($qframe) = $scoreline =~ /Frame =\s+([+-]\d)/;
209 }
210 $positive = $match if not defined $positive;
211 $gaps = '0' if not defined $gaps;
212 my ($p) = ($scoreline =~ /[Sum ]*P[\(\d+\)]* = (\S+)/);
213 unless (defined $p) {(undef, $p) = $scoreline =~ /Expect(\(\d+\))? =\s+(\S+)/}
214 my ($exp) = ($scoreline =~ /Expect(?:\(\d+\))? =\s+([^\s,]+)/);
215 $exp = -1 unless( defined $exp );
216
217 $self->throw("Unable to parse '$scoreline'") unless defined $score;
218
219 #######################
220 # get alignment lines #
221 #######################
222 my (@hspline);
223 while( defined($_ = $self->_readline()) ) {
224 if ($_ =~ /^WARNING:|^NOTE:/) {
225 while(defined($_ = $self->_readline())) {last if $_ !~ /\S/}
226 }
227 elsif ($_ !~ /\S/) {next}
228 elsif ($_ =~ /Strand HSP/) {next} # WU-BLAST non-data
229 elsif ($_ =~ /^\s*Strand/) {next} # NCBI-BLAST non-data
230 elsif ($_ =~ /^\s*Score/) {$self->_pushback($_); last}
231
232 elsif ($_ =~ /^>|^Histogram|^Searching|^Parameters|^\s+Database:|^CPU\stime|^\s*Lambda/)
233 {
234 #ps 5/28/01
235 # elsif ($_ =~ /^>|^Parameters|^\s+Database:|^CPU\stime/) {
236 $self->_pushback($_);
237
238 $self->{'HSP_ALL_PARSED'} = 1;
239 last;
240 }
241 elsif( $_ =~ /^\s*Frame/ ) {
242 if ($self->report_type() eq 'TBLASTX') {
243 ($qframe, $sframe) = $_ =~ /Frame = ([\+-]\d)\s+\/\s+([\+-]\d)/;
244 } elsif ($self->report_type() eq 'TBLASTN') {
245 ($sframe) = $_ =~ /Frame = ([\+-]\d)/;
246 } else {
247 ($qframe) = $_ =~ /Frame = ([\+-]\d)/;
248 }
249 }
250 else {
251 push @hspline, $_; # store the query line
252 $nextline = $self->_readline();
253 # Skip "pattern" line when parsing PHIBLAST reports, otherwise store the alignment line
254 my $l1 = ($nextline =~ /^\s*pattern/) ? $self->_readline() : $nextline;
255 push @hspline, $l1; # store the alignment line
256 my $l2 = $self->_readline(); push @hspline, $l2; # grab/store the sbjct line
257 }
258 }
259
260 #########################
261 # parse alignment lines #
262 #########################
263 my ($ql, $sl, $as) = ("", "", "");
264 my ($qb, $qe, $sb, $se) = (0,0,0,0);
265 my (@QL, @SL, @AS); # for better memory management
266
267 for(my $i=0;$i<@hspline;$i+=3) {
268 # warn $hspline[$i], $hspline[$i+2];
269 $hspline[$i] =~ /^(?:Query|Trans):\s+(\d+)\s*([\D\S]+)\s+(\d+)/;
270 $ql = $2; $qb = $1 unless $qb; $qe = $3;
271
272 my $offset = index($hspline[$i], $ql);
273 $as = substr($hspline[$i+1], $offset, CORE::length($ql));
274
275 $hspline[$i+2] =~ /^Sbjct:\s+(\d+)\s*([\D\S]+)\s+(\d+)/;
276 $sl = $2; $sb = $1 unless $sb; $se = $3;
277
278 push @QL, $ql; push @SL, $sl; push @AS, $as;
279 }
280
281 ##################
282 # the HSP object #
283 ##################
284 $ql = join("", @QL);
285 $sl = join("", @SL);
286 $as = join("", @AS);
287 # Query name and length are not in the report for a bl2seq report so {'PARENT'}->query and
288 # {'PARENT'}->qlength will not be available.
289 my ($qname, $qlength) = ('unknown','unknown');
290 if ($self->{'PARENT'}->can('query')) {
291 $qname = $self->{'PARENT'}->query;
292 $qlength = $self->{'PARENT'}->qlength;
293 }
294
295 my $hsp = new Bio::Tools::BPlite::HSP
296 ('-score' => $score,
297 '-bits' => $bits,
298 '-match' => $match,
299 '-positive' => $positive,
300 '-gaps' => $gaps,
301 '-hsplength' => $hsplength,
302 '-p' => $p,
303 '-exp' => $exp,
304 '-queryBegin' => $qb,
305 '-queryEnd' => $qe,
306 '-sbjctBegin' => $sb,
307 '-sbjctEnd' => $se,
308 '-querySeq' => $ql,
309 '-sbjctSeq' => $sl,
310 '-homologySeq'=> $as,
311 '-queryName' => $qname,
312 # '-queryName'=>$self->{'PARENT'}->query,
313 '-sbjctName' => $self->{'NAME'},
314 '-queryLength'=> $qlength,
315 # '-queryLength'=>$self->{'PARENT'}->qlength,
316 '-sbjctLength'=> $self->{'LENGTH'},
317 '-queryFrame' => $qframe,
318 '-sbjctFrame' => $sframe,
319 '-blastType' => $self->report_type());
320 return $hsp;
321 }
322
323 =head2 _readline
324
325 Title : _readline
326 Usage : $obj->_readline
327 Function: Reads a line of input.
328
329 Note that this method implicitely uses the value of $/ that is
330 in effect when called.
331
332 Note also that the current implementation does not handle pushed
333 back input correctly unless the pushed back input ends with the
334 value of $/.
335 Example :
336 Returns :
337
338 =cut
339
340 sub _readline{
341 my ($self) = @_;
342 return $self->{'PARENT'}->_readline();
343 }
344
345 =head2 _pushback
346
347 Title : _pushback
348 Usage : $obj->_pushback($newvalue)
349 Function: puts a line previously read with _readline back into a buffer
350 Example :
351 Returns :
352 Args : newvalue
353
354 =cut
355
356 sub _pushback {
357 my ($self, $arg) = @_;
358 return $self->{'PARENT'}->_pushback($arg);
359 }
360
361 1;