comparison variant_effect_predictor/Bio/Tools/BPlite/Iteration.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: Iteration.pm,v 1.15 2002/06/19 00:27:49 jason Exp $
2 # Bioperl module Bio::Tools::BPlite::Iteration
3 # based closely on the Bio::Tools::BPlite modules
4 # Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
5 # Lorenz Pollak (lorenz@ist.org, bioperl port)
6 #
7 # Copyright Peter Schattner
8 #
9 # You may distribute this module under the same terms as perl itself
10 # _history
11 # October 20, 2000
12 # POD documentation - main docs before the code
13 #
14 # Added to get a simple_align object for a psiblast run with the -m 6 flag /AE
15 #
16
17 =head1 NAME
18
19 Bio::Tools::BPlite::Iteration - object for parsing single iteration
20 of a PSIBLAST report
21
22 =head1 SYNOPSIS
23
24 use Bio::Tools:: BPpsilite;
25
26 open FH, "t/psiblastreport.out";
27 $report = Bio::Tools::BPpsilite->new(-fh=>\*FH);
28
29 # determine number of iterations executed by psiblast
30 $total_iterations = $report->number_of_iterations;
31 $last_iteration = $report->round($total_iterations);
32
33 # Process only hits found in last iteration ...
34 $oldhitarray_ref = $last_iteration->oldhits;
35 HIT: while($sbjct = $last_iteration->nextSbjct) {
36 $id = $sbjct->name;
37 $is_old = grep /\Q$id\E/, @$oldhitarray_ref;
38 if ($is_old ){next HIT;}
39 # do something with new hit...
40 }
41
42 =head2 ALIGNMENTS
43
44 # This assumed that you have $db pointing to a database, $out to an output file
45 # $slxdir to a directory and $psiout
46 # note the alignments can only be obtained if the flag "-m 6" is run.
47 # It might also be necessary to use the flag -v to get all alignments
48 #
49 my @psiparams = ('database' => $db , 'output' => $out, 'j' => 3, 'm' => 6,
50 'h' => 1.e-3 , 'F' => 'T' , 'Q' => $psiout );
51 my $factory = Bio::Tools::Run::StandAloneBlast->new(@psiparams);
52 my $report = $factory->blastpgp($seq);
53 my $total_iterations = $report->number_of_iterations();
54 my $last_iteration = $report->round($total_iterations);
55 my $align=$last_iteration->Align;
56 my $slxfile=$slxdir.$id.".slx";
57 my $slx = Bio::AlignIO->new('-format' => 'selex','-file' => ">".$slxfile );
58 $slx->write_aln($align);
59
60 =head1 DESCRIPTION
61
62 See the documentation for BPpsilite.pm for a description of the
63 Iteration.pm module.
64
65 =head1 AUTHORS - Peter Schattner
66
67 Email: schattner@alum.mit.edu
68
69 =head1 CONTRIBUTORS
70
71 Jason Stajich, jason@cgt.mc.duke.edu
72
73 =head1 ACKNOWLEDGEMENTS
74
75 Based on work of:
76 Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
77 Lorenz Pollak (lorenz@ist.org, bioperl port)
78
79 =head1 COPYRIGHT
80
81 BPlite.pm is copyright (C) 1999 by Ian Korf.
82
83 =head1 DISCLAIMER
84
85 This software is provided "as is" without warranty of any kind.
86
87 =cut
88
89 package Bio::Tools::BPlite::Iteration;
90
91 use strict;
92 use vars qw(@ISA);
93 use Bio::Root::Root; # root object to inherit from
94 use Bio::Tools::BPlite; #
95 use Bio::Tools::BPlite::Sbjct;
96
97 @ISA = qw(Bio::Root::Root);
98
99 sub new {
100 my ($class, @args) = @_;
101 my $self = $class->SUPER::new(@args);
102
103 ($self->{'PARENT'},$self->{'ROUND'}) =
104 $self->_rearrange([qw(PARENT
105 ROUND
106 )],@args);
107
108 $self->{'QUERY'} = $self->{'PARENT'}->{'QUERY'};
109 $self->{'LENGTH'} = $self->{'PARENT'}->{'LENGTH'};
110
111 if($self->_parseHeader) {$self->{'REPORT_DONE'} = 0} # there are alignments
112 else {$self->{'REPORT_DONE'} = 1} # empty report
113
114 return $self; # success - we hope!
115 }
116
117 =head2 query
118
119 Title : query
120 Usage : $query = $obj->query();
121 Function : returns the query object
122 Example :
123 Returns : query object
124 Args :
125
126 =cut
127
128 sub query {shift->{'QUERY'}}
129
130 =head2 qlength
131
132 Title : qlength
133 Usage : $len = $obj->qlength();
134 Returns : length of query
135 Args : none
136
137 =cut
138
139 sub qlength {shift->{'LENGTH'}}
140
141 =head2 newhits
142
143 Title : newhits
144 Usage : $newhits = $obj->newhits();
145 Returns : reference to an array listing all the hits
146 from the current iteration which were not identified
147 in the previous iteration
148 Args : none
149
150 =cut
151
152 sub newhits {shift->{'NEWHITS'}}
153
154 =head2 oldhits
155
156 Title : oldhits
157 Usage : $oldhits = $obj->oldhits();
158 Returns : reference to an array listing all the hits from
159 the current iteration which were identified and
160 above threshold in the previous iteration
161 Args : none
162
163 =cut
164
165 sub oldhits {shift->{'OLDHITS'}}
166
167
168 =head2 nextSbjct
169
170 Title : nextSbjct
171 Usage : $sbjct = $obj->nextSbjct();
172 Function : Method of iterating through all the Sbjct retrieved
173 from parsing the report
174 #Example : while ( my $sbjct = $obj->nextSbjct ) {}
175 Returns : next Sbjct object or undef if finished
176 Args :
177
178 =cut
179
180 sub nextSbjct {
181 my ($self) = @_;
182 $self->_fastForward or return undef;
183
184 #######################
185 # get all sbjct lines #
186 #######################
187 my $def = $self->_readline();
188
189 while(defined ($_ = $self->_readline) ) {
190 if ($_ !~ /\w/) {next}
191 elsif ($_ =~ /Strand HSP/) {next} # WU-BLAST non-data
192 elsif ($_ =~ /^\s{0,2}Score/) {$self->_pushback( $_); last}
193 elsif ($_ =~ /^(\d+) .* \d+$/) { # This is not correct at all
194 $self->_pushback($_); # 1: HSP does not work for -m 6 flag
195 $def = $1; # 2: length/name are incorrect
196 my $length = undef; # 3: Names are repeated many times.
197 my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
198 '-length'=>$length,
199 '-parent'=>$self);
200 return $sbjct;
201 } # m-6
202 elsif ($_ =~ /^Parameters|^\s+Database:|^\s+Posted date:/) {
203 $self->_pushback( $_);
204 last;
205 } else {$def .= $_}
206 }
207 $def =~ s/\s+/ /g;
208 $def =~ s/\s+$//g;
209 $def =~ s/Length = ([\d,]+)$//g;
210 my $length = $1;
211 return 0 unless $def =~ /^>/;
212 $def =~ s/^>//;
213
214 ####################
215 # the Sbjct object #
216 ####################
217 my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
218 '-length'=>$length,
219 '-parent'=>$self);
220 return $sbjct;
221 }
222
223
224 # This is added by /AE
225
226 =head2 Align
227
228 Title : Align
229 Usage : $SimpleAlign = $obj->Align();
230 Function : Method to obtain a simpleAlign object from psiblast
231 Example : $SimpleAlign = $obj->Align();
232 Returns : SimpleAlign object or undef if not found.
233 BUG : Only works if psiblast has been run with m 6 flag
234 Args :
235
236 =cut
237
238 sub Align {
239 use Bio::SimpleAlign;
240 my ($self) = @_;
241 $self->_fastForward or return undef;
242 my $lastline = $self->_readline();
243 return undef unless $lastline =~ /^QUERY/; # If psiblast not run correctly
244 my (%sequence,%first,%last,$num);
245
246 if ( $lastline =~ /^QUERY\s+(\d*)\s*([-\w]+)\s*(\d*)\s*$/){
247 my $name='QUERY';
248 my $start=$1;
249 my $seq=$2;
250 my $stop=$3;
251 $seq =~ s/-/\./g;
252 $start =~ s/ //g;
253 $stop =~ s/ //g;
254 $sequence{$name} .= $seq;
255 if ($first{$name} eq ''){$first{$name}=$start;}
256 if ($stop ne ''){$last{$name}=$stop;}
257 # print "FOUND:\t$seq\t$start\t$stop\n";
258 $num=0;
259 }
260 while(defined($_ = $self->_readline()) ){
261 chomp($_);
262 if ( $_ =~ /^QUERY\s+(\d+)\s*([\-A-Z]+)\s*(\+)\s*$/){
263 my $name='QUERY';
264 my $start=$1;
265 my $seq=$2;
266 my $stop=$3;
267 $seq =~ s/-/\./g;
268 $start =~ s/ //g;
269 $stop =~ s/ //g;
270 $sequence{$name} .= $seq;
271 if ($first{$name} eq '') { $first{$name} = $start;}
272 if ($stop ne '') { $last{$name}=$stop;}
273 $num=0;
274 } elsif ( $_ =~ /^(\d+)\s+(\d+)\s*([\-A-Z]+)\s*(\d+)\s*$/ ){
275 my $name=$1.".".$num;
276 my $start=$2;
277 my $seq=$3;
278 my $stop=$4;
279 $seq =~ s/-/\./g;
280 $start =~ s/ //g;
281 $stop =~ s/ //g;
282 $sequence{$name} .= $seq;
283 if ($first{$name} eq ''){$first{$name}=$start;}
284 if ($stop ne ''){$last{$name}=$stop;}
285 $num++;
286 }
287 }
288 my $align = new Bio::SimpleAlign();
289 my @keys=sort keys(%sequence);
290 foreach my $name (@keys){
291 my $nse = $name."/".$first{$name}."-".$last{$name};
292 my $seqobj = Bio::LocatableSeq->new( -seq => $sequence{$name},
293 -id => $name,
294 -name => $nse,
295 -start => $first{$name},
296 -end => $last{$name}
297 );
298
299 $align->add_seq($seqobj);
300 }
301 return $align;
302 }
303
304 # Start of internal subroutines.
305
306 sub _parseHeader {
307 my ($self) = @_;
308 my (@old_hits, @new_hits);
309
310 my $newhits_true = ($self->{'ROUND'} < 2) ? 1 : 0 ;
311 while(defined($_ = $self->_readline()) ) {
312 if ($_ =~ /(\w\w|.*|\w+.*)\s\s+(\d+)\s+([-\.e\d]+)$/) {
313 my $id = $1;
314 my $score= $2; #not used currently
315 my $evalue= $3; #not used currently
316 if ($newhits_true) { push ( @new_hits, $id);}
317 else { push (@old_hits, $id);}
318 }
319 elsif ($_ =~ /^Sequences not found previously/) {$newhits_true = 1 ;}
320 # This is changed for "-m 6" option /AE
321 elsif ($_ =~ /^>/ || $_ =~ /^QUERY/)
322 {
323 $self->_pushback($_);
324 $self->{'OLDHITS'} = \@old_hits;
325 $self->{'NEWHITS'} = \@new_hits;
326 return 1;
327 }
328 elsif ($_ =~ /^Parameters|^\s+Database:|^\s*Results from round\s+(d+)/) {
329 $self->_pushback($_);
330 return 0; # no sequences found in this iteration
331 }
332 }
333 return 0; # no sequences found in this iteration
334 }
335
336 sub _fastForward {
337 my ($self) = @_;
338 return 0 if $self->{'REPORT_DONE'}; # empty report
339
340 while(defined($_ = $self->_readline()) ) {
341 if( $_ =~ /^>/ ||
342 $_ =~ /^QUERY|^\d+ .* \d+$/ ) { # Changed to also handle "-m 6" /AE
343 $self->_pushback($_);
344 return 1;
345 }
346 # print "FASTFORWARD",$_,"\n";
347 if ($_ =~ /^>|^Parameters|^\s+Database:/) {
348 $self->_pushback($_);
349 return 1;
350 }
351 }
352 $self->warn("Possible error (2) while parsing BLAST report!");
353 }
354
355
356 =head2 _readline
357
358 Title : _readline
359 Usage : $obj->_readline
360 Function: Reads a line of input.
361
362 Note that this method implicitely uses the value of $/ that is
363 in effect when called.
364
365 Note also that the current implementation does not handle pushed
366 back input correctly unless the pushed back input ends with the
367 value of $/.
368 Example :
369 Returns :
370
371 =cut
372
373 sub _readline{
374 my ($self) = @_;
375 return $self->{'PARENT'}->_readline();
376 }
377
378 =head2 _pushback
379
380 Title : _pushback
381 Usage : $obj->_pushback($newvalue)
382 Function: puts a line previously read with _readline back into a buffer
383 Example :
384 Returns :
385 Args : newvalue
386
387 =cut
388
389 sub _pushback {
390 my ($self, $arg) = @_;
391 return $self->{'PARENT'}->_pushback($arg);
392 }
393 1;
394 __END__