Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/BPlite/Iteration.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: Iteration.pm,v 1.15 2002/06/19 00:27:49 jason Exp $ | |
2 # Bioperl module Bio::Tools::BPlite::Iteration | |
3 # based closely on the Bio::Tools::BPlite modules | |
4 # Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), | |
5 # Lorenz Pollak (lorenz@ist.org, bioperl port) | |
6 # | |
7 # Copyright Peter Schattner | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # _history | |
11 # October 20, 2000 | |
12 # POD documentation - main docs before the code | |
13 # | |
14 # Added to get a simple_align object for a psiblast run with the -m 6 flag /AE | |
15 # | |
16 | |
17 =head1 NAME | |
18 | |
19 Bio::Tools::BPlite::Iteration - object for parsing single iteration | |
20 of a PSIBLAST report | |
21 | |
22 =head1 SYNOPSIS | |
23 | |
24 use Bio::Tools:: BPpsilite; | |
25 | |
26 open FH, "t/psiblastreport.out"; | |
27 $report = Bio::Tools::BPpsilite->new(-fh=>\*FH); | |
28 | |
29 # determine number of iterations executed by psiblast | |
30 $total_iterations = $report->number_of_iterations; | |
31 $last_iteration = $report->round($total_iterations); | |
32 | |
33 # Process only hits found in last iteration ... | |
34 $oldhitarray_ref = $last_iteration->oldhits; | |
35 HIT: while($sbjct = $last_iteration->nextSbjct) { | |
36 $id = $sbjct->name; | |
37 $is_old = grep /\Q$id\E/, @$oldhitarray_ref; | |
38 if ($is_old ){next HIT;} | |
39 # do something with new hit... | |
40 } | |
41 | |
42 =head2 ALIGNMENTS | |
43 | |
44 # This assumed that you have $db pointing to a database, $out to an output file | |
45 # $slxdir to a directory and $psiout | |
46 # note the alignments can only be obtained if the flag "-m 6" is run. | |
47 # It might also be necessary to use the flag -v to get all alignments | |
48 # | |
49 my @psiparams = ('database' => $db , 'output' => $out, 'j' => 3, 'm' => 6, | |
50 'h' => 1.e-3 , 'F' => 'T' , 'Q' => $psiout ); | |
51 my $factory = Bio::Tools::Run::StandAloneBlast->new(@psiparams); | |
52 my $report = $factory->blastpgp($seq); | |
53 my $total_iterations = $report->number_of_iterations(); | |
54 my $last_iteration = $report->round($total_iterations); | |
55 my $align=$last_iteration->Align; | |
56 my $slxfile=$slxdir.$id.".slx"; | |
57 my $slx = Bio::AlignIO->new('-format' => 'selex','-file' => ">".$slxfile ); | |
58 $slx->write_aln($align); | |
59 | |
60 =head1 DESCRIPTION | |
61 | |
62 See the documentation for BPpsilite.pm for a description of the | |
63 Iteration.pm module. | |
64 | |
65 =head1 AUTHORS - Peter Schattner | |
66 | |
67 Email: schattner@alum.mit.edu | |
68 | |
69 =head1 CONTRIBUTORS | |
70 | |
71 Jason Stajich, jason@cgt.mc.duke.edu | |
72 | |
73 =head1 ACKNOWLEDGEMENTS | |
74 | |
75 Based on work of: | |
76 Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), | |
77 Lorenz Pollak (lorenz@ist.org, bioperl port) | |
78 | |
79 =head1 COPYRIGHT | |
80 | |
81 BPlite.pm is copyright (C) 1999 by Ian Korf. | |
82 | |
83 =head1 DISCLAIMER | |
84 | |
85 This software is provided "as is" without warranty of any kind. | |
86 | |
87 =cut | |
88 | |
89 package Bio::Tools::BPlite::Iteration; | |
90 | |
91 use strict; | |
92 use vars qw(@ISA); | |
93 use Bio::Root::Root; # root object to inherit from | |
94 use Bio::Tools::BPlite; # | |
95 use Bio::Tools::BPlite::Sbjct; | |
96 | |
97 @ISA = qw(Bio::Root::Root); | |
98 | |
99 sub new { | |
100 my ($class, @args) = @_; | |
101 my $self = $class->SUPER::new(@args); | |
102 | |
103 ($self->{'PARENT'},$self->{'ROUND'}) = | |
104 $self->_rearrange([qw(PARENT | |
105 ROUND | |
106 )],@args); | |
107 | |
108 $self->{'QUERY'} = $self->{'PARENT'}->{'QUERY'}; | |
109 $self->{'LENGTH'} = $self->{'PARENT'}->{'LENGTH'}; | |
110 | |
111 if($self->_parseHeader) {$self->{'REPORT_DONE'} = 0} # there are alignments | |
112 else {$self->{'REPORT_DONE'} = 1} # empty report | |
113 | |
114 return $self; # success - we hope! | |
115 } | |
116 | |
117 =head2 query | |
118 | |
119 Title : query | |
120 Usage : $query = $obj->query(); | |
121 Function : returns the query object | |
122 Example : | |
123 Returns : query object | |
124 Args : | |
125 | |
126 =cut | |
127 | |
128 sub query {shift->{'QUERY'}} | |
129 | |
130 =head2 qlength | |
131 | |
132 Title : qlength | |
133 Usage : $len = $obj->qlength(); | |
134 Returns : length of query | |
135 Args : none | |
136 | |
137 =cut | |
138 | |
139 sub qlength {shift->{'LENGTH'}} | |
140 | |
141 =head2 newhits | |
142 | |
143 Title : newhits | |
144 Usage : $newhits = $obj->newhits(); | |
145 Returns : reference to an array listing all the hits | |
146 from the current iteration which were not identified | |
147 in the previous iteration | |
148 Args : none | |
149 | |
150 =cut | |
151 | |
152 sub newhits {shift->{'NEWHITS'}} | |
153 | |
154 =head2 oldhits | |
155 | |
156 Title : oldhits | |
157 Usage : $oldhits = $obj->oldhits(); | |
158 Returns : reference to an array listing all the hits from | |
159 the current iteration which were identified and | |
160 above threshold in the previous iteration | |
161 Args : none | |
162 | |
163 =cut | |
164 | |
165 sub oldhits {shift->{'OLDHITS'}} | |
166 | |
167 | |
168 =head2 nextSbjct | |
169 | |
170 Title : nextSbjct | |
171 Usage : $sbjct = $obj->nextSbjct(); | |
172 Function : Method of iterating through all the Sbjct retrieved | |
173 from parsing the report | |
174 #Example : while ( my $sbjct = $obj->nextSbjct ) {} | |
175 Returns : next Sbjct object or undef if finished | |
176 Args : | |
177 | |
178 =cut | |
179 | |
180 sub nextSbjct { | |
181 my ($self) = @_; | |
182 $self->_fastForward or return undef; | |
183 | |
184 ####################### | |
185 # get all sbjct lines # | |
186 ####################### | |
187 my $def = $self->_readline(); | |
188 | |
189 while(defined ($_ = $self->_readline) ) { | |
190 if ($_ !~ /\w/) {next} | |
191 elsif ($_ =~ /Strand HSP/) {next} # WU-BLAST non-data | |
192 elsif ($_ =~ /^\s{0,2}Score/) {$self->_pushback( $_); last} | |
193 elsif ($_ =~ /^(\d+) .* \d+$/) { # This is not correct at all | |
194 $self->_pushback($_); # 1: HSP does not work for -m 6 flag | |
195 $def = $1; # 2: length/name are incorrect | |
196 my $length = undef; # 3: Names are repeated many times. | |
197 my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def, | |
198 '-length'=>$length, | |
199 '-parent'=>$self); | |
200 return $sbjct; | |
201 } # m-6 | |
202 elsif ($_ =~ /^Parameters|^\s+Database:|^\s+Posted date:/) { | |
203 $self->_pushback( $_); | |
204 last; | |
205 } else {$def .= $_} | |
206 } | |
207 $def =~ s/\s+/ /g; | |
208 $def =~ s/\s+$//g; | |
209 $def =~ s/Length = ([\d,]+)$//g; | |
210 my $length = $1; | |
211 return 0 unless $def =~ /^>/; | |
212 $def =~ s/^>//; | |
213 | |
214 #################### | |
215 # the Sbjct object # | |
216 #################### | |
217 my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def, | |
218 '-length'=>$length, | |
219 '-parent'=>$self); | |
220 return $sbjct; | |
221 } | |
222 | |
223 | |
224 # This is added by /AE | |
225 | |
226 =head2 Align | |
227 | |
228 Title : Align | |
229 Usage : $SimpleAlign = $obj->Align(); | |
230 Function : Method to obtain a simpleAlign object from psiblast | |
231 Example : $SimpleAlign = $obj->Align(); | |
232 Returns : SimpleAlign object or undef if not found. | |
233 BUG : Only works if psiblast has been run with m 6 flag | |
234 Args : | |
235 | |
236 =cut | |
237 | |
238 sub Align { | |
239 use Bio::SimpleAlign; | |
240 my ($self) = @_; | |
241 $self->_fastForward or return undef; | |
242 my $lastline = $self->_readline(); | |
243 return undef unless $lastline =~ /^QUERY/; # If psiblast not run correctly | |
244 my (%sequence,%first,%last,$num); | |
245 | |
246 if ( $lastline =~ /^QUERY\s+(\d*)\s*([-\w]+)\s*(\d*)\s*$/){ | |
247 my $name='QUERY'; | |
248 my $start=$1; | |
249 my $seq=$2; | |
250 my $stop=$3; | |
251 $seq =~ s/-/\./g; | |
252 $start =~ s/ //g; | |
253 $stop =~ s/ //g; | |
254 $sequence{$name} .= $seq; | |
255 if ($first{$name} eq ''){$first{$name}=$start;} | |
256 if ($stop ne ''){$last{$name}=$stop;} | |
257 # print "FOUND:\t$seq\t$start\t$stop\n"; | |
258 $num=0; | |
259 } | |
260 while(defined($_ = $self->_readline()) ){ | |
261 chomp($_); | |
262 if ( $_ =~ /^QUERY\s+(\d+)\s*([\-A-Z]+)\s*(\+)\s*$/){ | |
263 my $name='QUERY'; | |
264 my $start=$1; | |
265 my $seq=$2; | |
266 my $stop=$3; | |
267 $seq =~ s/-/\./g; | |
268 $start =~ s/ //g; | |
269 $stop =~ s/ //g; | |
270 $sequence{$name} .= $seq; | |
271 if ($first{$name} eq '') { $first{$name} = $start;} | |
272 if ($stop ne '') { $last{$name}=$stop;} | |
273 $num=0; | |
274 } elsif ( $_ =~ /^(\d+)\s+(\d+)\s*([\-A-Z]+)\s*(\d+)\s*$/ ){ | |
275 my $name=$1.".".$num; | |
276 my $start=$2; | |
277 my $seq=$3; | |
278 my $stop=$4; | |
279 $seq =~ s/-/\./g; | |
280 $start =~ s/ //g; | |
281 $stop =~ s/ //g; | |
282 $sequence{$name} .= $seq; | |
283 if ($first{$name} eq ''){$first{$name}=$start;} | |
284 if ($stop ne ''){$last{$name}=$stop;} | |
285 $num++; | |
286 } | |
287 } | |
288 my $align = new Bio::SimpleAlign(); | |
289 my @keys=sort keys(%sequence); | |
290 foreach my $name (@keys){ | |
291 my $nse = $name."/".$first{$name}."-".$last{$name}; | |
292 my $seqobj = Bio::LocatableSeq->new( -seq => $sequence{$name}, | |
293 -id => $name, | |
294 -name => $nse, | |
295 -start => $first{$name}, | |
296 -end => $last{$name} | |
297 ); | |
298 | |
299 $align->add_seq($seqobj); | |
300 } | |
301 return $align; | |
302 } | |
303 | |
304 # Start of internal subroutines. | |
305 | |
306 sub _parseHeader { | |
307 my ($self) = @_; | |
308 my (@old_hits, @new_hits); | |
309 | |
310 my $newhits_true = ($self->{'ROUND'} < 2) ? 1 : 0 ; | |
311 while(defined($_ = $self->_readline()) ) { | |
312 if ($_ =~ /(\w\w|.*|\w+.*)\s\s+(\d+)\s+([-\.e\d]+)$/) { | |
313 my $id = $1; | |
314 my $score= $2; #not used currently | |
315 my $evalue= $3; #not used currently | |
316 if ($newhits_true) { push ( @new_hits, $id);} | |
317 else { push (@old_hits, $id);} | |
318 } | |
319 elsif ($_ =~ /^Sequences not found previously/) {$newhits_true = 1 ;} | |
320 # This is changed for "-m 6" option /AE | |
321 elsif ($_ =~ /^>/ || $_ =~ /^QUERY/) | |
322 { | |
323 $self->_pushback($_); | |
324 $self->{'OLDHITS'} = \@old_hits; | |
325 $self->{'NEWHITS'} = \@new_hits; | |
326 return 1; | |
327 } | |
328 elsif ($_ =~ /^Parameters|^\s+Database:|^\s*Results from round\s+(d+)/) { | |
329 $self->_pushback($_); | |
330 return 0; # no sequences found in this iteration | |
331 } | |
332 } | |
333 return 0; # no sequences found in this iteration | |
334 } | |
335 | |
336 sub _fastForward { | |
337 my ($self) = @_; | |
338 return 0 if $self->{'REPORT_DONE'}; # empty report | |
339 | |
340 while(defined($_ = $self->_readline()) ) { | |
341 if( $_ =~ /^>/ || | |
342 $_ =~ /^QUERY|^\d+ .* \d+$/ ) { # Changed to also handle "-m 6" /AE | |
343 $self->_pushback($_); | |
344 return 1; | |
345 } | |
346 # print "FASTFORWARD",$_,"\n"; | |
347 if ($_ =~ /^>|^Parameters|^\s+Database:/) { | |
348 $self->_pushback($_); | |
349 return 1; | |
350 } | |
351 } | |
352 $self->warn("Possible error (2) while parsing BLAST report!"); | |
353 } | |
354 | |
355 | |
356 =head2 _readline | |
357 | |
358 Title : _readline | |
359 Usage : $obj->_readline | |
360 Function: Reads a line of input. | |
361 | |
362 Note that this method implicitely uses the value of $/ that is | |
363 in effect when called. | |
364 | |
365 Note also that the current implementation does not handle pushed | |
366 back input correctly unless the pushed back input ends with the | |
367 value of $/. | |
368 Example : | |
369 Returns : | |
370 | |
371 =cut | |
372 | |
373 sub _readline{ | |
374 my ($self) = @_; | |
375 return $self->{'PARENT'}->_readline(); | |
376 } | |
377 | |
378 =head2 _pushback | |
379 | |
380 Title : _pushback | |
381 Usage : $obj->_pushback($newvalue) | |
382 Function: puts a line previously read with _readline back into a buffer | |
383 Example : | |
384 Returns : | |
385 Args : newvalue | |
386 | |
387 =cut | |
388 | |
389 sub _pushback { | |
390 my ($self, $arg) = @_; | |
391 return $self->{'PARENT'}->_pushback($arg); | |
392 } | |
393 1; | |
394 __END__ |