comparison variant_effect_predictor/Bio/Tools/Run/RemoteBlast.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: RemoteBlast.pm,v 1.14.2.2 2003/09/03 18:29:50 jason Exp $
2 #
3 # BioPerl module for Bio::Tools::Run::RemoteBlast
4 #
5 # Cared for by Jason Stajich, Mat Wiepert
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Tools::Run::RemoteBlast - Object for remote execution of the NCBI Blast
16 via HTTP
17
18 =head1 SYNOPSIS
19
20 #Remote-blast "factory object" creation and blast-parameter initialization
21
22 use Bio::Tools::Run::RemoteBlast;
23 use strict;
24 my $prog = 'blastp';
25 my $db = 'swissprot';
26 my $e_val= '1e-10';
27
28 my @params = ( '-prog' => $prog,
29 '-data' => $db,
30 '-expect' => $e_val,
31 '-readmethod' => 'SearchIO' );
32
33 my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
34
35 #change a paramter
36 $Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Homo sapiens [ORGN]';
37
38 #remove a parameter
39 delete $Bio::Tools::Run::RemoteBlast::HEADER{'FILTER'};
40
41 my $v = 1;
42 #$v is just to turn on and off the messages
43
44 my $str = Bio::SeqIO->new(-file=>'amino.fa' , '-format' => 'fasta' );
45
46 while (my $input = $str->next_seq()){
47 #Blast a sequence against a database:
48
49 #Alternatively, you could pass in a file with many
50 #sequences rather than loop through sequence one at a time
51 #Remove the loop starting 'while (my $input = $str->next_seq())'
52 #and swap the two lines below for an example of that.
53 my $r = $factory->submit_blast($input);
54 #my $r = $factory->submit_blast('amino.fa');
55
56 print STDERR "waiting..." if( $v > 0 );
57 while ( my @rids = $factory->each_rid ) {
58 foreach my $rid ( @rids ) {
59 my $rc = $factory->retrieve_blast($rid);
60 if( !ref($rc) ) {
61 if( $rc < 0 ) {
62 $factory->remove_rid($rid);
63 }
64 print STDERR "." if ( $v > 0 );
65 sleep 5;
66 } else {
67 my $result = $rc->next_result();
68 #save the output
69 my $filename = $result->query_name()."\.out";
70 $factory->save_output($filename);
71 $factory->remove_rid($rid);
72 print "\nQuery Name: ", $result->query_name(), "\n";
73 while ( my $hit = $result->next_hit ) {
74 next unless ( $v > 0);
75 print "\thit name is ", $hit->name, "\n";
76 while( my $hsp = $hit->next_hsp ) {
77 print "\t\tscore is ", $hsp->score, "\n";
78 }
79 }
80 }
81 }
82 }
83 }
84
85 # This example shows how to change a CGI parameter:
86 $Bio::Tools::Run::RemoteBlast::HEADER{'MATRIX_NAME'} = 'BLOSUM25';
87
88 # And this is how to delete a CGI parameter:
89 delete $Bio::Tools::Run::RemoteBlast::HEADER{'FILTER'};
90
91
92 =head1 DESCRIPTION
93
94 Class for remote execution of the NCBI Blast via HTTP.
95
96 For a description of the many CGI parameters see:
97 http://www.ncbi.nlm.nih.gov/BLAST/Doc/urlapi.html
98
99 Various additional options and input formats are available.
100
101 =head1 FEEDBACK
102
103 =head2 Mailing Lists
104
105 User feedback is an integral part of the evolution of this and other
106 Bioperl modules. Send your comments and suggestions preferably to one
107 of the Bioperl mailing lists. Your participation is much appreciated.
108
109 bioperl-l@bioperl.org - General discussion
110 http://bio.perl.org/MailList.html - About the mailing lists
111
112 =head2 Reporting Bugs
113
114 Report bugs to the Bioperl bug tracking system to help us keep track
115 the bugs and their resolution. Bug reports can be submitted via email
116 or the web:
117
118 bioperl-bugs@bio.perl.org
119 http://bio.perl.org/bioperl-bugs/
120
121 =head1 AUTHOR - Jason Stajich
122
123 Email jason@bioperl.org
124
125 =head1 APPENDIX
126
127 The rest of the documentation details each of the object
128 methods. Internal methods are usually preceded with a _
129
130 =cut
131
132 package Bio::Tools::Run::RemoteBlast;
133
134 use vars qw($AUTOLOAD @ISA %BLAST_PARAMS $URLBASE %HEADER %RETRIEVALHEADER
135 $RIDLINE);
136 use strict;
137
138 use Bio::Root::Root;
139 use Bio::Root::IO;
140 use Bio::SeqIO;
141 use IO::String;
142 use Bio::Tools::BPlite;
143 use Bio::SearchIO;
144 use LWP;
145 use HTTP::Request::Common;
146 BEGIN {
147 $URLBASE = 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi';
148 %HEADER = ('CMD' => 'Put',
149 'PROGRAM' => '',
150 'DATABASE' => '',
151 'FILTER' => 'L',
152 'EXPECT' => '',
153 'QUERY' => '',
154 'CDD_SEARCH' => 'off',
155 'COMPOSITION_BASED_STATISTICS' => 'off',
156 'FORMAT_OBJECT' => 'Alignment',
157 'SERVICE' => 'plain',
158 );
159
160 %RETRIEVALHEADER = ('CMD' => 'Get',
161 'RID' => '',
162 'ALIGNMENT_VIEW' => 'Pairwise',
163 'DESCRIPTIONS' => 100,
164 'ALIGNMENTS' => 50,
165 'FORMAT_TYPE' => 'Text',
166 );
167
168 $RIDLINE = 'RID\s+=\s+(\S+)';
169
170 %BLAST_PARAMS = ( 'prog' => 'blastp',
171 'data' => 'nr',
172 'expect' => '1e-3',
173 'readmethod' => 'SearchIO'
174 );
175
176 }
177
178 @ISA = qw(Bio::Root::Root Bio::Root::IO);
179
180 sub new {
181 my ($caller, @args) = @_;
182 # chained new
183 my $self = $caller->SUPER::new(@args);
184 # so that tempfiles are cleaned up
185 $self->_initialize_io();
186 my ($prog, $data, $expect,
187 $readmethod) = $self->_rearrange([qw(PROG DATA
188 EXPECT
189 READMETHOD)],
190 @args);
191
192 $readmethod = $BLAST_PARAMS{'readmethod'} unless defined $readmethod;
193 $prog = $BLAST_PARAMS{'prog'} unless defined $prog;
194 $data = $BLAST_PARAMS{'data'} unless defined $data;
195 $expect = $BLAST_PARAMS{'expect'} unless defined $expect;
196 $self->readmethod($readmethod);
197 $self->program($prog);
198 $self->database($data);
199 $self->expect($expect);
200
201 return $self;
202 }
203
204 =head2 header
205
206 Title : header
207 Usage : my $header = $self->header
208 Function: Get/Set HTTP header for blast query
209 Returns : string
210 Args : none
211
212 =cut
213
214 sub header {
215 my ($self) = @_;
216 my %h = %HEADER;
217 $h{'PROGRAM'} = $self->program;
218 $h{'DATABASE'} = $self->database;
219 $h{'EXPECT'} = $self->expect;
220 return %h;
221 }
222
223 =head2 readmethod
224
225 Title : readmethod
226 Usage : my $readmethod = $self->readmethod
227 Function: Get/Set the method to read the blast report
228 Returns : string
229 Args : string [ Blast, BPlite ]
230
231 =cut
232
233 sub readmethod {
234 my ($self, $val) = @_;
235 if( defined $val ) {
236 $self->{'_readmethod'} = $val;
237 }
238 return $self->{'_readmethod'};
239 }
240
241
242 =head2 program
243
244 Title : program
245 Usage : my $prog = $self->program
246 Function: Get/Set the program to run
247 Returns : string
248 Args : string [ blastp, blastn, blastx, tblastn, tblastx ]
249
250 =cut
251
252 sub program {
253 my ($self, $val) = @_;
254 if( defined $val ) {
255 $val = lc $val;
256 if( $val !~ /t?blast[pnx]/ ) {
257 $self->warn("trying to set program to an invalid program name ($val) -- defaulting to blastp");
258 $val = 'blastp';
259 }
260 # $self->{'_program'} = $val;
261 $HEADER{'PROGRAM'} = $val;
262 }
263 return $HEADER{'PROGRAM'};
264 }
265
266
267 =head2 database
268
269 Title : database
270 Usage : my $db = $self->database
271 Function: Get/Set the database to search
272 Returns : string
273 Args : string [ swissprot, nr, nt, etc... ]
274
275 =cut
276
277 sub database {
278 my ($self, $val) = @_;
279 if( defined $val ) {
280 # $self->{'_database'} = $val;
281 $HEADER{'DATABASE'} = $val;
282 }
283 return $HEADER{'DATABASE'};
284 }
285
286
287 =head2 expect
288
289 Title : expect
290 Usage : my $expect = $self->expect
291 Function: Get/Set the E value cutoff
292 Returns : string
293 Args : string [ '1e-4' ]
294
295 =cut
296
297 sub expect {
298 my ($self, $val) = @_;
299 if( defined $val ) {
300 # $self->{'_expect'} = $val;
301 $HEADER{'EXPECT'} = $val;
302 }
303 return $HEADER{'EXPECT'};
304 }
305
306 =head2 ua
307
308 Title : ua
309 Usage : my $ua = $self->ua or
310 $self->ua($ua)
311 Function: Get/Set a LWP::UserAgent for use
312 Returns : reference to LWP::UserAgent Object
313 Args : none
314 Comments: Will create a UserAgent if none has been requested before.
315
316 =cut
317
318 sub ua {
319 my ($self, $value) = @_;
320 if( ! defined $self->{'_ua'} ) {
321 $self->{'_ua'} = new LWP::UserAgent;
322 }
323 return $self->{'_ua'};
324 }
325
326 =head2 proxy
327
328 Title : proxy
329 Usage : $httpproxy = $db->proxy('http') or
330 $db->proxy(['http','ftp'], 'http://myproxy' )
331 Function: Get/Set a proxy for use of proxy
332 Returns : a string indicating the proxy
333 Args : $protocol : an array ref of the protocol(s) to set/get
334 $proxyurl : url of the proxy to use for the specified protocol
335
336 =cut
337
338 sub proxy {
339 my ($self,$protocol,$proxy) = @_;
340 return undef if ( !defined $self->ua || !defined $protocol
341 || !defined $proxy );
342 return $self->ua->proxy($protocol,$proxy);
343 }
344
345 sub add_rid {
346 my ($self, @vals) = @_;
347 foreach ( @vals ) {
348 $self->{'_rids'}->{$_} = 1;
349 }
350 return scalar keys %{$self->{'_rids'}};
351 }
352
353 sub remove_rid {
354 my ($self, @vals) = @_;
355 foreach ( @vals ) {
356 delete $self->{'_rids'}->{$_};
357 }
358 return scalar keys %{$self->{'_rids'}};
359 }
360
361 sub each_rid {
362 my ($self) = @_;
363 return keys %{$self->{'_rids'}};
364 }
365
366 =head2 submit_blast
367
368 Title : submit_blast
369 Usage : $self->submit_blast([$seq1,$seq2]);
370 Function: Submit blast jobs to ncbi blast queue on sequence(s)
371 Returns : Blast report object as defined by $self->readmethod
372 Args : input can be:
373 * sequence object
374 * array ref of sequence objects
375 * filename of file containing fasta formatted sequences
376
377 =cut
378
379 sub submit_blast {
380 my ($self, $input) = @_;
381 my @seqs = $self->_load_input($input);
382 return 0 unless ( @seqs );
383 my $tcount = 0;
384 my %header = $self->header;
385 foreach my $seq ( @seqs ) {
386 #If query has a fasta header, the output has the query line.
387 $header{'QUERY'} = ">".(defined $seq->display_id() ? $seq->display_id() : "").
388 " ".(defined $seq->desc() ? $seq->desc() : "")."\n".$seq->seq();
389 my $request = POST $URLBASE, [%header];
390 $self->warn($request->as_string) if ( $self->verbose > 0);
391 my $response = $self->ua->request( $request);
392
393 if( $response->is_success ) {
394 if( $self->verbose > 0 ) {
395 my ($tempfh) = $self->tempfile();
396 # Hmm, what exactly are we trying to do here?
397 print $tempfh $response->content;
398 close($tempfh);
399 undef $tempfh;
400 }
401 my @subdata = split(/\n/, $response->content );
402 my $count = 0;
403 foreach ( @subdata ) {
404 if( /$RIDLINE/ ) {
405 $count++;
406 print STDERR $_ if( $self->verbose > 0);
407 $self->add_rid($1);
408 last;
409 }
410 }
411 if( $count == 0 ) {
412 $self->warn("req was ". $request->as_string() . "\n");
413 $self->warn(join('', @subdata));
414 }
415 $tcount += $count;
416 } else {
417 # should try and be a little more verbose here
418 $self->warn("req was ". $request->as_string() . "\n" .
419 $response->error_as_HTML);
420 $tcount = -1;
421 }
422 }
423 return $tcount;
424 }
425
426 =head2 retrieve_blast
427
428 Title : retrieve_blast
429 Usage : my $blastreport = $blastfactory->retrieve_blast($rid);
430 Function: Attempts to retrieve a blast report from remote blast queue
431 Returns : -1 on error,
432 0 on 'job not finished',
433 Bio::Tools::BPlite or Bio::Tools::Blast object
434 (depending on how object was initialized) on success
435 Args : Remote Blast ID (RID)
436
437 =cut
438
439 sub retrieve_blast {
440 my($self, $rid) = @_;
441 my (undef,$tempfile) = $self->tempfile();
442 my %hdr = %RETRIEVALHEADER;
443 $hdr{'RID'} = $rid;
444 my $req = POST $URLBASE, [%hdr];
445 if( $self->verbose > 0 ) {
446 $self->warn("retrieve request is " . $req->as_string());
447 }
448 my $response = $self->ua->request($req, $tempfile);
449 if( $self->verbose > 0 ) {
450 open(TMP, $tempfile) or $self->throw("cannot open $tempfile");
451 while(<TMP>) { print $_; }
452 close TMP;
453 }
454 if( $response->is_success ) {
455 my $size = -s $tempfile;
456 if( $size > 1000 ) {
457 my $blastobj;
458 if( $self->readmethod =~ /BPlite/ ) {
459 $blastobj = new Bio::Tools::BPlite(-file => $tempfile);
460 } else {
461 $blastobj = new Bio::SearchIO(-file => $tempfile,
462 -format => 'blast');
463 }
464 #save tempfile
465 $self->file($tempfile);
466 return $blastobj;
467 } elsif( $size < 500 ) { # search had a problem
468 open(ERR, "<$tempfile") or $self->throw("cannot open file $tempfile");
469 $self->warn(join("", <ERR>));
470 close ERR;
471 return -1;
472 } else { # still working
473 return 0;
474 }
475 } else {
476 $self->warn($response->error_as_HTML);
477 return -1;
478 }
479 }
480
481 =head2 save_output
482
483 Title : saveoutput
484 Usage : my $saveoutput = $self->save_output($filename)
485 Function: Method to save the blast report
486 Returns : 1 (throws error otherwise)
487 Args : string [rid, filename]
488
489 =cut
490
491 sub save_output {
492 my ($self, $filename) = @_;
493 if( ! defined $filename ) {
494 $self->throw("Can't save blast output. You must specify a filename to save to.");
495 }
496 #should be set when retrieving blast
497 my $blastfile = $self->file;
498 #open temp file and output file, have to filter out some HTML
499 open(TMP, $blastfile) or $self->throw("cannot open $blastfile");
500 open(SAVEOUT, ">$filename") or $self->throw("cannot open $filename");
501 my $seentop=0;
502 while(<TMP>) {
503 next if (/<pre>/);
504 if( /^(?:[T]?BLAST[NPX])\s*.+$/i ||
505 /^RPS-BLAST\s*.+$/i ) {
506 $seentop=1;
507 }
508 next if !$seentop;
509 if( $seentop ) {
510 print SAVEOUT;
511 }
512 }
513 close SAVEOUT;
514 close TMP;
515 return 1;
516 }
517
518 sub _load_input {
519 my ($self, $input) = @_;
520
521 if( ! defined $input ) {
522 $self->throw("Calling remote blast with no input");
523 }
524 my @seqs;
525 if( ! ref $input ) {
526 if( -e $input ) {
527 my $seqio = new Bio::SeqIO(-format => 'fasta', -file => $input);
528 while( my $seq = $seqio->next_seq ) {
529 push @seqs, $seq;
530 }
531 } else {
532 $self->throw("Input $input was not a valid filename");
533 }
534 } elsif( ref($input) =~ /ARRAY/i ) {
535 foreach ( @$input ) {
536 if( ref($_) && $_->isa('Bio::PrimarySeqI') ) {
537 push @seqs, $_;
538 } else {
539 $self->warn("Trying to add a " . ref($_) .
540 " but expected a Bio::PrimarySeqI");
541 }
542 }
543 if( ! @seqs) {
544 $self->throw("Did not pass in valid input -- no sequence objects found");
545 }
546 } elsif( $input->isa('Bio::PrimarySeqI') ) {
547 push @seqs, $input;
548 }
549 return @seqs;
550 }
551 1;
552 __END__