Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/DB/WebDBSeqI.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: WebDBSeqI.pm,v 1.30.2.1 2003/06/12 09:29:38 heikki Exp $ | |
2 # | |
3 # BioPerl module for Bio::DB::WebDBSeqI | |
4 # | |
5 # Cared for by Jason Stajich <jason@bioperl.org> | |
6 # | |
7 # Copyright Jason Stajich | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # | |
11 # POD documentation - main docs before the code | |
12 # | |
13 | |
14 =head1 NAME | |
15 | |
16 Bio::DB::WebDBSeqI - Object Interface to generalize Web Databases | |
17 for retrieving sequences | |
18 | |
19 =head1 SYNOPSIS | |
20 | |
21 # get a WebDBSeqI object somehow | |
22 # assuming it is a nucleotide db | |
23 my $seq = $db->get_Seq_by_id('ROA1_HUMAN') | |
24 | |
25 =head1 DESCRIPTION | |
26 | |
27 | |
28 | |
29 | |
30 Provides core set of functionality for connecting to a web based | |
31 database for retriving sequences. | |
32 | |
33 Users wishing to add another Web Based Sequence Dabatase will need to | |
34 extend this class (see Bio::DB::SwissProt or Bio::DB::NCBIHelper for | |
35 examples) and implement the get_request method which returns a | |
36 HTTP::Request for the specified uids (accessions, ids, etc depending | |
37 on what query types the database accepts). | |
38 | |
39 | |
40 | |
41 =head1 FEEDBACK | |
42 | |
43 =head2 Mailing Lists | |
44 | |
45 User feedback is an integral part of the | |
46 evolution of this and other Bioperl modules. Send | |
47 your comments and suggestions preferably to one | |
48 of the Bioperl mailing lists. Your participation | |
49 is much appreciated. | |
50 | |
51 bioperl-l@bioperl.org - General discussion | |
52 http://bioperl.org/MailList.shtml - About the mailing lists | |
53 | |
54 =head2 Reporting Bugs | |
55 | |
56 Report bugs to the Bioperl bug tracking system to | |
57 help us keep track the bugs and their resolution. | |
58 Bug reports can be submitted via email or the | |
59 web: | |
60 | |
61 bioperl-bugs@bio.perl.org | |
62 http://bugzilla.bioperl.org/ | |
63 | |
64 =head1 AUTHOR - Jason Stajich | |
65 | |
66 Email E<lt> jason@bioperl.org E<gt> | |
67 | |
68 =head1 APPENDIX | |
69 | |
70 The rest of the documentation details each of the | |
71 object methods. Internal methods are usually | |
72 preceded with a _ | |
73 | |
74 =cut | |
75 | |
76 # Let the code begin... | |
77 | |
78 package Bio::DB::WebDBSeqI; | |
79 use strict; | |
80 use vars qw(@ISA $MODVERSION %RETRIEVAL_TYPES $DEFAULT_RETRIEVAL_TYPE | |
81 $DEFAULTFORMAT $LAST_INVOCATION_TIME); | |
82 | |
83 use Bio::DB::RandomAccessI; | |
84 use Bio::SeqIO; | |
85 use Bio::Root::IO; | |
86 use LWP::UserAgent; | |
87 use HTTP::Request::Common; | |
88 use HTTP::Response; | |
89 use File::Spec; | |
90 use IO::String; | |
91 use Bio::Root::Root; | |
92 | |
93 @ISA = qw(Bio::DB::RandomAccessI); | |
94 | |
95 BEGIN { | |
96 $MODVERSION = '0.8'; | |
97 %RETRIEVAL_TYPES = ( 'io_string' => 1, | |
98 'tempfile' => 1, | |
99 'pipeline' => 1, | |
100 ); | |
101 $DEFAULT_RETRIEVAL_TYPE = 'pipeline'; | |
102 $DEFAULTFORMAT = 'fasta'; | |
103 $LAST_INVOCATION_TIME = 0; | |
104 } | |
105 | |
106 sub new { | |
107 my ($class, @args) = @_; | |
108 my $self = $class->SUPER::new(@args); | |
109 my ($baseaddress, $params, $ret_type, $format,$delay,$db) = | |
110 $self->_rearrange([qw(BASEADDRESS PARAMS RETRIEVALTYPE FORMAT DELAY DB)], | |
111 @args); | |
112 | |
113 $ret_type = $DEFAULT_RETRIEVAL_TYPE unless ( $ret_type); | |
114 $baseaddress && $self->url_base_address($baseaddress); | |
115 $params && $self->url_params($params); | |
116 $db && $self->db($db); | |
117 $ret_type && $self->retrieval_type($ret_type); | |
118 $delay = $self->delay_policy unless defined $delay; | |
119 $self->delay($delay); | |
120 | |
121 # insure we always have a default format set for retrieval | |
122 # even though this will be immedietly overwritten by most sub classes | |
123 $format = $self->default_format unless ( defined $format && | |
124 $format ne '' ); | |
125 | |
126 $self->request_format($format); | |
127 my $ua = new LWP::UserAgent; | |
128 $ua->agent(ref($self) ."/$MODVERSION"); | |
129 $self->ua($ua); | |
130 $self->{'_authentication'} = []; | |
131 return $self; | |
132 } | |
133 | |
134 # from Bio::DB::RandomAccessI | |
135 | |
136 =head2 get_Seq_by_id | |
137 | |
138 Title : get_Seq_by_id | |
139 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN') | |
140 Function: Gets a Bio::Seq object by its name | |
141 Returns : a Bio::Seq object | |
142 Args : the id (as a string) of a sequence | |
143 Throws : "id does not exist" exception | |
144 | |
145 | |
146 =cut | |
147 | |
148 sub get_Seq_by_id { | |
149 my ($self,$seqid) = @_; | |
150 $self->_sleep; | |
151 my $seqio = $self->get_Stream_by_id([$seqid]); | |
152 $self->throw("id does not exist") if( !defined $seqio ) ; | |
153 my @seqs; | |
154 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; } | |
155 $self->throw("id does not exist") unless @seqs; | |
156 if( wantarray ) { return @seqs } else { return shift @seqs } | |
157 } | |
158 | |
159 =head2 get_Seq_by_acc | |
160 | |
161 Title : get_Seq_by_acc | |
162 Usage : $seq = $db->get_Seq_by_acc('X77802'); | |
163 Function: Gets a Bio::Seq object by accession number | |
164 Returns : A Bio::Seq object | |
165 Args : accession number (as a string) | |
166 Throws : "acc does not exist" exception | |
167 | |
168 =cut | |
169 | |
170 sub get_Seq_by_acc { | |
171 my ($self,$seqid) = @_; | |
172 $self->_sleep; | |
173 my $seqio = $self->get_Stream_by_acc($seqid); | |
174 $self->throw("acc does not exist") if( ! defined $seqio ); | |
175 my @seqs; | |
176 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; } | |
177 $self->throw("acc does not exist") unless @seqs; | |
178 if( wantarray ) { return @seqs } else { return shift @seqs } | |
179 } | |
180 | |
181 | |
182 =head2 get_Seq_by_gi | |
183 | |
184 Title : get_Seq_by_gi | |
185 Usage : $seq = $db->get_Seq_by_gi('405830'); | |
186 Function: Gets a Bio::Seq object by gi number | |
187 Returns : A Bio::Seq object | |
188 Args : gi number (as a string) | |
189 Throws : "gi does not exist" exception | |
190 | |
191 =cut | |
192 | |
193 sub get_Seq_by_gi { | |
194 my ($self,$seqid) = @_; | |
195 $self->_sleep; | |
196 my $seqio = $self->get_Stream_by_gi($seqid); | |
197 $self->throw("gi does not exist") if( !defined $seqio ); | |
198 my @seqs; | |
199 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; } | |
200 $self->throw("gi does not exist") unless @seqs; | |
201 if( wantarray ) { return @seqs } else { return shift @seqs } | |
202 } | |
203 | |
204 =head2 get_Seq_by_version | |
205 | |
206 Title : get_Seq_by_version | |
207 Usage : $seq = $db->get_Seq_by_version('X77802.1'); | |
208 Function: Gets a Bio::Seq object by sequence version | |
209 Returns : A Bio::Seq object | |
210 Args : accession.version (as a string) | |
211 Throws : "acc.version does not exist" exception | |
212 | |
213 =cut | |
214 | |
215 sub get_Seq_by_version { | |
216 my ($self,$seqid) = @_; | |
217 $self->_sleep; | |
218 my $seqio = $self->get_Stream_by_version($seqid); | |
219 $self->throw("accession.version does not exist") if( !defined $seqio ); | |
220 my @seqs; | |
221 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; } | |
222 $self->throw("accession.version does not exist") unless @seqs; | |
223 if( wantarray ) { return @seqs } else { return shift @seqs } | |
224 } | |
225 | |
226 # implementing class must define these | |
227 | |
228 =head2 get_request | |
229 | |
230 Title : get_request | |
231 Usage : my $url = $self->get_request | |
232 Function: returns a HTTP::Request object | |
233 Returns : | |
234 Args : %qualifiers = a hash of qualifiers (ids, format, etc) | |
235 | |
236 =cut | |
237 | |
238 sub get_request { | |
239 my ($self) = @_; | |
240 my $msg = "Implementing class must define method get_request in class WebDBSeqI"; | |
241 $self->throw($msg); | |
242 } | |
243 | |
244 # class methods | |
245 | |
246 =head2 get_Stream_by_id | |
247 | |
248 Title : get_Stream_by_id | |
249 Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] ); | |
250 Function: Gets a series of Seq objects by unique identifiers | |
251 Returns : a Bio::SeqIO stream object | |
252 Args : $ref : a reference to an array of unique identifiers for | |
253 the desired sequence entries | |
254 | |
255 | |
256 =cut | |
257 | |
258 sub get_Stream_by_id { | |
259 my ($self, $ids) = @_; | |
260 my ($webfmt,$localfmt) = $self->request_format; | |
261 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single', | |
262 '-format' => $webfmt); | |
263 } | |
264 | |
265 *get_Stream_by_batch = sub { | |
266 my $self = shift; | |
267 $self->deprecated('get_Stream_by_batch() is deprecated; use get_Stream_by_id() instead'); | |
268 $self->get_Stream_by_id(@_) | |
269 }; | |
270 | |
271 | |
272 =head2 get_Stream_by_acc | |
273 | |
274 Title : get_Stream_by_acc | |
275 Usage : $seq = $db->get_Stream_by_acc([$acc1, $acc2]); | |
276 Function: Gets a series of Seq objects by accession numbers | |
277 Returns : a Bio::SeqIO stream object | |
278 Args : $ref : a reference to an array of accession numbers for | |
279 the desired sequence entries | |
280 Note : For GenBank, this just calls the same code for get_Stream_by_id() | |
281 | |
282 =cut | |
283 | |
284 sub get_Stream_by_acc { | |
285 my ($self, $ids ) = @_; | |
286 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single'); | |
287 } | |
288 | |
289 | |
290 =head2 get_Stream_by_gi | |
291 | |
292 Title : get_Stream_by_gi | |
293 Usage : $seq = $db->get_Stream_by_gi([$gi1, $gi2]); | |
294 Function: Gets a series of Seq objects by gi numbers | |
295 Returns : a Bio::SeqIO stream object | |
296 Args : $ref : a reference to an array of gi numbers for | |
297 the desired sequence entries | |
298 Note : For GenBank, this just calls the same code for get_Stream_by_id() | |
299 | |
300 =cut | |
301 | |
302 sub get_Stream_by_gi { | |
303 my ($self, $ids ) = @_; | |
304 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'gi'); | |
305 } | |
306 | |
307 =head2 get_Stream_by_version | |
308 | |
309 Title : get_Stream_by_version | |
310 Usage : $seq = $db->get_Stream_by_version([$version1, $version2]); | |
311 Function: Gets a series of Seq objects by accession.versions | |
312 Returns : a Bio::SeqIO stream object | |
313 Args : $ref : a reference to an array of accession.version strings for | |
314 the desired sequence entries | |
315 Note : For GenBank, this is implemeted in NCBIHelper | |
316 | |
317 =cut | |
318 | |
319 sub get_Stream_by_version { | |
320 my ($self, $ids ) = @_; | |
321 # $self->throw("Implementing class should define this method!"); | |
322 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'version'); # how it should work | |
323 } | |
324 | |
325 =head2 get_Stream_by_query | |
326 | |
327 Title : get_Stream_by_query | |
328 Usage : $stream = $db->get_Stream_by_query($query); | |
329 Function: Gets a series of Seq objects by way of a query string or oject | |
330 Returns : a Bio::SeqIO stream object | |
331 Args : $query : A string that uses the appropriate query language | |
332 for the database or a Bio::DB::QueryI object. It is suggested | |
333 that you create the Bio::DB::Query object first and interrogate | |
334 it for the entry count before you fetch a potentially large stream. | |
335 | |
336 =cut | |
337 | |
338 sub get_Stream_by_query { | |
339 my ($self, $query ) = @_; | |
340 return $self->get_seq_stream('-query' => $query, '-mode'=>'query'); | |
341 } | |
342 | |
343 =head2 default_format | |
344 | |
345 Title : default_format | |
346 Usage : my $format = $self->default_format | |
347 Function: Returns default sequence format for this module | |
348 Returns : string | |
349 Args : none | |
350 | |
351 =cut | |
352 | |
353 sub default_format { | |
354 return $DEFAULTFORMAT; | |
355 } | |
356 | |
357 # sorry, but this is hacked in because of BioFetch problems... | |
358 sub db { | |
359 my $self = shift; | |
360 my $d = $self->{_db}; | |
361 $self->{_db} = shift if @_; | |
362 $d; | |
363 } | |
364 | |
365 =head2 request_format | |
366 | |
367 Title : request_format | |
368 Usage : my ($req_format, $ioformat) = $self->request_format; | |
369 $self->request_format("genbank"); | |
370 $self->request_format("fasta"); | |
371 Function: Get/Set sequence format retrieval. The get-form will normally not | |
372 be used outside of this and derived modules. | |
373 Returns : Array of two strings, the first representing the format for | |
374 retrieval, and the second specifying the corresponding SeqIO format. | |
375 Args : $format = sequence format | |
376 | |
377 =cut | |
378 | |
379 sub request_format { | |
380 my ($self, $value) = @_; | |
381 | |
382 if( defined $value ) { | |
383 $self->{'_format'} = [ $value, $value]; | |
384 } | |
385 return @{$self->{'_format'}}; | |
386 } | |
387 | |
388 =head2 get_seq_stream | |
389 | |
390 Title : get_seq_stream | |
391 Usage : my $seqio = $self->get_seq_sream(%qualifiers) | |
392 Function: builds a url and queries a web db | |
393 Returns : a Bio::SeqIO stream capable of producing sequence | |
394 Args : %qualifiers = a hash qualifiers that the implementing class | |
395 will process to make a url suitable for web querying | |
396 | |
397 =cut | |
398 | |
399 sub get_seq_stream { | |
400 my ($self, %qualifiers) = @_; | |
401 my ($rformat, $ioformat) = $self->request_format(); | |
402 my $seen = 0; | |
403 foreach my $key ( keys %qualifiers ) { | |
404 if( $key =~ /format/i ) { | |
405 $rformat = $qualifiers{$key}; | |
406 $seen = 1; | |
407 } | |
408 } | |
409 $qualifiers{'-format'} = $rformat if( !$seen); | |
410 ($rformat, $ioformat) = $self->request_format($rformat); | |
411 | |
412 my $request = $self->get_request(%qualifiers); | |
413 $request->proxy_authorization_basic($self->authentication) | |
414 if ( $self->authentication); | |
415 $self->debug("request is ". $request->as_string(). "\n"); | |
416 | |
417 # workaround for MSWin systems | |
418 $self->retrieval_type('io_string') if $self->retrieval_type =~ /pipeline/ && $^O =~ /^MSWin/; | |
419 | |
420 if ($self->retrieval_type =~ /pipeline/) { | |
421 # Try to create a stream using POSIX fork-and-pipe facility. | |
422 # this is a *big* win when fetching thousands of sequences from | |
423 # a web database because we can return the first entry while | |
424 # transmission is still in progress. | |
425 # Also, no need to keep sequence in memory or in a temporary file. | |
426 # If this fails (Windows, MacOS 9), we fall back to non-pipelined access. | |
427 | |
428 # fork and pipe: _stream_request()=><STREAM> | |
429 my $result = eval { open(STREAM,"-|") }; | |
430 | |
431 if (defined $result) { | |
432 $DB::fork_TTY = '/dev/null'; # prevents complaints from debugger | |
433 if (!$result) { # in child process | |
434 $self->_stream_request($request); | |
435 kill 9=>$$; # to prevent END{} blocks from executing in forked children | |
436 exit 0; | |
437 } | |
438 else { | |
439 return Bio::SeqIO->new('-verbose' => $self->verbose, | |
440 '-format' => $ioformat, | |
441 '-fh' => \*STREAM); | |
442 } | |
443 } | |
444 else { | |
445 $self->retrieval_type('io_string'); | |
446 } | |
447 } | |
448 | |
449 if ($self->retrieval_type =~ /temp/i) { | |
450 my $dir = $self->io->tempdir( CLEANUP => 1); | |
451 my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir ); | |
452 close $fh; | |
453 my $resp = $self->_request($request, $tmpfile); | |
454 if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) { | |
455 $self->throw("WebDBSeqI Error - check query sequences!\n"); | |
456 } | |
457 $self->postprocess_data('type' => 'file', | |
458 'location' => $tmpfile); | |
459 # this may get reset when requesting batch mode | |
460 ($rformat,$ioformat) = $self->request_format(); | |
461 if( $self->verbose > 0 ) { | |
462 open(ERR, "<$tmpfile"); | |
463 while(<ERR>) { $self->debug($_);} | |
464 } | |
465 | |
466 return Bio::SeqIO->new('-verbose' => $self->verbose, | |
467 '-format' => $ioformat, | |
468 '-file' => $tmpfile); | |
469 } | |
470 | |
471 if ($self->retrieval_type =~ /io_string/i ) { | |
472 my $resp = $self->_request($request); | |
473 my $content = $resp->content_ref; | |
474 $self->debug( "content is $$content\n"); | |
475 if (!$resp->is_success() || length($$content) == 0) { | |
476 $self->throw("WebDBSeqI Error - check query sequences!\n"); | |
477 } | |
478 ($rformat,$ioformat) = $self->request_format(); | |
479 $self->postprocess_data('type'=> 'string', | |
480 'location' => $content); | |
481 $self->debug( "str is $$content\n"); | |
482 return Bio::SeqIO->new('-verbose' => $self->verbose, | |
483 '-format' => $ioformat, | |
484 '-fh' => new IO::String($$content)); | |
485 } | |
486 | |
487 # if we got here, we don't know how to handle the retrieval type | |
488 $self->throw("retrieval type " . $self->retrieval_type . | |
489 " unsupported\n"); | |
490 } | |
491 | |
492 =head2 url_base_address | |
493 | |
494 Title : url_base_address | |
495 Usage : my $address = $self->url_base_address or | |
496 $self->url_base_address($address) | |
497 Function: Get/Set the base URL for the Web Database | |
498 Returns : Base URL for the Web Database | |
499 Args : $address - URL for the WebDatabase | |
500 | |
501 =cut | |
502 | |
503 sub url_base_address { | |
504 my $self = shift; | |
505 my $d = $self->{'_baseaddress'}; | |
506 $self->{'_baseaddress'} = shift if @_; | |
507 $d; | |
508 } | |
509 | |
510 | |
511 =head2 proxy | |
512 | |
513 Title : proxy | |
514 Usage : $httpproxy = $db->proxy('http') or | |
515 $db->proxy(['http','ftp'], 'http://myproxy' ) | |
516 Function: Get/Set a proxy for use of proxy | |
517 Returns : a string indicating the proxy | |
518 Args : $protocol : an array ref of the protocol(s) to set/get | |
519 $proxyurl : url of the proxy to use for the specified protocol | |
520 $username : username (if proxy requires authentication) | |
521 $password : password (if proxy requires authentication) | |
522 | |
523 =cut | |
524 | |
525 sub proxy { | |
526 my ($self,$protocol,$proxy,$username,$password) = @_; | |
527 return undef if ( !defined $self->ua || !defined $protocol | |
528 || !defined $proxy ); | |
529 $self->authentication($username, $password) | |
530 if ($username && $password); | |
531 return $self->ua->proxy($protocol,$proxy); | |
532 } | |
533 | |
534 =head2 authentication | |
535 | |
536 Title : authentication | |
537 Usage : $db->authentication($user,$pass) | |
538 Function: Get/Set authentication credentials | |
539 Returns : Array of user/pass | |
540 Args : Array or user/pass | |
541 | |
542 | |
543 =cut | |
544 | |
545 sub authentication{ | |
546 my ($self,$u,$p) = @_; | |
547 | |
548 if( defined $u && defined $p ) { | |
549 $self->{'_authentication'} = [ $u,$p]; | |
550 } | |
551 return @{$self->{'_authentication'}}; | |
552 } | |
553 | |
554 | |
555 =head2 retrieval_type | |
556 | |
557 Title : retrieval_type | |
558 Usage : $self->retrieval_type($type); | |
559 my $type = $self->retrieval_type | |
560 Function: Get/Set a proxy for retrieval_type (pipeline, io_string or tempfile) | |
561 Returns : string representing retrieval type | |
562 Args : $value - the value to store | |
563 | |
564 This setting affects how the data stream from the remote web server is | |
565 processed and passed to the Bio::SeqIO layer. Three types of retrieval | |
566 types are currently allowed: | |
567 | |
568 pipeline Perform a fork in an attempt to begin streaming | |
569 while the data is still downloading from the remote | |
570 server. Disk, memory and speed efficient, but will | |
571 not work on Windows or MacOS 9 platforms. | |
572 | |
573 io_string Store downloaded database entry(s) in memory. Can be | |
574 problematic for batch downloads because entire set | |
575 of entries must fit in memory. Alll entries must be | |
576 downloaded before processing can begin. | |
577 | |
578 tempfile Store downloaded database entry(s) in a temporary file. | |
579 All entries must be downloaded before processing can | |
580 begin. | |
581 | |
582 The default is pipeline, with automatic fallback to io_string if | |
583 pipelining is not available. | |
584 | |
585 =cut | |
586 | |
587 sub retrieval_type { | |
588 my ($self, $value) = @_; | |
589 if( defined $value ) { | |
590 $value = lc $value; | |
591 if( ! $RETRIEVAL_TYPES{$value} ) { | |
592 $self->warn("invalid retrieval type $value must be one of (" . | |
593 join(",", keys %RETRIEVAL_TYPES), ")"); | |
594 $value = $DEFAULT_RETRIEVAL_TYPE; | |
595 } | |
596 $self->{'_retrieval_type'} = $value; | |
597 } | |
598 return $self->{'_retrieval_type'}; | |
599 } | |
600 | |
601 =head2 url_params | |
602 | |
603 Title : url_params | |
604 Usage : my $params = $self->url_params or | |
605 $self->url_params($params) | |
606 Function: Get/Set the URL parameters for the Web Database | |
607 Returns : url parameters for Web Database | |
608 Args : $params - parameters to be appended to the URL for the WebDatabase | |
609 | |
610 =cut | |
611 | |
612 sub url_params { | |
613 my ($self, $value) = @_; | |
614 if( defined $value ) { | |
615 $self->{'_urlparams'} = $value; | |
616 } | |
617 } | |
618 | |
619 =head2 ua | |
620 | |
621 Title : ua | |
622 Usage : my $ua = $self->ua or | |
623 $self->ua($ua) | |
624 Function: Get/Set a LWP::UserAgent for use | |
625 Returns : reference to LWP::UserAgent Object | |
626 Args : $ua - must be a LWP::UserAgent | |
627 | |
628 =cut | |
629 | |
630 sub ua { | |
631 my ($self, $ua) = @_; | |
632 if( defined $ua && $ua->isa("LWP::UserAgent") ) { | |
633 $self->{'_ua'} = $ua; | |
634 } | |
635 return $self->{'_ua'}; | |
636 } | |
637 | |
638 =head2 postprocess_data | |
639 | |
640 Title : postprocess_data | |
641 Usage : $self->postprocess_data ( 'type' => 'string', | |
642 'location' => \$datastr); | |
643 Function: process downloaded data before loading into a Bio::SeqIO | |
644 Returns : void | |
645 Args : hash with two keys - 'type' can be 'string' or 'file' | |
646 - 'location' either file location or string | |
647 reference containing data | |
648 | |
649 =cut | |
650 | |
651 sub postprocess_data { | |
652 my ( $self, %args) = @_; | |
653 return; | |
654 } | |
655 | |
656 # private methods | |
657 sub _request { | |
658 | |
659 my ($self, $url,$tmpfile) = @_; | |
660 my ($resp); | |
661 if( defined $tmpfile && $tmpfile ne '' ) { | |
662 $resp = $self->ua->request($url, $tmpfile); | |
663 } else { $resp = $self->ua->request($url); } | |
664 | |
665 if( $resp->is_error ) { | |
666 $self->throw("WebDBSeqI Request Error:\n".$resp->as_string); | |
667 } | |
668 return $resp; | |
669 } | |
670 | |
671 # send web request to stdout for streaming purposes | |
672 sub _stream_request { | |
673 my $self = shift; | |
674 my $request = shift; | |
675 | |
676 # fork so as to pipe output of fetch process through to | |
677 # postprocess_data method call. | |
678 my $child = open (FETCH,"-|"); | |
679 $self->throw("Couldn't fork: $!") unless defined $child; | |
680 | |
681 if ($child) { | |
682 local ($/) = "//\n"; # assume genbank/swiss format | |
683 $| = 1; | |
684 my $records = 0; | |
685 while (my $record = <FETCH>) { | |
686 $records++; | |
687 $self->postprocess_data('type' => 'string', | |
688 'location' => \$record); | |
689 print STDOUT $record; | |
690 } | |
691 $/ = "\n"; # reset to be safe; | |
692 close(FETCH); | |
693 close STDOUT; | |
694 close STDERR; | |
695 kill 9=>$$; # to prevent END{} blocks from executing in forked children | |
696 sleep; | |
697 } | |
698 else { | |
699 $| = 1; | |
700 my $resp = $self->ua->request($request, | |
701 sub { print shift } | |
702 ); | |
703 if( $resp->is_error ) { | |
704 $self->throw("WebDBSeqI Request Error:\n".$resp->as_string); | |
705 } | |
706 | |
707 close STDOUT; close STDERR; | |
708 kill 9=>$$; # to prevent END{} blocks from executing in forked children | |
709 sleep; | |
710 } | |
711 exit 0; | |
712 } | |
713 | |
714 sub io { | |
715 my ($self,$io) = @_; | |
716 | |
717 if(defined($io) || (! exists($self->{'_io'}))) { | |
718 $io = Bio::Root::IO->new() unless $io; | |
719 $self->{'_io'} = $io; | |
720 } | |
721 return $self->{'_io'}; | |
722 } | |
723 | |
724 | |
725 =head2 delay | |
726 | |
727 Title : delay | |
728 Usage : $secs = $self->delay([$secs]) | |
729 Function: get/set number of seconds to delay between fetches | |
730 Returns : number of seconds to delay | |
731 Args : new value | |
732 | |
733 NOTE: the default is to use the value specified by delay_policy(). | |
734 This can be overridden by calling this method, or by passing the | |
735 -delay argument to new(). | |
736 | |
737 =cut | |
738 | |
739 sub delay { | |
740 my $self = shift; | |
741 my $d = $self->{'_delay'}; | |
742 $self->{'_delay'} = shift if @_; | |
743 $d; | |
744 } | |
745 | |
746 =head2 delay_policy | |
747 | |
748 Title : delay_policy | |
749 Usage : $secs = $self->delay_policy | |
750 Function: return number of seconds to delay between calls to remote db | |
751 Returns : number of seconds to delay | |
752 Args : none | |
753 | |
754 NOTE: The default delay policy is 0s. Override in subclasses to | |
755 implement delays. The timer has only second resolution, so the delay | |
756 will actually be +/- 1s. | |
757 | |
758 =cut | |
759 | |
760 sub delay_policy { | |
761 my $self = shift; | |
762 return 0; | |
763 } | |
764 | |
765 =head2 _sleep | |
766 | |
767 Title : _sleep | |
768 Usage : $self->_sleep | |
769 Function: sleep for a number of seconds indicated by the delay policy | |
770 Returns : none | |
771 Args : none | |
772 | |
773 NOTE: This method keeps track of the last time it was called and only | |
774 imposes a sleep if it was called more recently than the delay_policy() | |
775 allows. | |
776 | |
777 =cut | |
778 | |
779 sub _sleep { | |
780 my $self = shift; | |
781 my $last_invocation = $LAST_INVOCATION_TIME; | |
782 if (time - $LAST_INVOCATION_TIME < $self->delay) { | |
783 my $delay = $self->delay - (time - $LAST_INVOCATION_TIME); | |
784 warn "sleeping for $delay seconds\n" if $self->verbose; | |
785 sleep $delay; | |
786 } | |
787 $LAST_INVOCATION_TIME = time; | |
788 } | |
789 | |
790 1; |