0
|
1 #
|
|
2 # $Id: DBFetch.pm,v 1.8 2002/12/22 22:02:13 lstein Exp $
|
|
3 #
|
|
4 # BioPerl module for Bio::DB::DBFetch
|
|
5 #
|
|
6 # Cared for by Heikki Lehvaslaiho <Heikki@ebi.ac.uk>
|
|
7 #
|
|
8 # Copyright Heikki Lehvaslaiho
|
|
9 #
|
|
10 # You may distribute this module under the same terms as perl itself
|
|
11
|
|
12 # POD documentation - main docs before the code
|
|
13
|
|
14 =head1 NAME
|
|
15
|
|
16 Bio::DB::DBFetch - Database object for retrieving using the dbfetch script
|
|
17
|
|
18 =head1 SYNOPSIS
|
|
19
|
|
20 #do not use this module directly
|
|
21
|
|
22 =head1 DESCRIPTION
|
|
23
|
|
24 Allows the dynamic retrieval of entries from databases using the
|
|
25 dbfetch script at EBI:
|
|
26 L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>.
|
|
27
|
|
28 In order to make changes transparent we have host type (currently only
|
|
29 ebi) and location (defaults to ebi) separated out. This allows later
|
|
30 additions of more servers in different geographical locations.
|
|
31
|
|
32 This is a superclass which is called by instantiable subclasses with
|
|
33 correct parameters.
|
|
34
|
|
35 =head1 FEEDBACK
|
|
36
|
|
37 =head2 Mailing Lists
|
|
38
|
|
39 User feedback is an integral part of the evolution of this and other
|
|
40 Bioperl modules. Send your comments and suggestions preferably to one
|
|
41 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
42
|
|
43 bioperl-l@bioperl.org - General discussion
|
|
44 http://bio.perl.org/MailList.html - About the mailing lists
|
|
45
|
|
46 =head2 Reporting Bugs
|
|
47
|
|
48 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
49 the bugs and their resolution. Bug reports can be submitted via email
|
|
50 or the web:
|
|
51
|
|
52 bioperl-bugs@bio.perl.org
|
|
53 http://bugzilla.bioperl.org/
|
|
54
|
|
55 =head1 AUTHOR - Heikki Lehvaslaiho
|
|
56
|
|
57 Email Heikki Lehvaslaiho E<lt>Heikki@ebi.ac.ukE<gt>
|
|
58
|
|
59 =head1 APPENDIX
|
|
60
|
|
61 The rest of the documentation details each of the object
|
|
62 methods. Internal methods are usually preceded with a _
|
|
63
|
|
64 =cut
|
|
65
|
|
66 # Let the code begin...
|
|
67
|
|
68 package Bio::DB::DBFetch;
|
|
69 use strict;
|
|
70 use vars qw(@ISA $MODVERSION $DEFAULTFORMAT $DEFAULTLOCATION
|
|
71 $DEFAULTSERVERTYPE);
|
|
72
|
|
73 $MODVERSION = '0.1';
|
|
74 use HTTP::Request::Common;
|
|
75 use Bio::DB::WebDBSeqI;
|
|
76
|
|
77 @ISA = qw(Bio::DB::WebDBSeqI);
|
|
78
|
|
79 # the new way to make modules a little more lightweight
|
|
80
|
|
81 BEGIN {
|
|
82 # global vars
|
|
83 $DEFAULTSERVERTYPE = 'dbfetch';
|
|
84 $DEFAULTLOCATION = 'ebi';
|
|
85 }
|
|
86
|
|
87
|
|
88 =head1 Routines from Bio::DB::WebDBSeqI
|
|
89
|
|
90 =head2 get_request
|
|
91
|
|
92 Title : get_request
|
|
93 Usage : my $url = $self->get_request
|
|
94 Function: returns a HTTP::Request object
|
|
95 Returns :
|
|
96 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
|
|
97
|
|
98 =cut
|
|
99
|
|
100 sub get_request {
|
|
101 my ($self, @qualifiers) = @_;
|
|
102 my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)],
|
|
103 @qualifiers);
|
|
104
|
|
105 $self->throw("Must specify a value for UIDs to fetch")
|
|
106 unless defined $uids;
|
|
107 my $tmp;
|
|
108 my $format_string = '';
|
|
109 $format ||= $self->default_format;
|
|
110 ($format, $tmp) = $self->request_format($format);
|
|
111 $format_string = "&format=$format" if $format ne $self->default_format;
|
|
112 my $url = $self->location_url();
|
|
113 my $uid;
|
|
114 if( ref($uids) =~ /ARRAY/i ) {
|
|
115 $uid = join (',', @$uids);
|
|
116 $self->warn ('The server will accept maximum of 50 entries in a request. The rest are ignored.')
|
|
117 if scalar @$uids >50;
|
|
118 } else {
|
|
119 $uid = $uids;
|
|
120 }
|
|
121
|
|
122 return GET $url. $format_string. '&id='. $uid;
|
|
123 }
|
|
124
|
|
125
|
|
126 =head2 postprocess_data
|
|
127
|
|
128 Title : postprocess_data
|
|
129 Usage : $self->postprocess_data ( 'type' => 'string',
|
|
130 'location' => \$datastr);
|
|
131 Function: process downloaded data before loading into a Bio::SeqIO
|
|
132 Returns : void
|
|
133 Args : hash with two keys - 'type' can be 'string' or 'file'
|
|
134 - 'location' either file location or string
|
|
135 reference containing data
|
|
136
|
|
137 =cut
|
|
138
|
|
139 # remove occasional blank lines at top of web output
|
|
140 sub postprocess_data {
|
|
141 my ($self, %args) = @_;
|
|
142 if ($args{type} eq 'string') {
|
|
143 ${$args{location}} =~ s/^\s+//; # get rid of leading whitespace
|
|
144 }
|
|
145 elsif ($args{type} eq 'file') {
|
|
146 open F,$args{location} or $self->throw("Cannot open $args{location}: $!");
|
|
147 my @data = <F>;
|
|
148 for (@data) {
|
|
149 last unless /^\s+$/;
|
|
150 shift @data;
|
|
151 }
|
|
152 open F,">$args{location}" or $self->throw("Cannot write to $args{location}: $!");
|
|
153 print F @data;
|
|
154 close F;
|
|
155 }
|
|
156 }
|
|
157
|
|
158 =head2 default_format
|
|
159
|
|
160 Title : default_format
|
|
161 Usage : my $format = $self->default_format
|
|
162 Function: Returns default sequence format for this module
|
|
163 Returns : string
|
|
164 Args : none
|
|
165
|
|
166 =cut
|
|
167
|
|
168 sub default_format {
|
|
169 my ($self) = @_;
|
|
170 return $self->{'_default_format'};
|
|
171 }
|
|
172
|
|
173 =head1 Bio::DB::DBFetch specific routines
|
|
174
|
|
175 =head2 get_Stream_by_id
|
|
176
|
|
177 Title : get_Stream_by_id
|
|
178 Usage : $seq = $db->get_Stream_by_id($ref);
|
|
179 Function: Retrieves Seq objects from the server 'en masse', rather than one
|
|
180 at a time. For large numbers of sequences, this is far superior
|
|
181 than get_Stream_by_[id/acc]().
|
|
182 Example :
|
|
183 Returns : a Bio::SeqIO stream object
|
|
184 Args : $ref : either an array reference, a filename, or a filehandle
|
|
185 from which to get the list of unique ids/accession numbers.
|
|
186
|
|
187 NOTE: for backward compatibility, this method is also called
|
|
188 get_Stream_by_batch.
|
|
189
|
|
190 =cut
|
|
191
|
|
192 sub get_Stream_by_id {
|
|
193 my ($self, $ids) = @_;
|
|
194 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'batch');
|
|
195 }
|
|
196
|
|
197 =head2 get_Seq_by_version
|
|
198
|
|
199 Title : get_Seq_by_version
|
|
200 Usage : $seq = $db->get_Seq_by_version('X77802.1');
|
|
201 Function: Gets a Bio::Seq object by accession number
|
|
202 Returns : A Bio::Seq object
|
|
203 Args : version number (as a string)
|
|
204 Throws : "version does not exist" exception
|
|
205
|
|
206 =cut
|
|
207
|
|
208 sub get_Seq_by_version {
|
|
209 my ($self,$seqid) = @_;
|
|
210 my $seqio = $self->get_Stream_by_acc([$seqid]);
|
|
211 $self->throw("version does not exist") if( !defined $seqio );
|
|
212 return $seqio->next_seq();
|
|
213 }
|
|
214
|
|
215 =head2 request_format
|
|
216
|
|
217 Title : request_format
|
|
218 Usage : my ($req_format, $ioformat) = $self->request_format;
|
|
219 $self->request_format("genbank");
|
|
220 $self->request_format("fasta");
|
|
221 Function: Get/Set sequence format retrieval. The get-form will normally not
|
|
222 be used outside of this and derived modules.
|
|
223 Returns : Array of two strings, the first representing the format for
|
|
224 retrieval, and the second specifying the corresponding SeqIO format.
|
|
225 Args : $format = sequence format
|
|
226
|
|
227 =cut
|
|
228
|
|
229 sub request_format {
|
|
230 my ($self, $value) = @_;
|
|
231 if( defined $value ) {
|
|
232 $value = lc $value;
|
|
233 $self->{'_format'} = $value;
|
|
234 return ($value, $value);
|
|
235 }
|
|
236 $value = $self->{'_format'};
|
|
237 if( $value and defined $self->formatmap->{$value} ) {
|
|
238 return ($value, $self->formatmap->{$value});
|
|
239 } else {
|
|
240 # Try to fall back to a default.
|
|
241 return ($self->default_format, $self->default_format );
|
|
242 }
|
|
243 }
|
|
244
|
|
245
|
|
246 =head2 servertype
|
|
247
|
|
248 Title : servertype
|
|
249 Usage : my $servertype = $self->servertype
|
|
250 $self->servertype($servertype);
|
|
251 Function: Get/Set server type
|
|
252 Returns : string
|
|
253 Args : server type string [optional]
|
|
254
|
|
255 =cut
|
|
256
|
|
257 sub servertype {
|
|
258 my ($self, $servertype) = @_;
|
|
259 if( defined $servertype && $servertype ne '') {
|
|
260 $self->throw("You gave an invalid server type ($servertype)".
|
|
261 " - available types are ".
|
|
262 keys %{$self->hosts}) unless( $self->hosts->{$servertype} );
|
|
263 $self->{'_servertype'} = $servertype;
|
|
264 }
|
|
265 $self->{'_servertype'} = $DEFAULTSERVERTYPE unless $self->{'_servertype'};
|
|
266 return $self->{'_servertype'};
|
|
267 }
|
|
268
|
|
269 =head2 hostlocation
|
|
270
|
|
271 Title : hostlocation
|
|
272 Usage : my $location = $self->hostlocation()
|
|
273 $self->hostlocation($location)
|
|
274 Function: Set/Get Hostlocation
|
|
275 Returns : string representing hostlocation
|
|
276 Args : string specifying hostlocation [optional]
|
|
277
|
|
278 =cut
|
|
279
|
|
280 sub hostlocation {
|
|
281 my ($self, $location ) = @_;
|
|
282 $location = lc $location;
|
|
283 my $servertype = $self->servertype;
|
|
284 $self->throw("Must have a valid servertype defined not $servertype")
|
|
285 unless defined $servertype;
|
|
286 my %hosts = %{$self->hosts->{$servertype}->{'hosts'}};
|
|
287 if( defined $location && $location ne '' ) {
|
|
288 if( ! $hosts{$location} ) {
|
|
289 $self->throw("Must specify a known host, not $location,".
|
|
290 " possible values (".
|
|
291 join(",", sort keys %hosts ). ")");
|
|
292 }
|
|
293 $self->{'_hostlocation'} = $location;
|
|
294 }
|
|
295 $self->{'_hostlocation'} = $DEFAULTLOCATION unless $self->{'_hostlocation'};
|
|
296 return $self->{'_hostlocation'};
|
|
297 }
|
|
298
|
|
299 =head2 location_url
|
|
300
|
|
301 Title : location
|
|
302 Usage : my $url = $self->location_url()
|
|
303 Function: Get host url
|
|
304 Returns : string representing url
|
|
305 Args : none
|
|
306
|
|
307 =cut
|
|
308
|
|
309 sub location_url {
|
|
310 my ($self) = @_;
|
|
311 my $servertype = $self->servertype();
|
|
312 my $location = $self->hostlocation();
|
|
313 if( ! defined $location || !defined $servertype ) {
|
|
314 $self->throw("must have a valid hostlocation and servertype set before calling location_url");
|
|
315 }
|
|
316 return sprintf($self->hosts->{$servertype}->{'baseurl'},
|
|
317 $self->hosts->{$servertype}->{'hosts'}->{$location});
|
|
318 }
|
|
319
|
|
320 =head1 Bio::DB::DBFetch routines
|
|
321
|
|
322 These methods allow subclasses to pass parameters.
|
|
323
|
|
324 =head2 hosts
|
|
325
|
|
326 Title : hosts
|
|
327 Usage :
|
|
328 Function: get/set for host hash
|
|
329 Returns :
|
|
330 Args : optional hash
|
|
331
|
|
332 =cut
|
|
333
|
|
334 sub hosts {
|
|
335 my ($self, $value) = @_;
|
|
336 if (defined $value) {
|
|
337 $self->{'_hosts'} = $value;
|
|
338 }
|
|
339 unless (exists $self->{'_hosts'}) {
|
|
340 return ('');
|
|
341 } else {
|
|
342 return $self->{'_hosts'};
|
|
343 }
|
|
344 }
|
|
345
|
|
346 =head2 formatmap
|
|
347
|
|
348 Title : formatmap
|
|
349 Usage :
|
|
350 Function: get/set for format hash
|
|
351 Returns :
|
|
352 Args : optional hash
|
|
353
|
|
354 =cut
|
|
355
|
|
356 sub formatmap {
|
|
357 my ($self, $value) = @_;
|
|
358 if (defined $value) {
|
|
359 $self->{'_formatmap'} = $value;
|
|
360 }
|
|
361 unless (exists $self->{'_formatmap'}) {
|
|
362 return ('');
|
|
363 } else {
|
|
364 return $self->{'_formatmap'};
|
|
365 }
|
|
366 }
|
|
367
|
|
368
|
|
369 1;
|
|
370 __END__
|