comparison variant_effect_predictor/Bio/DB/DBFetch.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 # $Id: DBFetch.pm,v 1.8 2002/12/22 22:02:13 lstein Exp $
3 #
4 # BioPerl module for Bio::DB::DBFetch
5 #
6 # Cared for by Heikki Lehvaslaiho <Heikki@ebi.ac.uk>
7 #
8 # Copyright Heikki Lehvaslaiho
9 #
10 # You may distribute this module under the same terms as perl itself
11
12 # POD documentation - main docs before the code
13
14 =head1 NAME
15
16 Bio::DB::DBFetch - Database object for retrieving using the dbfetch script
17
18 =head1 SYNOPSIS
19
20 #do not use this module directly
21
22 =head1 DESCRIPTION
23
24 Allows the dynamic retrieval of entries from databases using the
25 dbfetch script at EBI:
26 L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>.
27
28 In order to make changes transparent we have host type (currently only
29 ebi) and location (defaults to ebi) separated out. This allows later
30 additions of more servers in different geographical locations.
31
32 This is a superclass which is called by instantiable subclasses with
33 correct parameters.
34
35 =head1 FEEDBACK
36
37 =head2 Mailing Lists
38
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to one
41 of the Bioperl mailing lists. Your participation is much appreciated.
42
43 bioperl-l@bioperl.org - General discussion
44 http://bio.perl.org/MailList.html - About the mailing lists
45
46 =head2 Reporting Bugs
47
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 the bugs and their resolution. Bug reports can be submitted via email
50 or the web:
51
52 bioperl-bugs@bio.perl.org
53 http://bugzilla.bioperl.org/
54
55 =head1 AUTHOR - Heikki Lehvaslaiho
56
57 Email Heikki Lehvaslaiho E<lt>Heikki@ebi.ac.ukE<gt>
58
59 =head1 APPENDIX
60
61 The rest of the documentation details each of the object
62 methods. Internal methods are usually preceded with a _
63
64 =cut
65
66 # Let the code begin...
67
68 package Bio::DB::DBFetch;
69 use strict;
70 use vars qw(@ISA $MODVERSION $DEFAULTFORMAT $DEFAULTLOCATION
71 $DEFAULTSERVERTYPE);
72
73 $MODVERSION = '0.1';
74 use HTTP::Request::Common;
75 use Bio::DB::WebDBSeqI;
76
77 @ISA = qw(Bio::DB::WebDBSeqI);
78
79 # the new way to make modules a little more lightweight
80
81 BEGIN {
82 # global vars
83 $DEFAULTSERVERTYPE = 'dbfetch';
84 $DEFAULTLOCATION = 'ebi';
85 }
86
87
88 =head1 Routines from Bio::DB::WebDBSeqI
89
90 =head2 get_request
91
92 Title : get_request
93 Usage : my $url = $self->get_request
94 Function: returns a HTTP::Request object
95 Returns :
96 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
97
98 =cut
99
100 sub get_request {
101 my ($self, @qualifiers) = @_;
102 my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)],
103 @qualifiers);
104
105 $self->throw("Must specify a value for UIDs to fetch")
106 unless defined $uids;
107 my $tmp;
108 my $format_string = '';
109 $format ||= $self->default_format;
110 ($format, $tmp) = $self->request_format($format);
111 $format_string = "&format=$format" if $format ne $self->default_format;
112 my $url = $self->location_url();
113 my $uid;
114 if( ref($uids) =~ /ARRAY/i ) {
115 $uid = join (',', @$uids);
116 $self->warn ('The server will accept maximum of 50 entries in a request. The rest are ignored.')
117 if scalar @$uids >50;
118 } else {
119 $uid = $uids;
120 }
121
122 return GET $url. $format_string. '&id='. $uid;
123 }
124
125
126 =head2 postprocess_data
127
128 Title : postprocess_data
129 Usage : $self->postprocess_data ( 'type' => 'string',
130 'location' => \$datastr);
131 Function: process downloaded data before loading into a Bio::SeqIO
132 Returns : void
133 Args : hash with two keys - 'type' can be 'string' or 'file'
134 - 'location' either file location or string
135 reference containing data
136
137 =cut
138
139 # remove occasional blank lines at top of web output
140 sub postprocess_data {
141 my ($self, %args) = @_;
142 if ($args{type} eq 'string') {
143 ${$args{location}} =~ s/^\s+//; # get rid of leading whitespace
144 }
145 elsif ($args{type} eq 'file') {
146 open F,$args{location} or $self->throw("Cannot open $args{location}: $!");
147 my @data = <F>;
148 for (@data) {
149 last unless /^\s+$/;
150 shift @data;
151 }
152 open F,">$args{location}" or $self->throw("Cannot write to $args{location}: $!");
153 print F @data;
154 close F;
155 }
156 }
157
158 =head2 default_format
159
160 Title : default_format
161 Usage : my $format = $self->default_format
162 Function: Returns default sequence format for this module
163 Returns : string
164 Args : none
165
166 =cut
167
168 sub default_format {
169 my ($self) = @_;
170 return $self->{'_default_format'};
171 }
172
173 =head1 Bio::DB::DBFetch specific routines
174
175 =head2 get_Stream_by_id
176
177 Title : get_Stream_by_id
178 Usage : $seq = $db->get_Stream_by_id($ref);
179 Function: Retrieves Seq objects from the server 'en masse', rather than one
180 at a time. For large numbers of sequences, this is far superior
181 than get_Stream_by_[id/acc]().
182 Example :
183 Returns : a Bio::SeqIO stream object
184 Args : $ref : either an array reference, a filename, or a filehandle
185 from which to get the list of unique ids/accession numbers.
186
187 NOTE: for backward compatibility, this method is also called
188 get_Stream_by_batch.
189
190 =cut
191
192 sub get_Stream_by_id {
193 my ($self, $ids) = @_;
194 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'batch');
195 }
196
197 =head2 get_Seq_by_version
198
199 Title : get_Seq_by_version
200 Usage : $seq = $db->get_Seq_by_version('X77802.1');
201 Function: Gets a Bio::Seq object by accession number
202 Returns : A Bio::Seq object
203 Args : version number (as a string)
204 Throws : "version does not exist" exception
205
206 =cut
207
208 sub get_Seq_by_version {
209 my ($self,$seqid) = @_;
210 my $seqio = $self->get_Stream_by_acc([$seqid]);
211 $self->throw("version does not exist") if( !defined $seqio );
212 return $seqio->next_seq();
213 }
214
215 =head2 request_format
216
217 Title : request_format
218 Usage : my ($req_format, $ioformat) = $self->request_format;
219 $self->request_format("genbank");
220 $self->request_format("fasta");
221 Function: Get/Set sequence format retrieval. The get-form will normally not
222 be used outside of this and derived modules.
223 Returns : Array of two strings, the first representing the format for
224 retrieval, and the second specifying the corresponding SeqIO format.
225 Args : $format = sequence format
226
227 =cut
228
229 sub request_format {
230 my ($self, $value) = @_;
231 if( defined $value ) {
232 $value = lc $value;
233 $self->{'_format'} = $value;
234 return ($value, $value);
235 }
236 $value = $self->{'_format'};
237 if( $value and defined $self->formatmap->{$value} ) {
238 return ($value, $self->formatmap->{$value});
239 } else {
240 # Try to fall back to a default.
241 return ($self->default_format, $self->default_format );
242 }
243 }
244
245
246 =head2 servertype
247
248 Title : servertype
249 Usage : my $servertype = $self->servertype
250 $self->servertype($servertype);
251 Function: Get/Set server type
252 Returns : string
253 Args : server type string [optional]
254
255 =cut
256
257 sub servertype {
258 my ($self, $servertype) = @_;
259 if( defined $servertype && $servertype ne '') {
260 $self->throw("You gave an invalid server type ($servertype)".
261 " - available types are ".
262 keys %{$self->hosts}) unless( $self->hosts->{$servertype} );
263 $self->{'_servertype'} = $servertype;
264 }
265 $self->{'_servertype'} = $DEFAULTSERVERTYPE unless $self->{'_servertype'};
266 return $self->{'_servertype'};
267 }
268
269 =head2 hostlocation
270
271 Title : hostlocation
272 Usage : my $location = $self->hostlocation()
273 $self->hostlocation($location)
274 Function: Set/Get Hostlocation
275 Returns : string representing hostlocation
276 Args : string specifying hostlocation [optional]
277
278 =cut
279
280 sub hostlocation {
281 my ($self, $location ) = @_;
282 $location = lc $location;
283 my $servertype = $self->servertype;
284 $self->throw("Must have a valid servertype defined not $servertype")
285 unless defined $servertype;
286 my %hosts = %{$self->hosts->{$servertype}->{'hosts'}};
287 if( defined $location && $location ne '' ) {
288 if( ! $hosts{$location} ) {
289 $self->throw("Must specify a known host, not $location,".
290 " possible values (".
291 join(",", sort keys %hosts ). ")");
292 }
293 $self->{'_hostlocation'} = $location;
294 }
295 $self->{'_hostlocation'} = $DEFAULTLOCATION unless $self->{'_hostlocation'};
296 return $self->{'_hostlocation'};
297 }
298
299 =head2 location_url
300
301 Title : location
302 Usage : my $url = $self->location_url()
303 Function: Get host url
304 Returns : string representing url
305 Args : none
306
307 =cut
308
309 sub location_url {
310 my ($self) = @_;
311 my $servertype = $self->servertype();
312 my $location = $self->hostlocation();
313 if( ! defined $location || !defined $servertype ) {
314 $self->throw("must have a valid hostlocation and servertype set before calling location_url");
315 }
316 return sprintf($self->hosts->{$servertype}->{'baseurl'},
317 $self->hosts->{$servertype}->{'hosts'}->{$location});
318 }
319
320 =head1 Bio::DB::DBFetch routines
321
322 These methods allow subclasses to pass parameters.
323
324 =head2 hosts
325
326 Title : hosts
327 Usage :
328 Function: get/set for host hash
329 Returns :
330 Args : optional hash
331
332 =cut
333
334 sub hosts {
335 my ($self, $value) = @_;
336 if (defined $value) {
337 $self->{'_hosts'} = $value;
338 }
339 unless (exists $self->{'_hosts'}) {
340 return ('');
341 } else {
342 return $self->{'_hosts'};
343 }
344 }
345
346 =head2 formatmap
347
348 Title : formatmap
349 Usage :
350 Function: get/set for format hash
351 Returns :
352 Args : optional hash
353
354 =cut
355
356 sub formatmap {
357 my ($self, $value) = @_;
358 if (defined $value) {
359 $self->{'_formatmap'} = $value;
360 }
361 unless (exists $self->{'_formatmap'}) {
362 return ('');
363 } else {
364 return $self->{'_formatmap'};
365 }
366 }
367
368
369 1;
370 __END__