0
|
1 # $Id: biofetch.pm,v 1.5 2002/10/22 07:45:14 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module Bio::DB::Biblio::biofetch.pm
|
|
4 #
|
|
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
|
|
6 # For copyright and disclaimer see below.
|
|
7
|
|
8 # POD documentation - main docs before the code
|
|
9
|
|
10 =head1 NAME
|
|
11
|
|
12 Bio::DB::Biblio::biofetch - A BioFetch-based access to a bibliographic
|
|
13 citation retrieval
|
|
14
|
|
15 =head1 SYNOPSIS
|
|
16
|
|
17 Do not use this object directly, only access it through the
|
|
18 I<Bio::Biblio> module:
|
|
19
|
|
20 use Bio::Biblio;
|
|
21 my $biblio = new Bio::Biblio (-access => 'biofetch');
|
|
22 my $ref = $biblio->get_by_id('20063307'));
|
|
23
|
|
24 my $ids = ['20063307', '98276153'];
|
|
25 my $refio = $biblio->get_all($ids);
|
|
26 while ($ref = $refio->next_bibref) {
|
|
27 print $ref->identifier, "\n";
|
|
28 }
|
|
29
|
|
30 =head1 DESCRIPTION
|
|
31
|
|
32 This class uses BioFetch protocol based service to retrieve Medline
|
|
33 references by their ID.
|
|
34
|
|
35 =head1 FEEDBACK
|
|
36
|
|
37 =head2 Mailing Lists
|
|
38
|
|
39 User feedback is an integral part of the evolution of this and other
|
|
40 Bioperl modules. Send your comments and suggestions preferably to
|
|
41 the Bioperl mailing list. Your participation is much appreciated.
|
|
42
|
|
43 bioperl-l@bioperl.org - General discussion
|
|
44 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
45
|
|
46 =head2 Reporting Bugs
|
|
47
|
|
48 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
49 of the bugs and their resolution. Bug reports can be submitted via
|
|
50 email or the web:
|
|
51
|
|
52 bioperl-bugs@bioperl.org
|
|
53 http://bugzilla.bioperl.org/
|
|
54
|
|
55 =head1 AUTHOR
|
|
56
|
|
57 Heikki Lehvaslaiho (heikki@ebi.ac.uk)
|
|
58
|
|
59 =head1 COPYRIGHT
|
|
60
|
|
61 Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
|
|
62
|
|
63 This module is free software; you can redistribute it and/or modify
|
|
64 it under the same terms as Perl itself.
|
|
65
|
|
66 =head1 DISCLAIMER
|
|
67
|
|
68 This software is provided "as is" without warranty of any kind.
|
|
69
|
|
70 =head1 BUGS AND LIMITATIONS
|
|
71
|
|
72 =over 1
|
|
73
|
|
74 =item *
|
|
75
|
|
76 Only method get_by_id() is supported.
|
|
77
|
|
78 =back
|
|
79
|
|
80 =head1 APPENDIX
|
|
81
|
|
82 The main documentation details are to be found in
|
|
83 L<Bio::DB::BiblioI>.
|
|
84
|
|
85 Here is the rest of the object methods. Internal methods are preceded
|
|
86 with an underscore _.
|
|
87
|
|
88 =cut
|
|
89
|
|
90
|
|
91 # Let the code begin...
|
|
92
|
|
93
|
|
94 package Bio::DB::Biblio::biofetch;
|
|
95 use vars qw(@ISA $VERSION %HOSTS %FORMATMAP $DEFAULTFORMAT
|
|
96 $Revision $DEFAULT_SERVICE $DEFAULT_NAMESPACE);
|
|
97 use strict;
|
|
98
|
|
99 use Bio::Biblio;
|
|
100 use Bio::DB::DBFetch;
|
|
101 use Bio::Biblio::IO;
|
|
102
|
|
103 @ISA = qw( Bio::DB::DBFetch Bio::Biblio);
|
|
104
|
|
105 BEGIN {
|
|
106
|
|
107 # set the version for version checking
|
|
108 $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d.%-02d", @r };
|
|
109 $Revision = q$Id: biofetch.pm,v 1.5 2002/10/22 07:45:14 lapp Exp $;
|
|
110
|
|
111 # you can add your own here theoretically.
|
|
112 %HOSTS = (
|
|
113 'dbfetch' => {
|
|
114 baseurl => 'http://%s/cgi-bin/dbfetch?db=medline&style=raw',
|
|
115 hosts => {
|
|
116 'ebi' => 'www.ebi.ac.uk'
|
|
117 }
|
|
118 }
|
|
119 );
|
|
120 %FORMATMAP = ( 'default' => 'medlinexml'
|
|
121 );
|
|
122 $DEFAULTFORMAT = 'default';
|
|
123
|
|
124 $DEFAULT_SERVICE = 'http://www.ebi.ac.uk/cgi-bin/dbfetch';
|
|
125
|
|
126 }
|
|
127
|
|
128
|
|
129 sub new {
|
|
130 my ($class, @args ) = @_;
|
|
131 my $self = $class->SUPER::new(@args);
|
|
132
|
|
133 $self->{ '_hosts' } = {};
|
|
134 $self->{ '_formatmap' } = {};
|
|
135
|
|
136 $self->hosts(\%HOSTS);
|
|
137 $self->formatmap(\%FORMATMAP);
|
|
138 $self->{'_default_format'} = $DEFAULTFORMAT;
|
|
139
|
|
140 return $self;
|
|
141 }
|
|
142
|
|
143 =head2 get_by_id
|
|
144
|
|
145 Title : get_by_id
|
|
146 Usage : $entry = $db->get__by_id('20063307')
|
|
147 Function: Gets a Bio::Biblio::RefI object by its name
|
|
148 Returns : a Bio::Biblio::Medline object
|
|
149 Args : the id (as a string) of the reference
|
|
150
|
|
151 =cut
|
|
152
|
|
153 sub get_by_id {
|
|
154 my ($self,$id) = @_;
|
|
155 my $io = $self->get_Stream_by_id([$id]);
|
|
156 $self->throw("id does not exist") if( !defined $io ) ;
|
|
157 return $io->next_bibref();
|
|
158 }
|
|
159
|
|
160
|
|
161 =head2 get_all
|
|
162
|
|
163 Title : get_all
|
|
164 Usage : $seq = $db->get_all($ref);
|
|
165 Function: Retrieves reference objects from the server 'en masse',
|
|
166 rather than one at a time. For large numbers of sequences,
|
|
167 this is far superior than get_by_id().
|
|
168 Example :
|
|
169 Returns : a stream of Bio::Biblio::Medline objects
|
|
170 Args : $ref : either an array reference, a filename, or a filehandle
|
|
171 from which to get the list of unique ids/accession numbers.
|
|
172
|
|
173 =cut
|
|
174
|
|
175 sub get_all {
|
|
176 my ($self, $ids) = @_;
|
|
177 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single');
|
|
178 }
|
|
179
|
|
180 =head2 get_seq_stream
|
|
181
|
|
182 Title : get_seq_stream
|
|
183 Usage : my $seqio = $self->get_seq_sream(%qualifiers)
|
|
184 Function: builds a url and queries a web db
|
|
185 Returns : a Bio::SeqIO stream capable of producing sequence
|
|
186 Args : %qualifiers = a hash qualifiers that the implementing class
|
|
187 will process to make a url suitable for web querying
|
|
188
|
|
189 =cut
|
|
190
|
|
191 sub get_seq_stream {
|
|
192 my ($self, %qualifiers) = @_;
|
|
193 my ($rformat, $ioformat) = $self->request_format();
|
|
194 my $seen = 0;
|
|
195 foreach my $key ( keys %qualifiers ) {
|
|
196 if( $key =~ /format/i ) {
|
|
197 $rformat = $qualifiers{$key};
|
|
198 $seen = 1;
|
|
199 }
|
|
200 }
|
|
201 $qualifiers{'-format'} = $rformat if( !$seen);
|
|
202 ($rformat, $ioformat) = $self->request_format($rformat);
|
|
203
|
|
204 my $request = $self->get_request(%qualifiers);
|
|
205 my ($stream,$resp);
|
|
206 if( $self->retrieval_type =~ /temp/i ) {
|
|
207 my $dir = $self->io()->tempdir( CLEANUP => 1);
|
|
208 my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir );
|
|
209 close $fh;
|
|
210 my ($resp) = $self->_request($request, $tmpfile);
|
|
211 if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) {
|
|
212 $self->throw("WebDBSeqI Error - check query sequences!\n");
|
|
213 }
|
|
214 $self->postprocess_data('type' => 'file',
|
|
215 'location' => $tmpfile);
|
|
216 # this may get reset when requesting batch mode
|
|
217 ($rformat,$ioformat) = $self->request_format();
|
|
218 if( $self->verbose > 0 ) {
|
|
219 open(ERR, "<$tmpfile");
|
|
220 while(<ERR>) { $self->debug($_);}
|
|
221 }
|
|
222 $stream = new Bio::Biblio::IO('-format' => $ioformat,
|
|
223 '-file' => $tmpfile);
|
|
224 } elsif( $self->retrieval_type =~ /io_string/i ) {
|
|
225 my ($resp) = $self->_request($request);
|
|
226 my $content = $resp->content_ref;
|
|
227 $self->debug( "content is $$content\n");
|
|
228 if( ! $resp->is_success() || length(${$resp->content_ref()}) == 0 ) {
|
|
229 $self->throw("WebDBSeqI Error - check query sequences!\n");
|
|
230 }
|
|
231 ($rformat,$ioformat) = $self->request_format();
|
|
232 $self->postprocess_data('type'=> 'string',
|
|
233 'location' => $content);
|
|
234 $stream = new Bio::Biblio::IO('-format' => $ioformat,
|
|
235 # '-data' => "<tag>". $$content. "</tag>");
|
|
236 '-data' => $$content
|
|
237 );
|
|
238 } else {
|
|
239 $self->throw("retrieval type " . $self->retrieval_type .
|
|
240 " unsupported\n");
|
|
241 }
|
|
242 return $stream;
|
|
243 }
|
|
244
|
|
245
|
|
246 =head2 postprocess_data
|
|
247
|
|
248 Title : postprocess_data
|
|
249 Usage : $self->postprocess_data ( 'type' => 'string',
|
|
250 'location' => \$datastr);
|
|
251 Function: process downloaded data before loading into a Bio::SeqIO
|
|
252 Returns : void
|
|
253 Args : hash with two keys - 'type' can be 'string' or 'file'
|
|
254 - 'location' either file location or string
|
|
255 reference containing data
|
|
256
|
|
257 =cut
|
|
258
|
|
259 # the default method, works for genbank/genpept, other classes should
|
|
260 # override it with their own method.
|
|
261
|
|
262 sub postprocess_data {
|
|
263 my ($self, %args) = @_;
|
|
264 my $data;
|
|
265 my $type = uc $args{'type'};
|
|
266 my $location = $args{'location'};
|
|
267 if( !defined $type || $type eq '' || !defined $location) {
|
|
268 return;
|
|
269 } elsif( $type eq 'STRING' ) {
|
|
270 $data = $$location;
|
|
271 } elsif ( $type eq 'FILE' ) {
|
|
272 open(TMP, $location) or $self->throw("could not open file $location");
|
|
273 my @in = <TMP>;
|
|
274 close TMP;
|
|
275 $data = join("", @in);
|
|
276 }
|
|
277
|
|
278 $data = "<tag>". $data. "</tag>";
|
|
279
|
|
280 if( $type eq 'FILE' ) {
|
|
281 open(TMP, ">$location") or $self->throw("could overwrite file $location");
|
|
282 print TMP $data;
|
|
283 close TMP;
|
|
284 } elsif ( $type eq 'STRING' ) {
|
|
285 ${$args{'location'}} = $data;
|
|
286 }
|
|
287
|
|
288 $self->debug("format is ". $self->request_format(). " data is $data\n");
|
|
289
|
|
290 }
|
|
291
|
|
292
|
|
293
|
|
294
|
|
295 =head2 VERSION and Revision
|
|
296
|
|
297 Usage : print $Bio::DB::Biblio::biofetch::VERSION;
|
|
298 print $Bio::DB::Biblio::biofetch::Revision;
|
|
299
|
|
300 =cut
|
|
301
|
|
302 =head2 Defaults
|
|
303
|
|
304 Usage : print $Bio::DB::Biblio::biofetch::DEFAULT_SERVICE;
|
|
305
|
|
306 =cut
|
|
307
|
|
308 1;
|
|
309 __END__
|