comparison variant_effect_predictor/Bio/DB/Biblio/biofetch.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: biofetch.pm,v 1.5 2002/10/22 07:45:14 lapp Exp $
2 #
3 # BioPerl module Bio::DB::Biblio::biofetch.pm
4 #
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
6 # For copyright and disclaimer see below.
7
8 # POD documentation - main docs before the code
9
10 =head1 NAME
11
12 Bio::DB::Biblio::biofetch - A BioFetch-based access to a bibliographic
13 citation retrieval
14
15 =head1 SYNOPSIS
16
17 Do not use this object directly, only access it through the
18 I<Bio::Biblio> module:
19
20 use Bio::Biblio;
21 my $biblio = new Bio::Biblio (-access => 'biofetch');
22 my $ref = $biblio->get_by_id('20063307'));
23
24 my $ids = ['20063307', '98276153'];
25 my $refio = $biblio->get_all($ids);
26 while ($ref = $refio->next_bibref) {
27 print $ref->identifier, "\n";
28 }
29
30 =head1 DESCRIPTION
31
32 This class uses BioFetch protocol based service to retrieve Medline
33 references by their ID.
34
35 =head1 FEEDBACK
36
37 =head2 Mailing Lists
38
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to
41 the Bioperl mailing list. Your participation is much appreciated.
42
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/MailList.shtml - About the mailing lists
45
46 =head2 Reporting Bugs
47
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 of the bugs and their resolution. Bug reports can be submitted via
50 email or the web:
51
52 bioperl-bugs@bioperl.org
53 http://bugzilla.bioperl.org/
54
55 =head1 AUTHOR
56
57 Heikki Lehvaslaiho (heikki@ebi.ac.uk)
58
59 =head1 COPYRIGHT
60
61 Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
62
63 This module is free software; you can redistribute it and/or modify
64 it under the same terms as Perl itself.
65
66 =head1 DISCLAIMER
67
68 This software is provided "as is" without warranty of any kind.
69
70 =head1 BUGS AND LIMITATIONS
71
72 =over 1
73
74 =item *
75
76 Only method get_by_id() is supported.
77
78 =back
79
80 =head1 APPENDIX
81
82 The main documentation details are to be found in
83 L<Bio::DB::BiblioI>.
84
85 Here is the rest of the object methods. Internal methods are preceded
86 with an underscore _.
87
88 =cut
89
90
91 # Let the code begin...
92
93
94 package Bio::DB::Biblio::biofetch;
95 use vars qw(@ISA $VERSION %HOSTS %FORMATMAP $DEFAULTFORMAT
96 $Revision $DEFAULT_SERVICE $DEFAULT_NAMESPACE);
97 use strict;
98
99 use Bio::Biblio;
100 use Bio::DB::DBFetch;
101 use Bio::Biblio::IO;
102
103 @ISA = qw( Bio::DB::DBFetch Bio::Biblio);
104
105 BEGIN {
106
107 # set the version for version checking
108 $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d.%-02d", @r };
109 $Revision = q$Id: biofetch.pm,v 1.5 2002/10/22 07:45:14 lapp Exp $;
110
111 # you can add your own here theoretically.
112 %HOSTS = (
113 'dbfetch' => {
114 baseurl => 'http://%s/cgi-bin/dbfetch?db=medline&style=raw',
115 hosts => {
116 'ebi' => 'www.ebi.ac.uk'
117 }
118 }
119 );
120 %FORMATMAP = ( 'default' => 'medlinexml'
121 );
122 $DEFAULTFORMAT = 'default';
123
124 $DEFAULT_SERVICE = 'http://www.ebi.ac.uk/cgi-bin/dbfetch';
125
126 }
127
128
129 sub new {
130 my ($class, @args ) = @_;
131 my $self = $class->SUPER::new(@args);
132
133 $self->{ '_hosts' } = {};
134 $self->{ '_formatmap' } = {};
135
136 $self->hosts(\%HOSTS);
137 $self->formatmap(\%FORMATMAP);
138 $self->{'_default_format'} = $DEFAULTFORMAT;
139
140 return $self;
141 }
142
143 =head2 get_by_id
144
145 Title : get_by_id
146 Usage : $entry = $db->get__by_id('20063307')
147 Function: Gets a Bio::Biblio::RefI object by its name
148 Returns : a Bio::Biblio::Medline object
149 Args : the id (as a string) of the reference
150
151 =cut
152
153 sub get_by_id {
154 my ($self,$id) = @_;
155 my $io = $self->get_Stream_by_id([$id]);
156 $self->throw("id does not exist") if( !defined $io ) ;
157 return $io->next_bibref();
158 }
159
160
161 =head2 get_all
162
163 Title : get_all
164 Usage : $seq = $db->get_all($ref);
165 Function: Retrieves reference objects from the server 'en masse',
166 rather than one at a time. For large numbers of sequences,
167 this is far superior than get_by_id().
168 Example :
169 Returns : a stream of Bio::Biblio::Medline objects
170 Args : $ref : either an array reference, a filename, or a filehandle
171 from which to get the list of unique ids/accession numbers.
172
173 =cut
174
175 sub get_all {
176 my ($self, $ids) = @_;
177 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single');
178 }
179
180 =head2 get_seq_stream
181
182 Title : get_seq_stream
183 Usage : my $seqio = $self->get_seq_sream(%qualifiers)
184 Function: builds a url and queries a web db
185 Returns : a Bio::SeqIO stream capable of producing sequence
186 Args : %qualifiers = a hash qualifiers that the implementing class
187 will process to make a url suitable for web querying
188
189 =cut
190
191 sub get_seq_stream {
192 my ($self, %qualifiers) = @_;
193 my ($rformat, $ioformat) = $self->request_format();
194 my $seen = 0;
195 foreach my $key ( keys %qualifiers ) {
196 if( $key =~ /format/i ) {
197 $rformat = $qualifiers{$key};
198 $seen = 1;
199 }
200 }
201 $qualifiers{'-format'} = $rformat if( !$seen);
202 ($rformat, $ioformat) = $self->request_format($rformat);
203
204 my $request = $self->get_request(%qualifiers);
205 my ($stream,$resp);
206 if( $self->retrieval_type =~ /temp/i ) {
207 my $dir = $self->io()->tempdir( CLEANUP => 1);
208 my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir );
209 close $fh;
210 my ($resp) = $self->_request($request, $tmpfile);
211 if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) {
212 $self->throw("WebDBSeqI Error - check query sequences!\n");
213 }
214 $self->postprocess_data('type' => 'file',
215 'location' => $tmpfile);
216 # this may get reset when requesting batch mode
217 ($rformat,$ioformat) = $self->request_format();
218 if( $self->verbose > 0 ) {
219 open(ERR, "<$tmpfile");
220 while(<ERR>) { $self->debug($_);}
221 }
222 $stream = new Bio::Biblio::IO('-format' => $ioformat,
223 '-file' => $tmpfile);
224 } elsif( $self->retrieval_type =~ /io_string/i ) {
225 my ($resp) = $self->_request($request);
226 my $content = $resp->content_ref;
227 $self->debug( "content is $$content\n");
228 if( ! $resp->is_success() || length(${$resp->content_ref()}) == 0 ) {
229 $self->throw("WebDBSeqI Error - check query sequences!\n");
230 }
231 ($rformat,$ioformat) = $self->request_format();
232 $self->postprocess_data('type'=> 'string',
233 'location' => $content);
234 $stream = new Bio::Biblio::IO('-format' => $ioformat,
235 # '-data' => "<tag>". $$content. "</tag>");
236 '-data' => $$content
237 );
238 } else {
239 $self->throw("retrieval type " . $self->retrieval_type .
240 " unsupported\n");
241 }
242 return $stream;
243 }
244
245
246 =head2 postprocess_data
247
248 Title : postprocess_data
249 Usage : $self->postprocess_data ( 'type' => 'string',
250 'location' => \$datastr);
251 Function: process downloaded data before loading into a Bio::SeqIO
252 Returns : void
253 Args : hash with two keys - 'type' can be 'string' or 'file'
254 - 'location' either file location or string
255 reference containing data
256
257 =cut
258
259 # the default method, works for genbank/genpept, other classes should
260 # override it with their own method.
261
262 sub postprocess_data {
263 my ($self, %args) = @_;
264 my $data;
265 my $type = uc $args{'type'};
266 my $location = $args{'location'};
267 if( !defined $type || $type eq '' || !defined $location) {
268 return;
269 } elsif( $type eq 'STRING' ) {
270 $data = $$location;
271 } elsif ( $type eq 'FILE' ) {
272 open(TMP, $location) or $self->throw("could not open file $location");
273 my @in = <TMP>;
274 close TMP;
275 $data = join("", @in);
276 }
277
278 $data = "<tag>". $data. "</tag>";
279
280 if( $type eq 'FILE' ) {
281 open(TMP, ">$location") or $self->throw("could overwrite file $location");
282 print TMP $data;
283 close TMP;
284 } elsif ( $type eq 'STRING' ) {
285 ${$args{'location'}} = $data;
286 }
287
288 $self->debug("format is ". $self->request_format(). " data is $data\n");
289
290 }
291
292
293
294
295 =head2 VERSION and Revision
296
297 Usage : print $Bio::DB::Biblio::biofetch::VERSION;
298 print $Bio::DB::Biblio::biofetch::Revision;
299
300 =cut
301
302 =head2 Defaults
303
304 Usage : print $Bio::DB::Biblio::biofetch::DEFAULT_SERVICE;
305
306 =cut
307
308 1;
309 __END__