Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/DB/Biblio/biofetch.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: biofetch.pm,v 1.5 2002/10/22 07:45:14 lapp Exp $ | |
2 # | |
3 # BioPerl module Bio::DB::Biblio::biofetch.pm | |
4 # | |
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # For copyright and disclaimer see below. | |
7 | |
8 # POD documentation - main docs before the code | |
9 | |
10 =head1 NAME | |
11 | |
12 Bio::DB::Biblio::biofetch - A BioFetch-based access to a bibliographic | |
13 citation retrieval | |
14 | |
15 =head1 SYNOPSIS | |
16 | |
17 Do not use this object directly, only access it through the | |
18 I<Bio::Biblio> module: | |
19 | |
20 use Bio::Biblio; | |
21 my $biblio = new Bio::Biblio (-access => 'biofetch'); | |
22 my $ref = $biblio->get_by_id('20063307')); | |
23 | |
24 my $ids = ['20063307', '98276153']; | |
25 my $refio = $biblio->get_all($ids); | |
26 while ($ref = $refio->next_bibref) { | |
27 print $ref->identifier, "\n"; | |
28 } | |
29 | |
30 =head1 DESCRIPTION | |
31 | |
32 This class uses BioFetch protocol based service to retrieve Medline | |
33 references by their ID. | |
34 | |
35 =head1 FEEDBACK | |
36 | |
37 =head2 Mailing Lists | |
38 | |
39 User feedback is an integral part of the evolution of this and other | |
40 Bioperl modules. Send your comments and suggestions preferably to | |
41 the Bioperl mailing list. Your participation is much appreciated. | |
42 | |
43 bioperl-l@bioperl.org - General discussion | |
44 http://bioperl.org/MailList.shtml - About the mailing lists | |
45 | |
46 =head2 Reporting Bugs | |
47 | |
48 Report bugs to the Bioperl bug tracking system to help us keep track | |
49 of the bugs and their resolution. Bug reports can be submitted via | |
50 email or the web: | |
51 | |
52 bioperl-bugs@bioperl.org | |
53 http://bugzilla.bioperl.org/ | |
54 | |
55 =head1 AUTHOR | |
56 | |
57 Heikki Lehvaslaiho (heikki@ebi.ac.uk) | |
58 | |
59 =head1 COPYRIGHT | |
60 | |
61 Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved. | |
62 | |
63 This module is free software; you can redistribute it and/or modify | |
64 it under the same terms as Perl itself. | |
65 | |
66 =head1 DISCLAIMER | |
67 | |
68 This software is provided "as is" without warranty of any kind. | |
69 | |
70 =head1 BUGS AND LIMITATIONS | |
71 | |
72 =over 1 | |
73 | |
74 =item * | |
75 | |
76 Only method get_by_id() is supported. | |
77 | |
78 =back | |
79 | |
80 =head1 APPENDIX | |
81 | |
82 The main documentation details are to be found in | |
83 L<Bio::DB::BiblioI>. | |
84 | |
85 Here is the rest of the object methods. Internal methods are preceded | |
86 with an underscore _. | |
87 | |
88 =cut | |
89 | |
90 | |
91 # Let the code begin... | |
92 | |
93 | |
94 package Bio::DB::Biblio::biofetch; | |
95 use vars qw(@ISA $VERSION %HOSTS %FORMATMAP $DEFAULTFORMAT | |
96 $Revision $DEFAULT_SERVICE $DEFAULT_NAMESPACE); | |
97 use strict; | |
98 | |
99 use Bio::Biblio; | |
100 use Bio::DB::DBFetch; | |
101 use Bio::Biblio::IO; | |
102 | |
103 @ISA = qw( Bio::DB::DBFetch Bio::Biblio); | |
104 | |
105 BEGIN { | |
106 | |
107 # set the version for version checking | |
108 $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d.%-02d", @r }; | |
109 $Revision = q$Id: biofetch.pm,v 1.5 2002/10/22 07:45:14 lapp Exp $; | |
110 | |
111 # you can add your own here theoretically. | |
112 %HOSTS = ( | |
113 'dbfetch' => { | |
114 baseurl => 'http://%s/cgi-bin/dbfetch?db=medline&style=raw', | |
115 hosts => { | |
116 'ebi' => 'www.ebi.ac.uk' | |
117 } | |
118 } | |
119 ); | |
120 %FORMATMAP = ( 'default' => 'medlinexml' | |
121 ); | |
122 $DEFAULTFORMAT = 'default'; | |
123 | |
124 $DEFAULT_SERVICE = 'http://www.ebi.ac.uk/cgi-bin/dbfetch'; | |
125 | |
126 } | |
127 | |
128 | |
129 sub new { | |
130 my ($class, @args ) = @_; | |
131 my $self = $class->SUPER::new(@args); | |
132 | |
133 $self->{ '_hosts' } = {}; | |
134 $self->{ '_formatmap' } = {}; | |
135 | |
136 $self->hosts(\%HOSTS); | |
137 $self->formatmap(\%FORMATMAP); | |
138 $self->{'_default_format'} = $DEFAULTFORMAT; | |
139 | |
140 return $self; | |
141 } | |
142 | |
143 =head2 get_by_id | |
144 | |
145 Title : get_by_id | |
146 Usage : $entry = $db->get__by_id('20063307') | |
147 Function: Gets a Bio::Biblio::RefI object by its name | |
148 Returns : a Bio::Biblio::Medline object | |
149 Args : the id (as a string) of the reference | |
150 | |
151 =cut | |
152 | |
153 sub get_by_id { | |
154 my ($self,$id) = @_; | |
155 my $io = $self->get_Stream_by_id([$id]); | |
156 $self->throw("id does not exist") if( !defined $io ) ; | |
157 return $io->next_bibref(); | |
158 } | |
159 | |
160 | |
161 =head2 get_all | |
162 | |
163 Title : get_all | |
164 Usage : $seq = $db->get_all($ref); | |
165 Function: Retrieves reference objects from the server 'en masse', | |
166 rather than one at a time. For large numbers of sequences, | |
167 this is far superior than get_by_id(). | |
168 Example : | |
169 Returns : a stream of Bio::Biblio::Medline objects | |
170 Args : $ref : either an array reference, a filename, or a filehandle | |
171 from which to get the list of unique ids/accession numbers. | |
172 | |
173 =cut | |
174 | |
175 sub get_all { | |
176 my ($self, $ids) = @_; | |
177 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single'); | |
178 } | |
179 | |
180 =head2 get_seq_stream | |
181 | |
182 Title : get_seq_stream | |
183 Usage : my $seqio = $self->get_seq_sream(%qualifiers) | |
184 Function: builds a url and queries a web db | |
185 Returns : a Bio::SeqIO stream capable of producing sequence | |
186 Args : %qualifiers = a hash qualifiers that the implementing class | |
187 will process to make a url suitable for web querying | |
188 | |
189 =cut | |
190 | |
191 sub get_seq_stream { | |
192 my ($self, %qualifiers) = @_; | |
193 my ($rformat, $ioformat) = $self->request_format(); | |
194 my $seen = 0; | |
195 foreach my $key ( keys %qualifiers ) { | |
196 if( $key =~ /format/i ) { | |
197 $rformat = $qualifiers{$key}; | |
198 $seen = 1; | |
199 } | |
200 } | |
201 $qualifiers{'-format'} = $rformat if( !$seen); | |
202 ($rformat, $ioformat) = $self->request_format($rformat); | |
203 | |
204 my $request = $self->get_request(%qualifiers); | |
205 my ($stream,$resp); | |
206 if( $self->retrieval_type =~ /temp/i ) { | |
207 my $dir = $self->io()->tempdir( CLEANUP => 1); | |
208 my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir ); | |
209 close $fh; | |
210 my ($resp) = $self->_request($request, $tmpfile); | |
211 if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) { | |
212 $self->throw("WebDBSeqI Error - check query sequences!\n"); | |
213 } | |
214 $self->postprocess_data('type' => 'file', | |
215 'location' => $tmpfile); | |
216 # this may get reset when requesting batch mode | |
217 ($rformat,$ioformat) = $self->request_format(); | |
218 if( $self->verbose > 0 ) { | |
219 open(ERR, "<$tmpfile"); | |
220 while(<ERR>) { $self->debug($_);} | |
221 } | |
222 $stream = new Bio::Biblio::IO('-format' => $ioformat, | |
223 '-file' => $tmpfile); | |
224 } elsif( $self->retrieval_type =~ /io_string/i ) { | |
225 my ($resp) = $self->_request($request); | |
226 my $content = $resp->content_ref; | |
227 $self->debug( "content is $$content\n"); | |
228 if( ! $resp->is_success() || length(${$resp->content_ref()}) == 0 ) { | |
229 $self->throw("WebDBSeqI Error - check query sequences!\n"); | |
230 } | |
231 ($rformat,$ioformat) = $self->request_format(); | |
232 $self->postprocess_data('type'=> 'string', | |
233 'location' => $content); | |
234 $stream = new Bio::Biblio::IO('-format' => $ioformat, | |
235 # '-data' => "<tag>". $$content. "</tag>"); | |
236 '-data' => $$content | |
237 ); | |
238 } else { | |
239 $self->throw("retrieval type " . $self->retrieval_type . | |
240 " unsupported\n"); | |
241 } | |
242 return $stream; | |
243 } | |
244 | |
245 | |
246 =head2 postprocess_data | |
247 | |
248 Title : postprocess_data | |
249 Usage : $self->postprocess_data ( 'type' => 'string', | |
250 'location' => \$datastr); | |
251 Function: process downloaded data before loading into a Bio::SeqIO | |
252 Returns : void | |
253 Args : hash with two keys - 'type' can be 'string' or 'file' | |
254 - 'location' either file location or string | |
255 reference containing data | |
256 | |
257 =cut | |
258 | |
259 # the default method, works for genbank/genpept, other classes should | |
260 # override it with their own method. | |
261 | |
262 sub postprocess_data { | |
263 my ($self, %args) = @_; | |
264 my $data; | |
265 my $type = uc $args{'type'}; | |
266 my $location = $args{'location'}; | |
267 if( !defined $type || $type eq '' || !defined $location) { | |
268 return; | |
269 } elsif( $type eq 'STRING' ) { | |
270 $data = $$location; | |
271 } elsif ( $type eq 'FILE' ) { | |
272 open(TMP, $location) or $self->throw("could not open file $location"); | |
273 my @in = <TMP>; | |
274 close TMP; | |
275 $data = join("", @in); | |
276 } | |
277 | |
278 $data = "<tag>". $data. "</tag>"; | |
279 | |
280 if( $type eq 'FILE' ) { | |
281 open(TMP, ">$location") or $self->throw("could overwrite file $location"); | |
282 print TMP $data; | |
283 close TMP; | |
284 } elsif ( $type eq 'STRING' ) { | |
285 ${$args{'location'}} = $data; | |
286 } | |
287 | |
288 $self->debug("format is ". $self->request_format(). " data is $data\n"); | |
289 | |
290 } | |
291 | |
292 | |
293 | |
294 | |
295 =head2 VERSION and Revision | |
296 | |
297 Usage : print $Bio::DB::Biblio::biofetch::VERSION; | |
298 print $Bio::DB::Biblio::biofetch::Revision; | |
299 | |
300 =cut | |
301 | |
302 =head2 Defaults | |
303 | |
304 Usage : print $Bio::DB::Biblio::biofetch::DEFAULT_SERVICE; | |
305 | |
306 =cut | |
307 | |
308 1; | |
309 __END__ |