comparison variant_effect_predictor/Bio/DB/Query/GenBank.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: GenBank.pm,v 1.4.2.1 2003/09/09 21:28:52 lstein Exp $
2 #
3 # BioPerl module for Bio::DB::Query::GenBank.pm
4 #
5 # Cared for by Lincoln Stein <lstein@cshl.org>
6 #
7 # Copyright Lincoln Stein
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # POD documentation - main docs before the code
12 #
13
14 =head1 NAME
15
16 Bio::DB::Query::GenBank - Build a GenBank Entrez Query
17
18 =head1 SYNOPSIS
19
20 my $query_string = 'Oryza[Organism] AND EST[Keyword]';
21 my $query = Bio::DB::Query::GenBank->new(-db=>'nucleotide',
22 -query=>$query_string,
23 -mindate => '2001',
24 -maxdate => '2002');
25 my $count = $query->count;
26 my @ids = $query->ids;
27
28 # get a genbank database handle
29 my $gb = new Bio::DB::GenBank;
30 my $stream = $gb->get_Stream_by_query($query);
31 while (my $seq = $stream->next_seq) {
32 ...
33 }
34
35 # initialize the list yourself
36 my $query = Bio::DB::Query::GenBank->new(-ids=>[195052,2981014,11127914]);
37
38
39 =head1 DESCRIPTION
40
41 This class encapsulates NCBI Entrez queries. It can be used to store
42 a list of GI numbers, to translate an Entrez query expression into a
43 list of GI numbers, or to count the number of terms that would be
44 returned by a query. Once created, the query object can be passed to
45 a Bio::DB::GenBank object in order to retrieve the entries
46 corresponding to the query.
47
48 =head1 FEEDBACK
49
50 =head2 Mailing Lists
51
52 User feedback is an integral part of the
53 evolution of this and other Bioperl modules. Send
54 your comments and suggestions preferably to one
55 of the Bioperl mailing lists. Your participation
56 is much appreciated.
57
58 bioperl-l@bioperl.org - General discussion
59 http://bioperl.org/MailList.shtml - About the mailing lists
60
61 =head2 Reporting Bugs
62
63 Report bugs to the Bioperl bug tracking system to
64 help us keep track the bugs and their resolution.
65 Bug reports can be submitted via email or the
66 web:
67
68 bioperl-bugs@bio.perl.org
69 http://bugzilla.bioperl.org/
70
71 =head1 AUTHOR - Lincoln Stein
72
73 Email lstein@cshl.org
74
75 =head1 APPENDIX
76
77 The rest of the documentation details each of the
78 object methods. Internal methods are usually
79 preceded with a _
80
81 =cut
82
83 # Let the code begin...
84
85 package Bio::DB::Query::GenBank;
86 use strict;
87 use Bio::DB::Query::WebQuery;
88 use URI::Escape 'uri_unescape';
89
90 use constant EPOST => 'http://www.ncbi.nih.gov/entrez/eutils/epost.fcgi';
91 use constant ESEARCH => 'http://www.ncbi.nih.gov/entrez/eutils/esearch.fcgi';
92 use constant DEFAULT_DB => 'protein';
93 use constant MAXENTRY => 100;
94
95 use vars qw(@ISA @ATTRIBUTES $VERSION);
96
97 @ISA = 'Bio::DB::Query::WebQuery';
98 $VERSION = '0.2';
99
100 BEGIN {
101 @ATTRIBUTES = qw(db reldate mindate maxdate datetype);
102 for my $method (@ATTRIBUTES) {
103 eval <<END;
104 sub $method {
105 my \$self = shift;
106 my \$d = \$self->{'_$method'};
107 \$self->{'_$method'} = shift if \@_;
108 \$d;
109 }
110 END
111 }
112 }
113
114 =head2 new
115
116 Title : new
117 Usage : $db = Bio::DB::Query::GenBank->new(@args)
118 Function: create new query object
119 Returns : new query object
120 Args : -db database ('protein' or 'nucleotide')
121 -query query string
122 -mindate minimum date to retrieve from
123 -maxdate maximum date to retrieve from
124 -reldate relative date to retrieve from (days)
125 -datetype date field to use ('edat' or 'mdat')
126 -ids array ref of gids (overrides query)
127
128 This method creates a new query object. Typically you will specify a
129 -db and a -query argument, possibly modified by -mindate, -maxdate, or
130 -reldate. -mindate and -maxdate specify minimum and maximum dates for
131 entries you are interested in retrieving, expressed in the form
132 DD/MM/YYYY. -reldate is used to fetch entries that are more recent
133 than the indicated number of days.
134
135 If you provide an array reference of IDs in -ids, the query will be
136 ignored and the list of IDs will be used when the query is passed to a
137 Bio::DB::GenBank object's get_Stream_by_query() method. A variety of
138 IDs are automatically recognized, including GI numbers, Accession
139 numbers, Accession.version numbers and locus names.
140
141 =cut
142
143 sub new {
144 my $class = shift;
145 my $self = $class->SUPER::new(@_);
146 my ($db,$reldate,$mindate,$maxdate,$datetype,$ids)
147 = $self->_rearrange([qw(DB RELDATE MINDATE MAXDATE DATETYPE IDS)],@_);
148 $self->db($db || DEFAULT_DB);
149 $reldate && $self->reldate($reldate);
150 $mindate && $self->mindate($mindate);
151 $maxdate && $self->maxdate($maxdate);
152 $datetype ||= 'mdat';
153 $datetype && $self->datetype($datetype);
154 $self;
155 }
156
157 =head2 cookie
158
159 Title : cookie
160 Usage : ($cookie,$querynum) = $db->cookie
161 Function: return the NCBI query cookie
162 Returns : list of (cookie,querynum)
163 Args : none
164
165 NOTE: this information is used by Bio::DB::GenBank in
166 conjunction with efetch.
167
168 =cut
169
170 sub cookie {
171 my $self = shift;
172 if (@_) {
173 $self->{'_cookie'} = shift;
174 $self->{'_querynum'} = shift;
175 }
176
177 else {
178 $self->_run_query;
179 @{$self}{qw(_cookie _querynum)};
180 }
181 }
182
183 =head2 _request_parameters
184
185 Title : _request_parameters
186 Usage : ($method,$base,@params = $db->_request_parameters
187 Function: return information needed to construct the request
188 Returns : list of method, url base and key=>value pairs
189 Args : none
190
191 =cut
192
193 sub _request_parameters {
194 my $self = shift;
195 my ($method,$base);
196 my @params = map {eval("\$self->$_") ? ($_ => eval("\$self->$_")) : () } @ATTRIBUTES;
197 push @params,('usehistory'=>'y','tool'=>'bioperl');
198 $method = 'get';
199 $base = ESEARCH;
200 push @params,('term' => $self->query);
201 push @params,('retmax' => $self->{'_count'} || MAXENTRY);
202 ($method,$base,@params);
203 }
204
205
206 =head2 count
207
208 Title : count
209 Usage : $count = $db->count;
210 Function: return count of number of entries retrieved by query
211 Returns : integer
212 Args : none
213
214 Returns the number of entries that are matched by the query.
215
216 =cut
217
218 sub count {
219 my $self = shift;
220 if (@_) {
221 my $d = $self->{'_count'};
222 $self->{'_count'} = shift;
223 return $d;
224 }
225 else {
226 $self->_run_query;
227 return $self->{'_count'};
228 }
229 }
230
231 =head2 ids
232
233 Title : ids
234 Usage : @ids = $db->ids([@ids])
235 Function: get/set matching ids
236 Returns : array of sequence ids
237 Args : (optional) array ref with new set of ids
238
239 =cut
240
241 =head2 query
242
243 Title : query
244 Usage : $query = $db->query([$query])
245 Function: get/set query string
246 Returns : string
247 Args : (optional) new query string
248
249 =cut
250
251 =head2 _parse_response
252
253 Title : _parse_response
254 Usage : $db->_parse_response($content)
255 Function: parse out response
256 Returns : empty
257 Args : none
258 Throws : 'unparseable output exception'
259
260 =cut
261
262 sub _parse_response {
263 my $self = shift;
264 my $content = shift;
265 if (my ($warning) = $content =~ m!<ErrorList>(.+)</ErrorList>!s) {
266 warn "Warning(s) from GenBank: $warning\n";
267 }
268 if (my ($error) = $content =~ /<OutputMessage>([^<]+)/) {
269 $self->throw("Error from Genbank: $error");
270 }
271
272 my ($count) = $content =~ /<Count>(\d+)/;
273 my ($max) = $content =~ /<RetMax>(\d+)/;
274 my $truncated = $count > $max;
275 $self->count($count);
276 if (!$truncated) {
277 my @ids = $content =~ /<Id>(\d+)/g;
278 $self->ids(\@ids);
279 }
280 $self->_truncated($truncated);
281 my ($cookie) = $content =~ m!<WebEnv>(\S+)</WebEnv>!;
282 my ($querykey) = $content =~ m!<QueryKey>(\d+)!;
283 $self->cookie(uri_unescape($cookie),$querykey);
284 }
285
286 1;