Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/DB/Query/GenBank.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: GenBank.pm,v 1.4.2.1 2003/09/09 21:28:52 lstein Exp $ | |
2 # | |
3 # BioPerl module for Bio::DB::Query::GenBank.pm | |
4 # | |
5 # Cared for by Lincoln Stein <lstein@cshl.org> | |
6 # | |
7 # Copyright Lincoln Stein | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # | |
11 # POD documentation - main docs before the code | |
12 # | |
13 | |
14 =head1 NAME | |
15 | |
16 Bio::DB::Query::GenBank - Build a GenBank Entrez Query | |
17 | |
18 =head1 SYNOPSIS | |
19 | |
20 my $query_string = 'Oryza[Organism] AND EST[Keyword]'; | |
21 my $query = Bio::DB::Query::GenBank->new(-db=>'nucleotide', | |
22 -query=>$query_string, | |
23 -mindate => '2001', | |
24 -maxdate => '2002'); | |
25 my $count = $query->count; | |
26 my @ids = $query->ids; | |
27 | |
28 # get a genbank database handle | |
29 my $gb = new Bio::DB::GenBank; | |
30 my $stream = $gb->get_Stream_by_query($query); | |
31 while (my $seq = $stream->next_seq) { | |
32 ... | |
33 } | |
34 | |
35 # initialize the list yourself | |
36 my $query = Bio::DB::Query::GenBank->new(-ids=>[195052,2981014,11127914]); | |
37 | |
38 | |
39 =head1 DESCRIPTION | |
40 | |
41 This class encapsulates NCBI Entrez queries. It can be used to store | |
42 a list of GI numbers, to translate an Entrez query expression into a | |
43 list of GI numbers, or to count the number of terms that would be | |
44 returned by a query. Once created, the query object can be passed to | |
45 a Bio::DB::GenBank object in order to retrieve the entries | |
46 corresponding to the query. | |
47 | |
48 =head1 FEEDBACK | |
49 | |
50 =head2 Mailing Lists | |
51 | |
52 User feedback is an integral part of the | |
53 evolution of this and other Bioperl modules. Send | |
54 your comments and suggestions preferably to one | |
55 of the Bioperl mailing lists. Your participation | |
56 is much appreciated. | |
57 | |
58 bioperl-l@bioperl.org - General discussion | |
59 http://bioperl.org/MailList.shtml - About the mailing lists | |
60 | |
61 =head2 Reporting Bugs | |
62 | |
63 Report bugs to the Bioperl bug tracking system to | |
64 help us keep track of the bugs and their resolution. | |
65 Bug reports can be submitted via email or the | |
66 web: | |
67 | |
68 bioperl-bugs@bio.perl.org | |
69 http://bugzilla.bioperl.org/ | |
70 | |
71 =head1 AUTHOR - Lincoln Stein | |
72 | |
73 Email lstein@cshl.org | |
74 | |
75 =head1 APPENDIX | |
76 | |
77 The rest of the documentation details each of the | |
78 object methods. Internal methods are usually | |
79 preceded with a _ | |
80 | |
81 =cut | |
82 | |
83 # Let the code begin... | |
84 | |
85 package Bio::DB::Query::GenBank; | |
86 use strict; | |
87 use Bio::DB::Query::WebQuery; | |
88 use URI::Escape 'uri_unescape'; | |
89 | |
90 use constant EPOST => 'http://www.ncbi.nih.gov/entrez/eutils/epost.fcgi'; | |
91 use constant ESEARCH => 'http://www.ncbi.nih.gov/entrez/eutils/esearch.fcgi'; | |
92 use constant DEFAULT_DB => 'protein'; | |
93 use constant MAXENTRY => 100; | |
94 | |
95 use vars qw(@ISA @ATTRIBUTES $VERSION); | |
96 | |
97 @ISA = 'Bio::DB::Query::WebQuery'; | |
98 $VERSION = '0.2'; | |
99 | |
# Generate a simple get/set accessor for every query attribute at compile
# time.  Each accessor stores its value in $self->{"_<name>"}; when called
# with an argument it stores the new value and returns the PREVIOUS one,
# when called without arguments it returns the current value.
BEGIN {
    @ATTRIBUTES = qw(db reldate mindate maxdate datetype);
    for my $method (@ATTRIBUTES) {
        my $slot = "_$method";    # hash key backing this attribute

        # Install a closure rather than compiling source text with a string
        # eval: no code is built from strings and nothing can fail silently
        # in an unchecked $@.
        no strict 'refs';
        *{ __PACKAGE__ . "::$method" } = sub {
            my $self     = shift;
            my $previous = $self->{$slot};
            $self->{$slot} = shift if @_;
            return $previous;
        };
    }
}
113 | |
114 =head2 new | |
115 | |
116 Title : new | |
117 Usage : $db = Bio::DB::Query::GenBank->new(@args) | |
118 Function: create new query object | |
119 Returns : new query object | |
120 Args : -db database ('protein' or 'nucleotide') | |
121 -query query string | |
122 -mindate minimum date to retrieve from | |
123 -maxdate maximum date to retrieve from | |
124 -reldate relative date to retrieve from (days) | |
125 -datetype date field to use ('edat' or 'mdat') | |
126 -ids array ref of gids (overrides query) | |
127 | |
128 This method creates a new query object. Typically you will specify a | |
129 -db and a -query argument, possibly modified by -mindate, -maxdate, or | |
130 -reldate. -mindate and -maxdate specify minimum and maximum dates for | |
131 entries you are interested in retrieving, expressed in the form | |
132 YYYY/MM/DD (YYYY/MM and YYYY are also accepted). -reldate is used to fetch entries that are more recent | |
133 than the indicated number of days. | |
134 | |
135 If you provide an array reference of IDs in -ids, the query will be | |
136 ignored and the list of IDs will be used when the query is passed to a | |
137 Bio::DB::GenBank object's get_Stream_by_query() method. A variety of | |
138 IDs are automatically recognized, including GI numbers, Accession | |
139 numbers, Accession.version numbers and locus names. | |
140 | |
141 =cut | |
142 | |
# Constructor: builds the object through the parent class, then records the
# Entrez-specific options.  The database falls back to DEFAULT_DB and the
# date type to 'mdat' when not supplied; the date limits are only stored
# when a true value was passed.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    # IDS stays in the key list although its value is not used here --
    # presumably the parent constructor consumes -ids; verify against
    # Bio::DB::Query::WebQuery.
    my ($db, $reldate, $mindate, $maxdate, $datetype)
        = $self->_rearrange([qw(DB RELDATE MINDATE MAXDATE DATETYPE IDS)], @_);

    $self->db($db ? $db : DEFAULT_DB);
    $self->reldate($reldate) if $reldate;
    $self->mindate($mindate) if $mindate;
    $self->maxdate($maxdate) if $maxdate;
    $self->datetype($datetype || 'mdat');

    return $self;
}
156 | |
157 =head2 cookie | |
158 | |
159 Title : cookie | |
160 Usage : ($cookie,$querynum) = $db->cookie | |
161 Function: return the NCBI query cookie | |
162 Returns : list of (cookie,querynum) | |
163 Args : none to retrieve; (cookie, querynum) to set | |
164 | |
165 NOTE: this information is used by Bio::DB::GenBank in | |
166 conjunction with efetch. | |
167 | |
168 =cut | |
169 | |
# Get or set the (cookie, querynum) pair.
#   With arguments   : stores the first as the cookie and the second as the
#                      query number; no query is run.
#   Without arguments: calls _run_query, then returns the stored
#                      (cookie, querynum) pair.
sub cookie {
    my $self = shift;

    unless (@_) {
        $self->_run_query;
        return @{$self}{qw(_cookie _querynum)};
    }

    my ($new_cookie, $new_querynum) = @_;
    $self->{'_cookie'} = $new_cookie;
    return $self->{'_querynum'} = $new_querynum;
}
182 | |
183 =head2 _request_parameters | |
184 | |
185 Title : _request_parameters | |
186 Usage : ($method,$base,@params) = $db->_request_parameters | |
187 Function: return information needed to construct the request | |
188 Returns : list of method, url base and key=>value pairs | |
189 Args : none | |
190 | |
191 =cut | |
192 | |
# Assemble everything needed to issue the ESearch request.
# Returns a list: the HTTP method ('get'), the ESEARCH base URL, and then
# key => value pairs made of
#   - every attribute in @ATTRIBUTES whose accessor returns a true value,
#   - usehistory => 'y' and tool => 'bioperl',
#   - term   => the query string,
#   - retmax => the stored count if true, otherwise MAXENTRY.
sub _request_parameters {
    my $self = shift;

    my @params;
    for my $attribute (@ATTRIBUTES) {
        # Dynamic method call by name: replaces the former string eval,
        # which recompiled code on every call, invoked each accessor twice
        # and silently swallowed any error.
        my $value = $self->$attribute();
        push @params, $attribute => $value if $value;
    }

    push @params, 'usehistory' => 'y', 'tool' => 'bioperl';
    push @params, 'term'   => $self->query;
    push @params, 'retmax' => ($self->{'_count'} || MAXENTRY);

    return ('get', ESEARCH, @params);
}
204 | |
205 | |
206 =head2 count | |
207 | |
208 Title : count | |
209 Usage : $count = $db->count; | |
210 Function: return count of number of entries retrieved by query | |
211 Returns : integer | |
212 Args : none | |
213 | |
214 Returns the number of entries that are matched by the query. | |
215 | |
216 =cut | |
217 | |
# Get or set the number of entries matched by the query.
#   With an argument : stores it as the new count and returns the PREVIOUS
#                      count; no query is run.
#   Without arguments: calls _run_query, then returns the stored count.
sub count {
    my $self = shift;

    unless (@_) {
        $self->_run_query;
        return $self->{'_count'};
    }

    my $previous = $self->{'_count'};
    $self->{'_count'} = shift;
    return $previous;
}
230 | |
231 =head2 ids | |
232 | |
233 Title : ids | |
234 Usage : @ids = $db->ids([@ids]) | |
235 Function: get/set matching ids | |
236 Returns : array of sequence ids | |
237 Args : (optional) array ref with new set of ids | |
238 | |
239 =cut | |
240 | |
241 =head2 query | |
242 | |
243 Title : query | |
244 Usage : $query = $db->query([$query]) | |
245 Function: get/set query string | |
246 Returns : string | |
247 Args : (optional) new query string | |
248 | |
249 =cut | |
250 | |
251 =head2 _parse_response | |
252 | |
253 Title : _parse_response | |
254 Usage : $db->_parse_response($content) | |
255 Function: parse out response | |
256 Returns : empty | |
257 Args : content of the server response (string) | |
258 Throws : 'unparseable output exception' | |
259 | |
260 =cut | |
261 | |
# Parse the text of an ESearch response and record its results on the object.
#   Arg    : $content - the response body as a string
#   Effects: stores the hit count, the id list (only when the response was
#            not truncated), the truncation flag, and the WebEnv cookie with
#            its query key.
#   Warns  : when the response contains an <ErrorList> element.
#   Throws : when the response contains an <OutputMessage> element, or when
#            no <Count> element can be found (unparseable output).
sub _parse_response {
    my $self    = shift;
    my $content = shift;

    if (my ($warning) = $content =~ m!<ErrorList>(.+)</ErrorList>!s) {
        warn "Warning(s) from GenBank: $warning\n";
    }
    if (my ($error) = $content =~ /<OutputMessage>([^<]+)/) {
        $self->throw("Error from Genbank: $error");
    }

    # Without a <Count> the response cannot be interpreted; fail loudly
    # instead of storing an undefined count and an empty id list.
    my ($count) = $content =~ /<Count>(\d+)/;
    $self->throw("Unparseable output from GenBank: no <Count> element found")
        unless defined $count;

    # A missing <RetMax> is treated as 0, which is what the numeric
    # comparison below already made of an undefined value.
    my ($max) = $content =~ /<RetMax>(\d+)/;
    $max = 0 unless defined $max;

    # More hits than returned ids: the id list is incomplete, so it is not
    # stored.
    my $truncated = $count > $max;
    $self->count($count);
    if (!$truncated) {
        my @ids = $content =~ /<Id>(\d+)/g;
        $self->ids(\@ids);
    }
    $self->_truncated($truncated);

    my ($cookie)   = $content =~ m!<WebEnv>(\S+)</WebEnv>!;
    my ($querykey) = $content =~ m!<QueryKey>(\d+)!;
    $self->cookie(uri_unescape($cookie), $querykey);
}
285 | |
286 1; |