annotate variant_effect_predictor/Bio/DB/BioFetch.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: BioFetch.pm,v 1.13.2.1 2003/06/25 13:44:18 heikki Exp $
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl module for Bio::DB::BioFetch
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5 # Cared for by Lincoln Stein <lstein@cshl.org>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright Lincoln Stein
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11 # POD documentation - main docs before the code
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14 package Bio::DB::BioFetch;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16 use Bio::DB::WebDBSeqI;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17 use HTTP::Request::Common 'POST';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21 Bio::DB::BioFetch - Database object interface to BioFetch retrieval
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25 use Bio::DB::BioFetch;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27 $bf = new Bio::DB::BioFetch;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29 $seq = $bf->get_Seq_by_id('BUM'); # EMBL or SWALL ID
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31 # change formats, storage procedures
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32 $bf = new Bio::DB::BioFetch(-format => 'fasta',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33 -retrievaltype => 'tempfile',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34 -db => 'EMBL');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36 $stream = $bf->get_Stream_by_id(['BUM','J00231']);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37 while (my $s = $stream->next_seq) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38 print $s->seq,"\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40 # get a RefSeq entry
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41 $bf->db('refseq');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42 eval {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43 $seq = $bf->get_Seq_by_version('NM_006732.1'); # RefSeq VERSION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44 };
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45 print "accession is ", $seq->accession_number, "\n" unless $@;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48 =head1 DESCRIPTION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50 Bio::DB::BioFetch is a guaranteed best effort sequence entry fetching
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51 method. It goes to the Web-based dbfetch server located at the EBI
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52 (http://www.ebi.ac.uk/cgi-bin/dbfetch) to retrieve sequences in the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53 EMBL or GenBank sequence repositories.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55 This module implements all the Bio::DB::RandomAccessI interface, plus
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56 the get_Stream_by_id() and get_Stream_by_acc() methods that are found
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57 in the Bio::DB::SwissProt interface.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59 =head1 FEEDBACK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61 =head2 Mailing Lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
62
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
63 User feedback is an integral part of the evolution of this and other
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
64 Bioperl modules. Send your comments and suggestions preferably to one
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
65 of the Bioperl mailing lists. Your participation is much appreciated.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
66
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
67
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
68 bioperl-l@bioperl.org - General discussion
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
69 http://bio.perl.org/MailList.html - About the mailing lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
70
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
71 =head2 Reporting Bugs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
72
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
73 Report bugs to the Bioperl bug tracking system to help us keep track
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
74 of the bugs and their resolution. Bug reports can be submitted via email
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
75 or the web:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
76
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
77 bioperl-bugs@bio.perl.org
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
78 http://bugzilla.bioperl.org/
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
79
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
80 =head1 AUTHOR - Lincoln Stein
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
81
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
82 Email Lincoln Stein E<lt>lstein@cshl.orgE<gt>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
83
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
84 Also thanks to Heikki Lehvaslaiho E<lt>heikki@ebi.ac.ukE<gt> for the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
85 BioFetch server and interface specification.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
86
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
87 =head1 APPENDIX
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
88
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
89 The rest of the documentation details each of the object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
90 methods. Internal methods are usually preceded with a _
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
91
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
92 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
93
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
94 # Let the code begin...
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
95 use vars qw(@ISA $VERSION %FORMATMAP );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
96 use Bio::Root::Root;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
97 @ISA = qw(Bio::DB::WebDBSeqI Bio::Root::Root);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
98 $VERSION = '1.0';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
99
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
100 # warning: names used here must map into Bio::SeqIO::* space
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
101 use constant DEFAULT_LOCATION => 'http://www.ebi.ac.uk/cgi-bin/dbfetch';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
BEGIN {
    # Lookup table keyed by the lower-case database name accepted by db().
    # Within each entry:
    #   default    - module name used for the 'default' format
    #   <format>   - BioFetch format name => SeqIO module name
    #   namespace  - value sent as the 'db' field of the request
    %FORMATMAP = (
        embl      => { default   => 'embl',    embl      => 'embl',
                       fasta     => 'fasta',   namespace => 'embl'    },
        swissprot => { default   => 'swiss',   swissprot => 'swiss',
                       fasta     => 'fasta',   namespace => 'swall'   },
        refseq    => { default   => 'genbank', genbank   => 'genbank',
                       fasta     => 'fasta',   namespace => 'RefSeq'  },
        swall     => { default   => 'swiss',   swissprot => 'swiss',
                       fasta     => 'fasta',   namespace => 'swall'   },
        genbank   => { default   => 'genbank', genbank   => 'genbank',
                       namespace => 'genbank' },
        genpep    => { default   => 'genbank', genbank   => 'genbank',
                       namespace => 'genpep'  },
    );
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
142
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
143 =head2 new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
144
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
145 Title : new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
146 Usage : $bf = Bio::DB::BioFetch->new(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
147 Function: Construct a new Bio::DB::BioFetch object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
148 Returns : a Bio::DB::BioFetch object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
149 Args : see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
150 Throws :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
151
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
152 @args are standard -name=E<gt>value options as listed in the following
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
153 table. If you do not provide any options, the module assumes reasonable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
154 defaults.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
155
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
156 Option Value Default
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
157 ------ ----- -------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
158
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
159 -baseaddress location of dbfetch server http://www.ebi.ac.uk/cgi-bin/dbfetch
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
160 -retrievaltype "tempfile" or "io_string" io_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
161 -format "embl", "fasta", "swissprot", embl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
162 or "genbank"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
163 -db "embl", "genbank" or "swissprot" embl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
164
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
165 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
166
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
167 #'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub new {
    my $class = shift;

    # Let the parent class build the object from the full argument list,
    # then pick out the one option handled here (-db).
    my $self = $class->SUPER::new(@_);
    my ($requested_db) = $self->_rearrange(['DB'], @_);

    # Fall back to the class default when no -db was supplied.
    $self->db($requested_db || $self->default_db);

    # Only install the default server address if none is set yet.
    unless ($self->url_base_address) {
        $self->url_base_address(DEFAULT_LOCATION);
    }

    return $self;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
177
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
178 =head2 new_from_registry
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
179
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
180 Title : new_from_registry
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
181 Usage : $biofetch = $db->new_from_registry(%config)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
182 Function: Creates a BioFetch object from the registry config hash
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
183 Returns : itself
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
184 Args : A configuration hash (see Registry.pm)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
185 Throws :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
186
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
187
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
188 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
189
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub new_from_registry {
    my $class  = shift;
    my %config = @_;

    # The registry's 'location' entry becomes the base address of the object.
    my $self = $class->SUPER::new(-BASEADDRESS => $config{'location'});

    # Select the database only when the registry names one.
    if ($config{dbname}) {
        $self->db($config{'dbname'});
    }

    return $self;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
199
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
200 # from Bio::DB::RandomAccessI
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
201
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
202 =head2 get_Seq_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
203
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
204 Title : get_Seq_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
205 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
206 Function: Gets a Bio::Seq object by its name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
207 Returns : a Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
208 Args : the id (as a string) of a sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
209 Throws : "id does not exist" exception
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
210
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
211
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
212 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
213
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
214 =head2 get_Seq_by_acc
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
215
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
216 Title : get_Seq_by_acc
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
217 Usage : $seq = $db->get_Seq_by_acc('X77802');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
218 Function: Gets a Bio::Seq object by accession number
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
219 Returns : A Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
220 Args : accession number (as a string)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
221 Throws : "acc does not exist" exception
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
222
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
223 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
224
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
225 =head2 get_Seq_by_gi
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
226
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
227 Title : get_Seq_by_gi
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
228 Usage : $seq = $db->get_Seq_by_gi('405830');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
229 Function: Gets a Bio::Seq object by gi number
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
230 Returns : A Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
231 Args : gi number (as a string)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
232 Throws : "gi does not exist" exception
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
233
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
234 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
235
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
236 =head2 get_Seq_by_version
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
237
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
238 Title : get_Seq_by_version
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
239 Usage : $seq = $db->get_Seq_by_version('X77802.1');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
240 Function: Gets a Bio::Seq object by sequence version
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
241 Returns : A Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
242 Args : accession.version (as a string)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
243 Throws : "acc.version does not exist" exception
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
244
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
245 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
246
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub get_Seq_by_version {
    my $self          = shift;
    my $versioned_acc = shift;

    # A versioned accession is looked up exactly like a plain accession.
    return $self->get_Seq_by_acc($versioned_acc);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
251
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
252
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
253 =head2 get_Stream_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
254
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
255 Title : get_Stream_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
256 Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
257 Function: Gets a series of Seq objects by unique identifiers
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
258 Returns : a Bio::SeqIO stream object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
259 Args : $ref : a reference to an array of unique identifiers for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
260 the desired sequence entries
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
261
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
262 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
263
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
264 =head2 get_Stream_by_gi
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
265
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
266 Title : get_Stream_by_gi
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
267 Usage : $stream = $db->get_Stream_by_gi([$gi1, $gi2]);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
268 Function: Gets a series of Seq objects by gi numbers
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
269 Returns : a Bio::SeqIO stream object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
270 Args : $ref : a reference to an array of gi numbers for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
271 the desired sequence entries
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
272 Note : For GenBank, this just calls the same code for get_Stream_by_id()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
273
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
274 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
275
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
276 =head2 get_Stream_by_batch
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
277
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
278 Title : get_Stream_by_batch
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
279 Usage : $stream = $db->get_Stream_by_batch($ref);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
280 Function: Get a series of Seq objects by their IDs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
281 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
282 Returns : a Bio::SeqIO stream object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
283 Args : $ref : an array reference containing a list of unique
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
284 ids/accession numbers.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
285
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
286 In some of the Bio::DB::* modules, get_Stream_by_id() is called
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
287 get_Stream_by_batch(). Since there seems to be no consensus, this
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
288 is provided as an alias.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
289
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
290 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
291
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
292 *get_Stream_by_batch = \&Bio::DB::WebDBSeqI::get_Stream_by_id;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
293
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
294 =head1 The remainder of these methods are for internal use
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
295
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
296 =head2 get_request
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
297
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
298 Title : get_request
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
299 Usage : my $url = $self->get_request
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
300 Function: returns a HTTP::Request object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
301 Returns : an HTTP::Request object (a POST to the dbfetch server)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
302 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
303
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
304 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
305
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
306
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Build the POST request that asks the BioFetch server for one or more entries.
#
# Args    : -uids   => a single identifier or a reference to an array of them
#           -format => requested format (optional; default_format() is used
#                      when it is missing or false)
# Returns : the object produced by HTTP::Request::Common::POST, with the form
#           fields db, id, format and style ('raw')
# Throws  : if -uids is not defined
sub get_request {
    my ($self, @qualifiers) = @_;
    my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)], @qualifiers);

    $self->throw("Must specify a value for UIDs to fetch")
        unless defined $uids;

    # Assigned to a scalar so exactly one value is placed into the form
    # field list below.
    my $namespace = $self->_namespace;

    # request_format() is called in list context; only its first element
    # (the format name to send) is needed here.
    $format ||= $self->default_format;
    ($format) = $self->request_format($format);

    my $base = $self->url_base_address;

    # Several identifiers are sent as one '+'-separated value.
    my $id_list = join('+', ref $uids ? @$uids : $uids);

    $self->debug("\n$base&id=$id_list\n");

    return POST($base,
                [ db     => $namespace,
                  id     => $id_list,
                  format => $format,
                  style  => 'raw'
                ]);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
332
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
333 =head2 default_format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
334
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
335 Title : default_format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
336 Usage : $format = $self->default_format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
337 Function: return the default format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
338 Returns : a string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
339 Args :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
340
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
341 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
342
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# The format name used when the caller does not ask for a specific one.
sub default_format { 'default' }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
346
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
347 =head2 default_db
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
348
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
349 Title : default_db
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
350 Usage : $db = $self->default_db
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
351 Function: return the default database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
352 Returns : a string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
353 Args :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
354
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
355 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
356
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# The database name used when none has been selected.
sub default_db {
    return 'embl';
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
358
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
359 =head2 db
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
360
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
361 Title : db
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
362 Usage : $db = $self->db([$db])
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
363 Function: get/set the database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
364 Returns : a string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
365 Args : new database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
366
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
367 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
368
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub db {
    my ($self, @new_value) = @_;

    # Setter path: names are stored lower-cased and must be keys of %FORMATMAP.
    if (@new_value) {
        my $name = lc $new_value[0];
        $self->throw("invalid db [$name], must be one of [".
                     join(' ',keys %FORMATMAP).  "]")
            unless $FORMATMAP{$name};
        $self->{_db} = $name;
    }

    # Getter path: fall back to the class default while nothing is stored.
    my $current = $self->{_db};
    return $current || $self->default_db;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
381
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Internal: name under which the currently selected database is requested
# from the server.
# Returns the 'namespace' entry recorded in %FORMATMAP for $self->db, or the
# database name itself when no such entry exists.
sub _namespace {
    my $self = shift;
    my $db   = $self->db;

    # Look the entry up first so an unknown name is not autovivified into
    # %FORMATMAP as a side effect of reading it.
    my $entry = $FORMATMAP{$db};

    # '||' rather than 'or': "return EXPR or $db" parses as
    # "(return EXPR) or $db", which made the fallback unreachable.
    return ($entry && $entry->{namespace}) || $db;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
387
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
388 =head2 postprocess_data
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
389
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
390 Title : postprocess_data
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
391 Usage : $self->postprocess_data ( 'type' => 'string',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
392 'location' => \$datastr);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
393 Function: process downloaded data before loading into a Bio::SeqIO
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
394 Returns : void
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
395 Args : hash with two keys - 'type' can be 'string' or 'file'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
396 - 'location' either file location or string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
397 reference containing data
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
398
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
399 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
400
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Inspect freshly downloaded data for a BioFetch server-side error report.
#
# Args   : hash with two keys
#            'type'     - 'string' or 'file'
#            'location' - reference to the data string, or path of the file
# Returns: nothing useful
# Throws : "BioFetch Error <code>: <message>" when a line of the form
#          "ERROR <code> <message>" is found (anywhere in a string, or in the
#          first three lines of a file); also when the file cannot be opened
#          or the type is not recognised.
sub postprocess_data {
    my ($self, %args) = @_;

    # Treat a missing type like an unknown one instead of emitting
    # "uninitialized value" warnings from the comparisons below.
    my $type = defined $args{'type'} ? $args{'type'} : '';

    if ($type eq 'string') {
        my $stringref = $args{'location'};
        if ($$stringref =~ /^ERROR (\d+) (.+)/m) {
            $self->throw("BioFetch Error $1: $2");
        }
    }
    elsif ($type eq 'file') {
        my $file = $args{'location'};

        # Three-argument open with a lexical handle: the file name can no
        # longer be interpreted as a mode/pipe specification, and no global
        # bareword handle is clobbered.
        open(my $fh, '<', $file)
            or $self->throw("Couldn't open $file: $!");

        # The error may be anywhere on the first three lines because the
        # CGI headers are sometimes printed out by the server.  Stop early
        # on short files rather than collecting undef.
        my $head = '';
        for my $line_no (1 .. 3) {
            my $line = <$fh>;
            last unless defined $line;
            $head .= $line;
        }

        # Close before a possible throw so the handle is never leaked.
        close $fh;

        if ($head =~ /^ERROR (\d+) (.+)/m) {
            $self->throw("BioFetch Error $1: $2");
        }
    }
    else {
        $self->throw("Don't know how to postprocess data of type $type");
    }

    return;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
427
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
428
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
429 =head2 request_format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
430
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
431 Title : request_format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
432 Usage : my ($req_format, $ioformat) = $self->request_format;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
433 $self->request_format("genbank");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
434 $self->request_format("fasta");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
435 Function: Get/Set sequence format retrieval. The get-form will normally not
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
436 be used outside of this and derived modules.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
437 Returns : Array of two strings, the first representing the format for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
438 retrieval, and the second specifying the corresponding SeqIO format.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
439 Args : $format = sequence format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
440
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
441 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
442
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Get or set the retrieval format for the current database.
#
# With a defined argument, the lower-cased format is validated against the
# %FORMATMAP entry of the current database ('default' is always accepted)
# and stored together with its mapped value.  In every case the stored
# pair is returned as a list.
sub request_format {
    my ($self, $value) = @_;

    if (defined $value) {
        my $db        = $self->db;
        my $namespace = $self->_namespace;
        my $format    = lc $value;

        # Optional diagnostic output for verbose objects.
        if ($self->verbose > 0) {
            print "format:", $format, " module:", $FORMATMAP{$db}->{$format}, " ($namespace)\n";
        }

        # Reject anything that is neither 'default' nor a known key.
        if ($format ne 'default' && !$FORMATMAP{$db}->{$format}) {
            $self->throw("Invalid format [$format], must be one of [".
                         join(' ', keys %{$FORMATMAP{$db}}) . "]");
        }

        $self->{'_format'} = [ $format, $FORMATMAP{$db}->{$format} ];
    }

    return @{ $self->{'_format'} };
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
459
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
460
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
461 =head2 Bio::DB::WebDBSeqI methods
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
462
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
463 Overriding WebDBSeqI method to help newbies to retrieve sequences.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
464 EMBL database is all too often passed RefSeq accessions. This
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
465 redirects those calls. See L<Bio::DB::RefSeq>.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
466
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
467
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
468 =head2 get_Stream_by_acc
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
469
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
470 Title : get_Stream_by_acc
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
471 Usage : $stream = $db->get_Stream_by_acc([$acc1, $acc2]);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
472 Function: Gets a series of Seq objects by accession numbers
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
473 Returns : a Bio::SeqIO stream object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
474 Args : $ref : a reference to an array of accession numbers for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
475 the desired sequence entries
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
476
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
477 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
478
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Fetch a sequence stream for a list of accession numbers.
#
# The accessions are first handed to _check_id, then the request is
# delegated to get_seq_stream in 'single' mode and its result is returned.
sub get_Stream_by_acc {
    my $self  = shift;
    my ($ids) = @_;

    $self->_check_id($ids);

    my @query = (
        '-uids' => $ids,
        '-mode' => 'single',
    );
    return $self->get_seq_stream(@query);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
484
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
485
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
486 =head2 _check_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
487
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
488 Title : _check_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
489 Usage :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
490 Function: Throw on whole chromosome NCBI sequences not in sequence databases
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
491 and redirect RefSeq accession requests sent to EMBL.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
492 Returns :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
493 Args : $id(s), $string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
494 Throws : if accession number indicates whole chromosome NCBI sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
495
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
496 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
497
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Sanity-check accession(s) before a request is sent.
#
# Args   : $id - a single accession string, or a reference to an array of
#                accession strings (as passed by get_Stream_by_acc)
# Returns: nothing useful
# Throws : if any accession contains 'NT_' (whole chromosome NCBI contig)
# Effect : if any accession looks like a RefSeq entry (matches /N._/) while
#          the current database is not 'refseq', a warning is issued
#          (unless verbosity is negative) and the database is switched to
#          RefSeq.
sub _check_id {
    my ($self, $id) = @_;

    # Matching a regex against an array reference would test the string
    # "ARRAY(0x...)" and silently skip every check, so flatten it first.
    my @accessions = grep { defined } (ref $id eq 'ARRAY' ? @{$id} : ($id));

    # NT contigs can not be retrieved
    for my $acc (@accessions) {
        $self->throw("NT_ contigs are whole chromosome files which are not part of regular ".
                     "database distributions. Go to ftp://ftp.ncbi.nih.gov/genomes/.")
            if $acc =~ /NT_/;
    }

    # Asking for a RefSeq from EMBL/GenBank
    for my $acc (@accessions) {
        next unless $acc =~ /N._/ && $self->db ne 'refseq';

        $self->warn("[$acc] is not a normal sequence database but a RefSeq entry.".
                    " Redirecting the request.\n")
            if $self->verbose >= 0;
        $self->db('RefSeq');

        # Once redirected the database is 'refseq'; nothing more to do.
        last;
    }

    return;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
515
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
516 1;