comparison variant_effect_predictor/Bio/DB/SwissProt.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 # $Id: SwissProt.pm,v 1.19 2002/12/01 00:05:19 jason Exp $
3 #
4 # BioPerl module for Bio::DB::SwissProt
5 #
6 # Cared for by Jason Stajich <jason@bioperl.org>
7 #
8 # Copyright Jason Stajich
9 #
10 # You may distribute this module under the same terms as perl itself
11
12 # POD documentation - main docs before the code
13 # Reworked to use Bio::DB::WebDBSeqI 2000-12-11
14
15 =head1 NAME
16
17 Bio::DB::SwissProt - Database object interface to SwissProt retrieval
18
19 =head1 SYNOPSIS
20
21 use Bio::DB::SwissProt;
22
23 $sp = new Bio::DB::SwissProt;
24
25 $seq = $sp->get_Seq_by_id('KPY1_ECOLI'); # SwissProt ID
26 # <4-letter-identifier>_<species 5-letter code>
27 # or ...
28 $seq = $sp->get_Seq_by_acc('P43780'); # SwissProt AC
29 # [OPQ]xxxxx
30
31
32 # In fact in this implementation
33 # these methods call the same webscript so you can use
34 # them interchangeably
35
36 # choose a different server to query
37 $sp = new Bio::DB::SwissProt('-servertype' => 'expasy',
38 '-hostlocation' => 'us');
39
40 $seq = $sp->get_Seq_by_id('BOLA_HAEIN'); # SwissProtID
41
42 =head1 DESCRIPTION
43
44 SwissProt is a curated database of proteins managed by the Swiss
45 Institute of Bioinformatics. This is in contrast to EMBL/GenBank/DDBJ
46 which are archives of protein information. Additional tools for
47 parsing and manipulating swissprot files can be found at
48 ftp://ftp.ebi.ac.uk/pub/software/swissprot/Swissknife/.
49
50 Allows the dynamic retrieval of Sequence objects (Bio::Seq) from the
51 SwissProt database via an expasy retrieval. Perhaps through SRS
52 later.
53
54 In order to make changes transparent we have server type (expasy or ebi,
55 default ebi) and host location (each server type has its own default)
56 separated out. This allows the user to pick the closest expasy mirror
57 for running their queries.
58
59
60 =head1 FEEDBACK
61
62 =head2 Mailing Lists
63
64 User feedback is an integral part of the evolution of this and other
65 Bioperl modules. Send your comments and suggestions preferably to one
66 of the Bioperl mailing lists. Your participation is much appreciated.
67
68
69 bioperl-l@bioperl.org - General discussion
70 http://bio.perl.org/MailList.html - About the mailing lists
71
72 =head2 Reporting Bugs
73
74 Report bugs to the Bioperl bug tracking system to help us keep track
75 of the bugs and their resolution. Bug reports can be submitted via email
76 or the web:
77
78 bioperl-bugs@bio.perl.org
79 http://bugzilla.bioperl.org/
80
81 =head1 AUTHOR - Jason Stajich
82
83 Email Jason Stajich E<lt>jason@bioperl.orgE<gt>
84
85 Thanks go to Alexandre Gattiker E<lt>gattiker@isb-sib.chE<gt> of Swiss
86 Institute of Bioinformatics for helping point us in the direction of
87 the correct expasy scripts and for swissknife references.
88
89 Also thanks to Heikki Lehvaslaiho E<lt>heikki@ebi.ac.ukE<gt> for help with
90 adding EBI swall server.
91
92 =head1 APPENDIX
93
94 The rest of the documentation details each of the object
95 methods. Internal methods are usually preceded with a _
96
97 =cut
98
99 # Let the code begin...
100
101 package Bio::DB::SwissProt;
102 use strict;
103 use vars qw(@ISA $MODVERSION %HOSTS $DEFAULTFORMAT $DEFAULTSERVERTYPE);
104
105 $MODVERSION = '0.8.1';
106 use HTTP::Request::Common;
107 use Bio::DB::WebDBSeqI;
108
109 @ISA = qw(Bio::DB::WebDBSeqI);
110
# Package-wide defaults: the server type used when the caller names none,
# and the format reported by default_format().
$DEFAULTFORMAT     = 'swissprot';
$DEFAULTSERVERTYPE = 'ebi';

# Known retrieval servers, keyed by server type.  You can add your own
# here theoretically.  Each entry holds:
#   baseurl  - sprintf template for the query URL; %s receives a host name
#   hosts    - host location name => host name
#   default  - host location selected whenever the server type is set
#   jointype - separator placed between multiple ids in a single request
#   idvar    - name of the CGI variable that carries the id(s)
#   basevars - key/value pairs that are sent with every request
%HOSTS = (
    'ebi' => {
        'baseurl'  => 'http://%s/cgi-bin/dbfetch',
        'hosts'    => {
            'uk' => 'www.ebi.ac.uk',
        },
        'default'  => 'uk',
        'jointype' => ',',
        'idvar'    => 'id',
        'basevars' => [
            'db'    => 'swall',
            'style' => 'raw',
        ],
    },
    'expasy' => {
        'baseurl'  => 'http://%s/cgi-bin/sprot-retrieve-list.pl',
        'hosts'    => {
            'australia'   => 'au.expasy.org',
            'canada'      => 'ca.expasy.org',
            'china'       => 'cn.expasy.org',
            'korea'       => 'kr.expasy.org',
            'switzerland' => 'ch.expasy.org',
            'taiwan'      => 'tw.expasy.org',
            'us'          => 'us.expasy.org',
        },
        'default'  => 'us',
        # ick, CGI variables
        'jointype' => ' ',
        'idvar'    => 'list',
        'basevars' => [ ],
    },
);
147
148 # new modules should be a little more lightweight and
149 # should use Bio::Root::Root
# Constructor.  Builds the object through the parent class constructor and
# then applies the SwissProt specific settings.
#
# Named arguments (all optional):
#   -format       : retrieval format; must mention swiss, sprot or fasta
#                   (compared case-insensitively).  Anything else is
#                   replaced by default_format() after a warning.
#   -servertype   : key of %HOSTS to query, defaults to $DEFAULTSERVERTYPE
#   -hostlocation : key of the chosen server type's 'hosts' table
#
# Returns the new object.  servertype() and hostlocation() throw when they
# are handed a value that is not present in %HOSTS.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    my ($format, $hostlocation, $servertype) =
        $self->_rearrange([qw(FORMAT HOSTLOCATION SERVERTYPE)], @args);

    if( $format ) {
        if( $format =~ /swiss|sprot|fasta/i ) {
            # Fold the case so that the format accessor always receives the
            # canonical lower-case spelling, however the caller wrote it.
            $format = lc $format;
        } else {
            $self->warn("Requested Format $format is ignored because only SwissProt and Fasta formats are currently supported");
            $format = $self->default_format;
        }
    }

    # servertype() resets the host location to the server's default, so it
    # has to run before an explicitly requested host location is applied.
    $servertype = $DEFAULTSERVERTYPE unless $servertype;
    $self->servertype(lc $servertype);
    $self->hostlocation(lc $hostlocation) if $hostlocation;

    # let's always override the format, as it must be swiss or fasta
    # (this call only reads the current format when none was requested)
    $self->request_format($format);
    return $self;
}
172
173 =head2 Routines from Bio::DB::RandomAccessI
174
175 =cut
176
177 =head2 get_Seq_by_id
178
179 Title : get_Seq_by_id
180 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
181 Function: Gets a Bio::Seq object by its name
182 Returns : a Bio::Seq object
183 Args : the id (as a string) of a sequence
184 Throws : "id does not exist" exception
185
186 =cut
187
188 =head2 get_Seq_by_acc
189
190 Title : get_Seq_by_acc
191 Usage : $seq = $db->get_Seq_by_acc('X77802');
192 Function: Gets a Bio::Seq object by accession number
193 Returns : A Bio::Seq object
194 Args : accession number (as a string)
195 Throws : "acc does not exist" exception
196
197 =cut
198
199 =head2 get_Stream_by_id
200
201 Title : get_Stream_by_id
202 Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
203 Function: Gets a series of Seq objects by unique identifiers
204 Returns : a Bio::SeqIO stream object
205 Args : $ref : a reference to an array of unique identifiers for
206 the desired sequence entries
207
208 =cut
209
210 =head2 get_Stream_by_acc
211
212 Title : get_Stream_by_acc
213 Usage : $stream = $db->get_Stream_by_acc([$acc1, $acc2]);
214 Function: Gets a series of Seq objects by accession numbers
215 Returns : a Bio::SeqIO stream object
216 Args : $ref : a reference to an array of accession numbers for
217 the desired sequence entries
218 Note : For GenBank, this just calls the same code for get_Stream_by_id()
219
220 =cut
221
222 =head2 get_Stream_by_batch
223
224 Title : get_Stream_by_batch
225 Usage : $stream = $db->get_Stream_by_batch($ref);
226 Function: Retrieves Seq objects from SwissProt 'en masse', rather than one
227 at a time. This is implemented the same way as get_Stream_by_id,
228 but is provided here in keeping with access methods of NCBI
229 modules.
230 Example :
231 Returns : a Bio::SeqIO stream object
232 Args : $ref : either an array reference, a filename, or a filehandle
233 from which to get the list of unique ids/accession numbers.
234
235 =cut
236
# Batch retrieval is a plain alias: the id list is handed to
# get_Stream_by_id() untouched and whatever that returns is passed back.
sub get_Stream_by_batch {
    my $self  = shift;
    my ($ids) = @_;
    return $self->get_Stream_by_id($ids);
}
241
242 =head2 Implemented Routines from Bio::DB::WebDBSeqI interface
243
244 =cut
245
246 =head2 get_request
247
248 Title : get_request
249 Usage : my $url = $self->get_request
250 Function: returns a HTTP::Request object
251 Returns :
252 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
253
254 =cut
255
# Builds the POST request that fetches the given ids from the currently
# selected server.
#
# Named arguments (resolved through _rearrange):
#   UIDS   : a single id, or an array reference holding several ids (required)
#   FORMAT : format to request; passed on to request_format()
#
# Returns whatever POST produces for the server URL and the CGI variables.
# Throws when no uids were supplied.
sub get_request {
    my $self = shift;
    my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)], @_);

    $self->throw("Must specify a value for uids to query")
        unless defined $uids;

    # only the server-side format name is needed here
    my ($f) = $self->request_format($format);

    my $type = $self->servertype;
    my %vars = ( @{ $HOSTS{$type}->{'basevars'} }, 'format' => $f );

    my $url = $self->location_url;

    my $separator = $HOSTS{$type}->{'jointype'} || ' ';
    my $idvar     = $HOSTS{$type}->{'idvar'}    || 'id';

    # HTTP::Request automagically converts the ' ' to %20
    $vars{$idvar} = ref($uids) =~ /ARRAY/i
                  ? join($separator, @$uids)
                  : $uids;

    return POST($url, \%vars);
}
287
288 =head2 postprocess_data
289
290 Title : postprocess_data
291 Usage : $self->postprocess_data ( 'type' => 'string',
292 'location' => \$datastr);
293 Function: process downloaded data before loading into a Bio::SeqIO
294 Returns : void
295 Args : hash with two keys - 'type' can be 'string' or 'file'
296 - 'location' either file location or string
297 reference containing data
298
299 =cut
300
301 # don't need to do anything
302
# Hook that runs on downloaded data before it is loaded into a Bio::SeqIO.
# Nothing has to be done for this module: the arguments are unpacked and
# ignored, and nothing is returned.
sub postprocess_data {
    my $self = shift;
    my %args = @_;
    return;
}
307
308 =head2 default_format
309
310 Title : default_format
311 Usage : my $format = $self->default_format
312 Function: Returns default sequence format for this module
313 Returns : string
314 Args : none
315
316 =cut
317
# Reports the package-wide default sequence format ($DEFAULTFORMAT).
sub default_format {
    my $format = $DEFAULTFORMAT;
    return $format;
}
321
322 =head2 Bio::DB::SwissProt specific routines
323
324 =cut
325
326 =head2 servertype
327
328 Title : servertype
329 Usage : my $servertype = $self->servertype
330 $self->servertype($servertype);
331 Function: Get/Set server type
332 Returns : string
333 Args : server type string [optional]
334
335 =cut
336
# Get/set the server type.
#
# Args    : optional server type; must be a key of %HOSTS.  undef or the
#           empty string make this a plain getter.
# Returns : the stored server type, or $DEFAULTSERVERTYPE when none is set.
# Throws  : when the given server type is not a key of %HOSTS; the message
#           lists the available types.
#
# Setting the server type also resets the host location to that server's
# default and re-applies the current request format.
sub servertype {
    my ($self, $servertype) = @_;
    if( defined $servertype && $servertype ne '' ) {
        unless( $HOSTS{$servertype} ) {
            # join() is needed here: concatenating 'keys %HOSTS' directly
            # evaluates it in scalar context and yields only the key count.
            $self->throw("You gave an invalid server type ($servertype)".
                         " - available types are ".
                         join(", ", sort keys %HOSTS));
        }
        $self->{'_servertype'}   = $servertype;
        $self->{'_hostlocation'} = $HOSTS{$servertype}->{'default'};

        # make sure format is reset properly in that different
        # servers have different syntaxes
        my ($existingformat) = $self->request_format;
        $self->request_format($existingformat);
    }
    return $self->{'_servertype'} || $DEFAULTSERVERTYPE;
}
353
354
355 =head2 hostlocation
356
357 Title : hostlocation
358 Usage : my $location = $self->hostlocation()
359 $self->hostlocation($location)
360 Function: Set/Get Hostlocation
361 Returns : string representing hostlocation
362 Args : string specifying hostlocation [optional]
363
364 =cut
365
# Get/set the host location for the current server type.
#
# Args    : optional host location; compared in lower case against the keys
#           of the current server type's 'hosts' table.  undef or the empty
#           string make this a plain getter.
# Returns : the stored host location.
# Throws  : when the given location is not a known host of the current
#           server type; the message lists the possible values.
sub hostlocation {
    my ($self, $location) = @_;

    my $servertype = $self->servertype;
    $self->throw("Must have a valid servertype defined not $servertype")
        unless defined $servertype;

    if( defined $location && $location ne '' ) {
        # Lower-case only inside the setter branch: doing it up front would
        # call lc() on undef for every plain getter call.
        $location = lc $location;

        # work on the table itself instead of copying it on every call
        my $hosts = $HOSTS{$servertype}->{'hosts'};
        if( ! $hosts->{$location} ) {
            $self->throw("Must specify a known host, not $location,".
                         " possible values (".
                         join(",", sort keys %$hosts ). ")");
        }
        $self->{'_hostlocation'} = $location;
    }
    return $self->{'_hostlocation'};
}
383
384 =head2 location_url
385
386 Title : location_url
387 Usage : my $url = $self->location_url()
388 Function: Get host url
389 Returns : string representing url
390 Args : none
391
392 =cut
393
# Builds the query URL for the current server type and host location by
# filling the host name into the server type's 'baseurl' template.
# Throws when either the server type or the host location is undefined.
sub location_url {
    my $self = shift;

    my $type  = $self->servertype();
    my $where = $self->hostlocation();

    $self->throw("must have a valid hostlocation and servertype set before calling location_url")
        unless defined $where && defined $type;

    my $template = $HOSTS{$type}->{'baseurl'};
    my $host     = $HOSTS{$type}->{'hosts'}->{$where};
    return sprintf($template, $host);
}
405
406 =head2 request_format
407
408 Title : request_format
409 Usage : my ($req_format, $ioformat) = $self->request_format;
410 $self->request_format("swiss");
411 $self->request_format("fasta");
412 Function: Get/Set sequence format retrieval. The get-form will normally not
413 be used outside of this and derived modules.
414 Returns : Array of two strings, the first representing the format for
415 retrieval, and the second specifying the corresponding SeqIO format.
416 Args : $format = sequence format
417
418 =cut
419
# Get/set the sequence format used for retrieval.
#
# Args    : optional format name.  Names containing 'sprot' or 'swiss'
#           select the SwissProt format, names starting with 'fa' select
#           fasta; both tests ignore case.  Any other name triggers a
#           warning and selects the server's fallback (fasta for expasy,
#           swissprot for ebi).
# Returns : list of two strings - the format name sent to the server and the
#           matching SeqIO format name.  An empty list when no format has
#           been stored yet.
#
# A value given while the server type is neither expasy nor ebi leaves the
# stored format untouched.
sub request_format {
    my ($self, $value) = @_;

    if( defined $value ) {
        my $servertype = $self->servertype;

        # Per server type: the pair used for SwissProt output and the pair
        # used when the requested format is not recognized.
        my ($swiss, $fallback);
        if( $servertype =~ /expasy/ ) {
            $swiss    = [ 'sprot', 'swiss' ];
            $fallback = [ 'fasta', 'fasta' ];
        } elsif( $servertype =~ /ebi/ ) {
            $swiss    = [ 'swissprot', 'swiss' ];
            $fallback = [ 'swissprot', 'swiss' ];
        }

        if( defined $swiss ) {
            # Case-insensitive on purpose, so that 'Fasta' or 'SwissProt'
            # are recognized instead of falling through to the warning.
            if( $value =~ /sprot|swiss/i ) {
                $self->{'_format'} = $swiss;
            } elsif( $value =~ /^fa/i ) {
                $self->{'_format'} = [ 'fasta', 'fasta' ];
            } else {
                $self->warn("Unrecognized format $value requested");
                $self->{'_format'} = $fallback;
            }
        }
    }

    # Guard the dereference: no format may have been stored yet.
    return @{ $self->{'_format'} || [] };
}
445
446 1;
447 __END__