annotate variant_effect_predictor/Bio/DB/SwissProt.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2 # $Id: SwissProt.pm,v 1.19 2002/12/01 00:05:19 jason Exp $
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4 # BioPerl module for Bio::DB::SwissProt
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6 # Cared for by Jason Stajich <jason@bioperl.org>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8 # Copyright Jason Stajich
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10 # You may distribute this module under the same terms as perl itself
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12 # POD documentation - main docs before the code
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13 # Reworked to use Bio::DB::WebDBSeqI 2000-12-11
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17 Bio::DB::SwissProt - Database object interface to SwissProt retrieval
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21 use Bio::DB::SwissProt;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23 $sp = new Bio::DB::SwissProt;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25 $seq = $sp->get_Seq_by_id('KPY1_ECOLI'); # SwissProt ID
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26 # <4-letter-identifier>_<species 5-letter code>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27 # or ...
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28 $seq = $sp->get_Seq_by_acc('P43780'); # SwissProt AC
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29 # [OPQ]xxxxx
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32 # In fact in this implementation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33 # these methods call the same webscript so you can use
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34 # them interchangeably
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36 # choose a different server to query
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37 $sp = new Bio::DB::SwissProt('-servertype' => 'expasy',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38 '-hostlocation' => 'us');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40 $seq = $sp->get_Seq_by_id('BOLA_HAEIN'); # SwissProtID
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42 =head1 DESCRIPTION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44 SwissProt is a curated database of proteins managed by the Swiss
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45 Bioinformatics Institute. This is in contrast to EMBL/GenBank/DDBJ
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46 which are archives of protein information. Additional tools for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47 parsing and manipulating swissprot files can be found at
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48 ftp://ftp.ebi.ac.uk/pub/software/swissprot/Swissknife/.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50 Allows the dynamic retrieval of Sequence objects (Bio::Seq) from the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51 SwissProt database via an expasy retrieval. Perhaps through SRS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52 later.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54 In order to make changes transparent we have host type (ebi by default,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55 or expasy) and location (each host type has its own default) separated out. This
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56 allows the user to pick the closest expasy mirror for running their
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57 queries.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60 =head1 FEEDBACK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
62 =head2 Mailing Lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
63
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
64 User feedback is an integral part of the evolution of this and other
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
65 Bioperl modules. Send your comments and suggestions preferably to one
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
66 of the Bioperl mailing lists. Your participation is much appreciated.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
67
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
68
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
69 bioperl-l@bioperl.org - General discussion
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
70 http://bio.perl.org/MailList.html - About the mailing lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
71
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
72 =head2 Reporting Bugs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
73
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
74 Report bugs to the Bioperl bug tracking system to help us keep track
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
75 of the bugs and their resolution. Bug reports can be submitted via email
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
76 or the web:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
77
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
78 bioperl-bugs@bio.perl.org
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
79 http://bugzilla.bioperl.org/
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
80
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
81 =head1 AUTHOR - Jason Stajich
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
82
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
83 Email Jason Stajich E<lt>jason@bioperl.orgE<gt>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
84
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
85 Thanks go to Alexandre Gattiker E<lt>gattiker@isb-sib.chE<gt> of Swiss
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
86 Institute of Bioinformatics for helping point us in the direction of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
87 the correct expasy scripts and for swissknife references.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
88
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
89 Also thanks to Heikki Lehvaslaiho E<lt>heikki@ebi.ac.ukE<gt> for help with
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
90 adding EBI swall server.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
91
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
92 =head1 APPENDIX
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
93
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
94 The rest of the documentation details each of the object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
95 methods. Internal methods are usually preceded with a _
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
96
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
97 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
98
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
99 # Let the code begin...
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
100
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
101 package Bio::DB::SwissProt;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
103 use vars qw(@ISA $MODVERSION %HOSTS $DEFAULTFORMAT $DEFAULTSERVERTYPE);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
104
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
105 $MODVERSION = '0.8.1';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
106 use HTTP::Request::Common;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
107 use Bio::DB::WebDBSeqI;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
108
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
109 @ISA = qw(Bio::DB::WebDBSeqI);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
110
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Package-wide defaults, used when the caller does not pick a server type
# or a sequence format.
$DEFAULTFORMAT     = 'swissprot';
$DEFAULTSERVERTYPE = 'ebi';

# Per-server configuration, keyed by server type.  Extra servers can in
# principle be added here.  Each entry holds:
#   default  - key into 'hosts' used when no host location is requested
#   baseurl  - sprintf template; %s is replaced by the chosen host name
#   hosts    - map of location name => host name
#   jointype - separator used when several ids are sent in one request
#   idvar    - name of the CGI variable that carries the id list
#   basevars - fixed CGI key/value pairs sent with every request
%HOSTS = (
    ebi => {
        default  => 'uk',
        baseurl  => 'http://%s/cgi-bin/dbfetch',
        hosts    => { uk => 'www.ebi.ac.uk' },
        jointype => ',',
        idvar    => 'id',
        basevars => [ db => 'swall', style => 'raw' ],
    },
    expasy => {
        default => 'us',
        baseurl => 'http://%s/cgi-bin/sprot-retrieve-list.pl',
        hosts   => {
            australia   => 'au.expasy.org',
            canada      => 'ca.expasy.org',
            china       => 'cn.expasy.org',
            korea       => 'kr.expasy.org',
            switzerland => 'ch.expasy.org',
            taiwan      => 'tw.expasy.org',
            us          => 'us.expasy.org',
        },
        # CGI variables (ick)
        jointype => ' ',
        idvar    => 'list',
        basevars => [],
    },
);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
147
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
148 # new modules should be a little more lightweight and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
149 # should use Bio::Root::Root
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Constructor.
# Recognised named arguments (in addition to whatever the parent
# constructor accepts): -format, -hostlocation, -servertype.
# Returns the newly built object.
sub new {
    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new(@args);

    my ($format, $hostlocation, $servertype) =
        $self->_rearrange([qw(FORMAT HOSTLOCATION SERVERTYPE)], @args);

    # A requested format that is neither swiss nor fasta is replaced by
    # the module default, with a warning.
    if ($format) {
        unless ($format =~ /(swiss)|(fasta)/i) {
            $self->warn("Requested Format $format is ignored because only SwissProt and Fasta formats are currently supported");
            $format = $self->default_format;
        }
    }

    # The server type is set first: its setter resets the host location
    # to that server's default, and hostlocation() validates against the
    # current server type.
    $self->servertype(lc($servertype || $DEFAULTSERVERTYPE));
    $self->hostlocation(lc $hostlocation) if $hostlocation;

    # let's always override the format, as it must be swiss or fasta
    $self->request_format($format);

    return $self;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
172
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
173 =head2 Routines from Bio::DB::RandomAccessI
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
174
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
175 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
176
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
177 =head2 get_Seq_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
178
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
179 Title : get_Seq_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
180 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
181 Function: Gets a Bio::Seq object by its name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
182 Returns : a Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
183 Args : the id (as a string) of a sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
184 Throws : "id does not exist" exception
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
185
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
186 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
187
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
188 =head2 get_Seq_by_acc
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
189
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
190 Title : get_Seq_by_acc
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
191 Usage : $seq = $db->get_Seq_by_acc('X77802');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
192 Function: Gets a Bio::Seq object by accession number
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
193 Returns : A Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
194 Args : accession number (as a string)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
195 Throws : "acc does not exist" exception
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
196
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
197 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
198
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
199 =head2 get_Stream_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
200
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
201 Title : get_Stream_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
202 Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
203 Function: Gets a series of Seq objects by unique identifiers
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
204 Returns : a Bio::SeqIO stream object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
205 Args : $ref : a reference to an array of unique identifiers for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
206 the desired sequence entries
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
207
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
208 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
209
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
210 =head2 get_Stream_by_acc
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
211
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
212 Title : get_Stream_by_acc
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
213 Usage : $stream = $db->get_Stream_by_acc([$acc1, $acc2]);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
214 Function: Gets a series of Seq objects by accession numbers
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
215 Returns : a Bio::SeqIO stream object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
216 Args : $ref : a reference to an array of accession numbers for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
217 the desired sequence entries
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
218 Note : For GenBank, this just calls the same code for get_Stream_by_id()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
219
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
220 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
221
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
222 =head2 get_Stream_by_batch
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
223
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
224 Title : get_Stream_by_batch
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
225 Usage : $stream = $db->get_Stream_by_batch($ref);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
226 Function: Retrieves Seq objects from SwissProt 'en masse', rather than one
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
227 at a time. This is implemented the same way as get_Stream_by_id,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
228 but is provided here in keeping with access methods of NCBI
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
229 modules.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
230 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
231 Returns : a Bio::SeqIO stream object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
232 Args : $ref : either an array reference, a filename, or a filehandle
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
233 from which to get the list of unique ids/accession numbers.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
234
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
235 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
236
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Batch retrieval entry point.  Simply forwards $ids to
# get_Stream_by_id() and returns whatever that method returns.
sub get_Stream_by_batch {
    my $self = shift;
    my $ids  = shift;
    return $self->get_Stream_by_id($ids);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
241
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
242 =head2 Implemented Routines from Bio::DB::WebDBSeqI interface
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
243
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
244 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
245
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
246 =head2 get_request
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
247
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
248 Title : get_request
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
249 Usage : my $url = $self->get_request
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
250 Function: returns a HTTP::Request object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
251 Returns : an HTTP::Request object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
252 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
253
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
254 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
255
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Build the HTTP POST request used to fetch one or more entries.
# Named arguments: -uids (a single id or an array reference of ids,
# required) and -format (optional).
# Throws if no uids value is given.
sub get_request {
    my ($self, @params) = @_;
    my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)], @params);

    $self->throw("Must specify a value for uids to query")
        unless defined $uids;

    # request_format returns a list; only its first element is sent
    # to the server.
    my ($f, undef) = $self->request_format($format);

    my $server = $HOSTS{$self->servertype};

    # Fixed per-server variables plus the requested format.
    my %vars = ( @{ $server->{'basevars'} }, 'format' => $f );

    my $url = $self->location_url;

    my $jointype = $server->{'jointype'} || ' ';
    my $idvar    = $server->{'idvar'}    || 'id';

    # Several ids are sent as one string joined with the server's
    # separator; HTTP::Request automagically converts the ' ' to %20.
    $vars{$idvar} = ( ref($uids) =~ /ARRAY/i )
                  ? join($jointype, @$uids)
                  : $uids;

    return POST($url, \%vars);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
287
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
288 =head2 postprocess_data
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
289
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
290 Title : postprocess_data
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
291 Usage : $self->postprocess_data ( 'type' => 'string',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
292 'location' => \$datastr);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
293 Function: process downloaded data before loading into a Bio::SeqIO
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
294 Returns : void
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
295 Args : hash with two keys - 'type' can be 'string' or 'file'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
296 - 'location' either file location or string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
297 reference containing data
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
298
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
299 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
300
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
301 # don't need to do anything
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
302
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Hook for massaging downloaded data before it is parsed.  This module
# needs no post-processing, so the arguments are accepted and ignored.
sub postprocess_data {
    my $self = shift;
    my %args = @_;
    return;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
307
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
308 =head2 default_format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
309
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
310 Title : default_format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
311 Usage : my $format = $self->default_format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
312 Function: Returns default sequence format for this module
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
313 Returns : string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
314 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
315
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
316 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
317
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Returns the module-wide default sequence format ($DEFAULTFORMAT).
sub default_format {
    my $format = $DEFAULTFORMAT;
    return $format;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
321
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
322 =head2 Bio::DB::SwissProt specific routines
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
323
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
324 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
325
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
326 =head2 servertype
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
327
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
328 Title : servertype
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
329 Usage : my $servertype = $self->servertype
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
330 $self->servertype($servertype);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
331 Function: Get/Set server type
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
332 Returns : string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
333 Args : server type string [optional]
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
334
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
335 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
336
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Get/set the server type.
# Args   : optional server type string; must be a key of %HOSTS.
# Returns: the stored server type, or $DEFAULTSERVERTYPE if none is set.
# Throws : if the given server type is not a key of %HOSTS.
# Setting a server type also resets the host location to that server's
# default location.
sub servertype {
    my ($self, $servertype) = @_;

    if( defined $servertype && $servertype ne '' ) {
        unless( $HOSTS{$servertype} ) {
            # join() is needed here: concatenating 'keys %HOSTS' directly
            # evaluates it in scalar context and yields only the count.
            $self->throw("You gave an invalid server type ($servertype)".
                         " - available types are ".
                         join(", ", sort keys %HOSTS));
        }
        $self->{'_servertype'}   = $servertype;
        $self->{'_hostlocation'} = $HOSTS{$servertype}->{'default'};

        # make sure format is reset properly in that different
        # servers have different syntaxes
        my ($existingformat) = $self->request_format;
        $self->request_format($existingformat);
    }
    return $self->{'_servertype'} || $DEFAULTSERVERTYPE;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
353
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
354
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
355 =head2 hostlocation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
356
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
357 Title : hostlocation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
358 Usage : my $location = $self->hostlocation()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
359 $self->hostlocation($location)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
360 Function: Set/Get Hostlocation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
361 Returns : string representing hostlocation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
362 Args : string specifying hostlocation [optional]
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
363
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
364 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
365
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Get/set the host location (a key into the current server's 'hosts' map).
# Args   : optional location string; matched case-insensitively.
# Returns: the stored host location.
# Throws : if the location is not known for the current server type.
sub hostlocation {
    my ($self, $location ) = @_;

    my $servertype = $self->servertype;
    $self->throw("Must have a valid servertype defined not $servertype")
        unless defined $servertype;

    # Only touch $location when one was actually supplied; lower-casing
    # undef on a plain getter call raises an 'uninitialized' warning.
    if( defined $location && $location ne '' ) {
        $location = lc $location;
        my $hosts = $HOSTS{$servertype}->{'hosts'};
        if( ! $hosts->{$location} ) {
            $self->throw("Must specify a known host, not $location,".
                         " possible values (".
                         join(",", sort keys %$hosts ). ")");
        }
        $self->{'_hostlocation'} = $location;
    }
    return $self->{'_hostlocation'};
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
383
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
384 =head2 location_url
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
385
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
386 Title : location_url
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
387 Usage : my $url = $self->location_url()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
388 Function: Get host url
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
389 Returns : string representing url
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
390 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
391
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
392 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
393
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub location_url {
    # Build the URL for the currently selected host by filling the
    # servertype's 'baseurl' sprintf template with the host entry stored
    # under the current hostlocation key.
    my $self = shift;

    my $type = $self->servertype();
    my $site = $self->hostlocation();

    # Both pieces are needed to index into %HOSTS.
    $self->throw("must have a valid hostlocation and servertype set before calling location_url")
        unless defined $site && defined $type;

    my $url_template = $HOSTS{$type}->{'baseurl'};
    my $host         = $HOSTS{$type}->{'hosts'}->{$site};
    return sprintf($url_template, $host);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
405
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
406 =head2 request_format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
407
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
408 Title : request_format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
409 Usage : my ($req_format, $ioformat) = $self->request_format;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
410 $self->request_format("swiss");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
411 $self->request_format("fasta");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
412 Function: Get/Set sequence format retrieval. The get-form will normally not
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
413 be used outside of this and derived modules.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
414 Returns : Array of two strings, the first representing the format for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
415 retrieval, and the second specifying the corresponding SeqIO format.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
416 Args : $format = sequence format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
417
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
418 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
419
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub request_format {
    # Get/set the sequence format pair for this database handle.
    #
    # With a defined $value, stores a two-element array ref in
    # $self->{'_format'}: [ format name for retrieval, SeqIO format name ].
    # The pair chosen depends on the current servertype (expasy or ebi);
    # for any other servertype the stored pair is left untouched.
    # Always returns the stored pair as a list.
    my ($self, $value) = @_;

    if ( defined $value ) {
        # Pick the server-specific pairs first: the one used for a
        # swissprot request and the one used when $value is unrecognized.
        my ($swiss_pair, $unknown_pair);
        if ( $self->servertype =~ /expasy/ ) {
            $swiss_pair   = [ 'sprot', 'swiss'];
            $unknown_pair = [ 'fasta', 'fasta'];
        }
        elsif ( $self->servertype =~ /ebi/ ) {
            $swiss_pair   = [ 'swissprot', 'swiss' ];
            $unknown_pair = [ 'swissprot', 'swiss'];
        }

        # Only act when one of the known server types matched.
        if ( defined $swiss_pair ) {
            if ( $value =~ /sprot/ || $value =~ /swiss/ ) {
                $self->{'_format'} = $swiss_pair;
            }
            elsif ( $value =~ /^fa/ ) {
                $self->{'_format'} = [ 'fasta', 'fasta'];
            }
            else {
                $self->warn("Unrecognized format $value requested");
                $self->{'_format'} = $unknown_pair;
            }
        }
    }

    return @{$self->{'_format'}};
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
445
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
446 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
447 __END__