0
|
1 # $Id: GenPept.pm,v 1.26 2002/11/21 17:45:59 lstein Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::DB::GenPept
|
|
4 #
|
|
5 # Cared for by Aaron Mackey <amackey@virginia.edu>
|
|
6 #
|
|
7 # Copyright Aaron Mackey
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 # completely reworked by Jason Stajich to use Bio::DB::WebDBSeqI 2000-12-12
|
|
14
|
|
15 =head1 NAME
|
|
16
|
|
17 Bio::DB::GenPept - Database object interface to GenPept
|
|
18
|
|
19 =head1 SYNOPSIS
|
|
20
|
|
21 $gb = new Bio::DB::GenPept;
|
|
22
|
|
23 $seq = $gb->get_Seq_by_id('195055'); # Unique ID
|
|
24
|
|
25 # or ...
|
|
26
|
|
27 $seq = $gb->get_Seq_by_acc('DEECTH'); # Accession Number
|
|
28
|
|
29 my $seqio = $gb->get_Stream_by_id(['195055', 'DEECTH']);
|
|
30 while( my $seq = $seqio->next_seq ) {
|
|
31 print "seq id is ", $seq->display_id, "\n";
|
|
32 }
|
|
33
|
|
34 =head1 DESCRIPTION
|
|
35
|
|
36 Allows the dynamic retrieval of Sequence objects (Bio::Seq) from the GenPept
|
|
37 database at NCBI, via an Entrez query.
|
|
38
|
|
39 WARNING: Please do NOT spam the Entrez web server with multiple requests.
|
|
40 NCBI offers Batch Entrez for this purpose. Batch Entrez will likely
|
|
41 be supported in a future version of DB::GenPept.
|
|
42
|
|
43 Currently the only return format supported by NCBI Entrez for GenPept
|
|
44 database is GenPept format, so any format specification passed to
|
|
45 GenPept will be ignored; the request is always forced to GenPept format (which is
|
|
46 just GenBank format).
|
|
47
|
|
48 =head1 FEEDBACK
|
|
49
|
|
50 =head2 Mailing Lists
|
|
51
|
|
52 User feedback is an integral part of the
|
|
53 evolution of this and other Bioperl modules. Send
|
|
54 your comments and suggestions preferably to one
|
|
55 of the Bioperl mailing lists. Your participation
|
|
56 is much appreciated.
|
|
57
|
|
58 bioperl-l@bioperl.org - General discussion
|
|
59 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
60
|
|
61 =head2 Reporting Bugs
|
|
62
|
|
63 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
64 of the bugs and their resolution.
|
|
65 Bug reports can be submitted via email or the web:
|
|
66
|
|
67 bioperl-bugs@bio.perl.org
|
|
68 http://bugzilla.bioperl.org/
|
|
69
|
|
70 =head1 AUTHOR - Aaron Mackey, Jason Stajich
|
|
71
|
|
72 Email amackey@virginia.edu
|
|
73 Email jason@bioperl.org
|
|
74
|
|
75 =head1 APPENDIX
|
|
76
|
|
77 The rest of the documentation details each of the object
|
|
78 methods. Internal methods are usually preceded with a _
|
|
79
|
|
80 =cut
|
|
81
|
|
82 # Let the code begin...
|
|
83
|
|
84 package Bio::DB::GenPept;
|
|
85 use strict;
|
|
86 use vars qw(@ISA $DEFAULTFORMAT $DEFAULTMODE %PARAMSTRING );
|
|
87 use Bio::DB::NCBIHelper;
|
|
88
|
|
89 @ISA = qw(Bio::DB::NCBIHelper);
|
|
BEGIN {
    # Mode whose parameters get_params() falls back to when the requested
    # mode has no entry in %PARAMSTRING.
    $DEFAULTMODE   = 'single';

    # Format code handed to request_format() by new() and by the
    # request_format() override in this package.
    $DEFAULTFORMAT = 'gp';

    # All retrieval modes currently share the same query parameters.
    my %shared = (
        'db'         => 'protein',
        'usehistory' => 'n',
        'tool'       => 'bioperl',
        'retmode'    => 'text',
    );

    # Give every mode its own copy of the template so the per-mode hashes
    # stay independent of one another.
    %PARAMSTRING = map { ( $_ => { %shared } ) } qw(batch gi version single);
}
|
|
112
|
|
113 # the new way to make modules a little more lightweight
|
|
sub new {
    # Constructor: delegates object creation to the parent class, passing
    # the caller's arguments through unchanged, then sets the request
    # format to this module's default format.
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    $self->request_format( $self->default_format );

    return $self;
}
|
|
120
|
|
121 =head2 get_params
|
|
122
|
|
123 Title : get_params
|
|
124 Usage : my %params = $self->get_params($mode)
|
|
125 Function: Returns key,value pairs to be passed to NCBI database
|
|
126 for the requested retrieval mode ('single', 'batch', 'gi' or 'version')
|
|
127 Returns : a key,value pair hash
|
|
128 Args : retrieval mode; an unrecognized mode falls back to 'single'
|
|
129
|
|
130 =cut
|
|
131
|
|
sub get_params {
    # Returns the query parameters registered in %PARAMSTRING for $mode as a
    # flat key => value list. A missing (undef) or unregistered $mode falls
    # back to the parameters of $DEFAULTMODE.
    my ($self, $mode) = @_;

    # Test $mode itself before using it as a hash key, so that calling
    # get_params() without a mode does not raise an "uninitialized value"
    # warning under -w.
    my $is_known = defined $mode && defined $PARAMSTRING{$mode};
    my $params   = $is_known ? $PARAMSTRING{$mode} : $PARAMSTRING{$DEFAULTMODE};

    return %{$params};
}
|
|
136
|
|
137 =head2 default_format
|
|
138
|
|
139 Title : default_format
|
|
140 Usage : my $format = $self->default_format
|
|
141 Function: Returns default sequence format for this module
|
|
142 Returns : string
|
|
143 Args : none
|
|
144
|
|
145 =cut
|
|
146
|
|
sub default_format {
    # Accessor for the package-wide default format code; takes no
    # arguments beyond the invocant, which is ignored.
    my $format = $DEFAULTFORMAT;
    return $format;
}
|
|
150
|
|
151 # from Bio::DB::WebDBSeqI from Bio::DB::RandomAccessI
|
|
152
|
|
153 =head1 Routines from Bio::DB::WebDBSeqI and Bio::DB::RandomAccessI
|
|
154
|
|
155 =head2 get_Seq_by_id
|
|
156
|
|
157 Title : get_Seq_by_id
|
|
158 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
|
|
159 Function: Gets a Bio::Seq object by its name
|
|
160 Returns : a Bio::Seq object
|
|
161 Args : the id (as a string) of a sequence
|
|
162 Throws : "id does not exist" exception
|
|
163
|
|
164 =head2 get_Seq_by_acc
|
|
165
|
|
166 Title : get_Seq_by_acc
|
|
167 Usage : $seq = $db->get_Seq_by_acc('AAC73346');
|
|
168 Function: Gets a Seq objects by accession number
|
|
169 Returns : Bio::Seq object
|
|
170 Args : accession number to retrieve by
|
|
171
|
|
172 =head1 Routines implemented by Bio::DB::NCBIHelper
|
|
173
|
|
174 =head2 get_request
|
|
175
|
|
176 Title : get_request
|
|
177 Usage : my $url = $self->get_request
|
|
178 Function: HTTP::Request
|
|
179 Returns :
|
|
180 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
|
|
181
|
|
182 =head2 get_Stream_by_id
|
|
183
|
|
184 Title : get_Stream_by_id
|
|
185 Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
|
|
186 Function: Gets a series of Seq objects by unique identifiers
|
|
187 Returns : a Bio::SeqIO stream object
|
|
188 Args : $ref : a reference to an array of unique identifiers for
|
|
189 the desired sequence entries
|
|
190
|
|
191 =head2 get_Stream_by_acc (2)
|
|
192
|
|
193 Title : get_Stream_by_acc
|
|
194 Usage : $seq = $db->get_Stream_by_acc($acc);
|
|
195 Function: Gets a series of Seq objects by accession numbers
|
|
196 Returns : a Bio::SeqIO stream object
|
|
197 Args : $ref : a reference to an array of accession numbers for
|
|
198 the desired sequence entries
|
|
199 Note : For GenBank, this just calls the same code for get_Stream_by_id()
|
|
200
|
|
201 =head2 request_format
|
|
202
|
|
203 Title : request_format
|
|
204 Usage : my $format = $self->request_format;
|
|
205 $self->request_format($format);
|
|
206 Function: Get/Set sequence format retrieval
|
|
207 Returns : string representing format
|
|
208 Args : $format = sequence format
|
|
209
|
|
210 =cut
|
|
211
|
|
# Override to force the format to be GenPept regardless of what is passed in.
sub request_format {
    # Only the invocant is read; any format argument supplied by the caller
    # is deliberately discarded.
    my $self = shift;

    # Always hand the parent implementation this module's default format.
    my @forced = $self->default_format();
    return $self->SUPER::request_format(@forced);
}
|
|
217
|
|
218 1;
|
|
219 __END__
|
|
220
|
|
221
|
|
222
|
|
223
|
|
224
|
|
225
|
|
226
|
|
227
|
|
228
|
|
229
|
|
230
|
|
231
|
|
232
|
|
233
|
|
234
|
|
235
|