0
|
1
|
|
2 #
|
|
3 # BioPerl module for Bio::DB::Universal
|
|
4 #
|
|
5 # Cared for by Ewan Birney <birney@ebi.ac.uk>
|
|
6 #
|
|
7 # Copyright Ewan Birney
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::DB::Universal - Artificial database that delegates to specific databases
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 $uni = Bio::DB::Universal->new();
|
|
20
|
|
21 # by default connects to web databases. We can also
|
|
22 # substitute local databases
|
|
23
|
|
24 $embl = Bio::Index::EMBL->new( -filename => '/some/index/filename/locally/stored');
|
|
25 $uni->use_database('embl',$embl);
|
|
26
|
|
27 # treat it like a normal database. Recognises strings
|
|
28 # like gb|XXXXXX and embl:YYYYYY
|
|
29
|
|
30 $seq1 = $uni->get_Seq_by_id("embl:HSHNRNPA");
|
|
31 $seq2 = $uni->get_Seq_by_acc("gb|A000012");
|
|
32
|
|
33 # with no separator, tries to guess database. In this case the
|
|
34 # _ is considered to be indicative of swissprot
|
|
35 $seq3 = $uni->get_Seq_by_id('ROA1_HUMAN');
|
|
36
|
|
37 =head1 DESCRIPTION
|
|
38
|
|
39 Artificial database that delegates to specific databases, with a
|
|
40 "smart" (well, smartish) guessing routine for what the ids are. No doubt
|
|
41 the smart routine can be made smarter.
|
|
42
|
|
43 The hope is that you can make this database and just throw ids at it -
|
|
44 for most easy cases it will sort you out. Personally, I would be
|
|
45 making sure I knew where each id came from and putting it into its own
|
|
46 database first - but this is a quick and dirty solution.
|
|
47
|
|
48 By default this connects to web orientated databases, with all the
|
|
49 reliability and network bandwidth costs this implies. However you can
|
|
50 substitute your own local databases - they could be Bio::Index
|
|
51 databases (DBM file and flat file) or bioperl-db based (MySQL based)
|
|
52 or biocorba-based (whatever you like behind the corba interface).
|
|
53
|
|
54 Internally the tags for the databases are
|
|
55
|
|
56 genbank - ncbi dna database
|
|
57 embl - ebi's dna database (these two share accession number space)
|
|
58 swiss - swissprot + sptrembl (EBI's protein database)
|
|
59
|
|
60 We should extend this for RefSeq and other sequence databases which
|
|
61 are out there... ;)
|
|
62
|
|
63 Inspired by Lincoln Stein, written by Ewan Birney.
|
|
64
|
|
65 =head1 FEEDBACK
|
|
66
|
|
67 =head2 Mailing Lists
|
|
68
|
|
69 User feedback is an integral part of the evolution of this and other
|
|
70 Bioperl modules. Send your comments and suggestions preferably to one
|
|
71 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
72
|
|
73 bioperl-l@bio.perl.org
|
|
74
|
|
75 =head2 Reporting Bugs
|
|
76
|
|
77 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
78 of the bugs and their resolution. Bug reports can be submitted via email
|
|
79 or the web:
|
|
80
|
|
81 bioperl-bugs@bio.perl.org
|
|
82 http://bugzilla.bioperl.org/
|
|
83
|
|
84 =head1 AUTHOR - Ewan Birney
|
|
85
|
|
86 Email birney@ebi.ac.uk
|
|
87
|
|
88 Describe contact details here
|
|
89
|
|
90 =head1 APPENDIX
|
|
91
|
|
92 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
93
|
|
94 =cut
|
|
95
|
|
96
|
|
97 # Let the code begin...
|
|
98
|
|
99
|
|
100 package Bio::DB::Universal;
|
|
101 use vars qw(@ISA);
|
|
102 use strict;
|
|
103
|
|
104 # Object preamble - inherits from Bio::Root::Root
|
|
105
|
|
106 use Bio::Root::Root;
|
|
107 use Bio::DB::RandomAccessI;
|
|
108
|
|
109 use Bio::DB::GenBank;
|
|
110 use Bio::DB::SwissProt;
|
|
111 use Bio::DB::EMBL;
|
|
112
|
|
113
|
|
114 @ISA = qw(Bio::DB::RandomAccessI Bio::Root::Root);
|
|
115 # new() is defined here rather than inherited from Bio::Root::Root, so that the default databases get registered
|
|
116
|
|
=head2 new

 Title   : new
 Usage   : $uni = Bio::DB::Universal->new();
 Function: Builds a new universal database object and registers the
           default database for each of the internal tags:
             embl    => Bio::DB::EMBL
             genbank => Bio::DB::GenBank
             swiss   => Bio::DB::SwissProt
           Any of these can be replaced afterwards with use_database.
 Returns : A new object blessed into the invoking class
 Args    : None (extra arguments are ignored)

=cut

sub new {
    my ($class) = @_;

    # Allow $obj->new as well as Class->new.
    $class = ref($class) || $class;

    my $self = bless {}, $class;

    # Maps a database tag ('embl', 'genbank', 'swiss') to the object
    # that lookups for that tag are delegated to.
    $self->{'db_hash'} = {};

    # default databases
    $self->use_database('embl',    Bio::DB::EMBL->new);
    $self->use_database('genbank', Bio::DB::GenBank->new);

    # The 'swiss' tag must be served by the SwissProt database; it was
    # previously registered with a second GenBank object by mistake.
    $self->use_database('swiss',   Bio::DB::SwissProt->new);

    return $self;
}
|
|
133
|
|
134
|
|
=head2 get_Seq_by_id

 Title   : get_Seq_by_id
 Usage   : $seq = $uni->get_Seq_by_id('embl:HSHNRNPA');
 Function: Resolves the given string to a (database tag, id) pair with
           guess_id and forwards the id to the get_Seq_by_id method of
           the database registered under that tag
 Example :
 Returns : Whatever the delegated database's get_Seq_by_id returns
 Args    : An identifier string, optionally prefixed with a database name

=cut

sub get_Seq_by_id {
    my $self  = shift;
    my $query = shift;

    # Work out which registered database should answer, and with what id.
    my ($db_name, $local_id) = $self->guess_id($query);

    my $backend = $self->{'db_hash'}->{$db_name};
    return $backend->get_Seq_by_id($local_id);
}
|
|
154
|
|
155
|
|
=head2 get_Seq_by_acc

 Title   : get_Seq_by_acc
 Usage   : $seq = $uni->get_Seq_by_acc('gb|A000012');
 Function: Resolves the given string to a (database tag, id) pair with
           guess_id and forwards the id to the get_Seq_by_acc method of
           the database registered under that tag
 Example :
 Returns : Whatever the delegated database's get_Seq_by_acc returns
 Args    : An accession string, optionally prefixed with a database name

=cut

sub get_Seq_by_acc {
    my $self  = shift;
    my $query = shift;

    # Work out which registered database should answer, and with what id.
    my ($db_name, $accession) = $self->guess_id($query);

    my $backend = $self->{'db_hash'}->{$db_name};
    return $backend->get_Seq_by_acc($accession);
}
|
|
175
|
|
176
|
|
177
|
|
=head2 guess_id

 Title   : guess_id
 Usage   : ($tag, $id) = $uni->guess_id('embl:HSHNRNPA');
 Function: Decides which internal database tag ('genbank', 'embl' or
           'swiss') an identifier string belongs to.

           If the string has the form <db><sep><id>, where <sep> is one
           of : | / ; then the tag is chosen from the <db> prefix and
           the returned id is the part after the separator. All prefix
           tests are case-insensitive.

           Without a separator the tag is guessed from the id itself:
           an id containing '_' or matching /^[QPR]\w+\d$/ is treated
           as 'swiss', anything else as 'genbank', and the string is
           returned unchanged as the id.
 Example :
 Returns : A two element list: (tag, id)
 Args    : An identifier string
 Throws  : Calls $self->throw if a database prefix is present but is
           not recognised

=cut

sub guess_id {
    my ($self, $str) = @_;

    # Explicit "<db><sep><id>" form. The prefix is matched greedily, so
    # with several separators it extends up to the last one that is
    # followed by word characters.
    if ( $str =~ /(\S+)[:|\/;](\w+)/ ) {
        my ($db, $id) = ($1, $2);

        my $tag;
        if ( $db =~ /gb/i || $db =~ /genbank/i || $db =~ /ncbi/i ) {
            $tag = 'genbank';
        }
        elsif ( $db =~ /embl/i || $db =~ /^em/i ) {
            # /embl/i already covers "emblbank"
            $tag = 'embl';
        }
        elsif ( $db =~ /swiss/i || $db =~ /^sw/i || $db =~ /sptr/i ) {
            # 'sptr' is matched case-insensitively like every other
            # prefix, so that e.g. "SPTR:Q12345" is recognised too.
            $tag = 'swiss';
        }
        else {
            # throw for the moment
            $self->throw("Could not guess database type $db from $str");
        }

        return ($tag, $id);
    }

    # No separator: auto-guess from just the id.
    return ('swiss', $str) if $str =~ /_/;
    return ('swiss', $str) if $str =~ /^[QPR]\w+\d$/;

    # default genbank...
    return ('genbank', $str);
}
|
|
227
|
|
228
|
|
=head2 use_database

 Title   : use_database
 Usage   : $uni->use_database('embl', $embl);
 Function: Registers a database object under the given tag, replacing
           any database previously registered under that tag
 Example :
 Returns : The database object that was stored
 Args    : The tag name and the database object to delegate to

=cut

sub use_database {
    my $self = shift;
    my ($name, $database) = @_;

    my $registry = $self->{'db_hash'};
    return $registry->{$name} = $database;
}
|
|
246
|
|
247
|