comparison variant_effect_predictor/Bio/DB/Universal.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1
2 #
3 # BioPerl module for Bio::DB::Universal
4 #
5 # Cared for by Ewan Birney <birney@ebi.ac.uk>
6 #
7 # Copyright Ewan Birney
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::DB::Universal - Artificial database that delegates to specific databases
16
17 =head1 SYNOPSIS
18
19 $uni = Bio::DB::Universal->new();
20
21 # by default connects to web databases. We can also
22 # substitute local databases
23
24 $embl = Bio::Index::EMBL->new( -filename => '/some/index/filename/locally/stored');
25 $uni->use_database('embl',$embl);
26
27 # treat it like a normal database. Recognises strings
28 # like gb|XXXXXX and embl:YYYYYY
29
30 $seq1 = $uni->get_Seq_by_id("embl:HSHNRNPA");
31 $seq2 = $uni->get_Seq_by_acc("gb|A000012");
32
33 # with no separator, tries to guess database. In this case the
34 # _ is considered to be indicative of swissprot
35 $seq3 = $uni->get_Seq_by_id('ROA1_HUMAN');
36
37 =head1 DESCRIPTION
38
39 Artificial database that delegates to specific databases, with a
40 "smart" (well, smartish) guessing routine for what the ids are. No doubt
41 the smart routine can be made smarter.
42
43 The hope is that you can make this database and just throw ids at it -
44 for most easy cases it will sort you out. Personally, I would be
45 making sure I knew where each id came from and putting it into its own
46 database first - but this is a quick and dirty solution.
47
48 By default this connects to web orientated databases, with all the
49 reliability and network bandwidth costs this implies. However you can
50 substitute your own local databases - they could be Bio::Index
51 databases (DBM file and flat file) or bioperl-db based (MySQL based)
52 or biocorba-based (whatever you like behind the corba interface).
53
54 Internally the tags for the databases are
55
56 genbank - ncbi dna database
57 embl - ebi's dna database (these two share accession number space)
58 swiss - swissprot + sptrembl (EBI's protein database)
59
60 We should extend this for RefSeq and other sequence databases which
61 are out there... ;)
62
63 Inspired by Lincoln Stein, written by Ewan Birney.
64
65 =head1 FEEDBACK
66
67 =head2 Mailing Lists
68
69 User feedback is an integral part of the evolution of this and other
70 Bioperl modules. Send your comments and suggestions preferably to one
71 of the Bioperl mailing lists. Your participation is much appreciated.
72
73 bioperl-l@bio.perl.org
74
75 =head2 Reporting Bugs
76
77 Report bugs to the Bioperl bug tracking system to help us keep track
78 of the bugs and their resolution. Bug reports can be submitted via email
79 or the web:
80
81 bioperl-bugs@bio.perl.org
82 http://bugzilla.bioperl.org/
83
84 =head1 AUTHOR - Ewan Birney
85
86 Email birney@ebi.ac.uk
87
88 Describe contact details here
89
90 =head1 APPENDIX
91
92 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
93
94 =cut
95
96
97 # Let the code begin...
98
99
100 package Bio::DB::Universal;
101 use vars qw(@ISA);
102 use strict;
103
104 # Object preamble - inherits from Bio::Root::Root
105
106 use Bio::Root::Root;
107 use Bio::DB::RandomAccessI;
108
109 use Bio::DB::GenBank;
110 use Bio::DB::SwissProt;
111 use Bio::DB::EMBL;
112
113
114 @ISA = qw(Bio::DB::RandomAccessI Bio::Root::Root);
# new() is overridden here (rather than inherited from Bio::Root::Root)
# so that the default databases can be registered at construction time.

=head2 new

 Title   : new
 Usage   : $uni = Bio::DB::Universal->new();
 Function: Builds a Universal database object and registers a default
           database under each of the tags 'embl', 'genbank' and
           'swiss'. Any of them can be replaced afterwards with
           use_database().
 Example :
 Returns : a Bio::DB::Universal object
 Args    : none (only the class name is read from the argument list)

=cut

sub new {
    my ($class) = @_;

    my $self = {};
    bless $self, $class;

    # tag => database object; filled in by use_database()
    $self->{'db_hash'} = {};

    # default databases

    $self->use_database('embl',    Bio::DB::EMBL->new);
    $self->use_database('genbank', Bio::DB::GenBank->new);
    # The 'swiss' tag must be served by SwissProt; it was previously
    # wired to a second GenBank handle by mistake.
    $self->use_database('swiss',   Bio::DB::SwissProt->new);

    return $self;
}
133
134
=head2 get_Seq_by_id

 Title   : get_Seq_by_id
 Usage   : $seq = $uni->get_Seq_by_id("embl:HSHNRNPA");
 Function: Resolves the string to a (database tag, id) pair with
           guess_id() and forwards the id to get_Seq_by_id() on the
           database registered under that tag.
 Example : $seq = $uni->get_Seq_by_id('ROA1_HUMAN');
 Returns : whatever the delegate database's get_Seq_by_id returns
 Args    : an id string, optionally prefixed with a database name
           (see guess_id)

=cut

sub get_Seq_by_id {
    my $self = shift;
    my ($str) = @_;

    # work out which registered database should answer this request
    my ($tag, $id) = $self->guess_id($str);
    my $database = $self->{'db_hash'}->{$tag};

    return $database->get_Seq_by_id($id);
}
154
155
=head2 get_Seq_by_acc

 Title   : get_Seq_by_acc
 Usage   : $seq = $uni->get_Seq_by_acc("gb|A000012");
 Function: Resolves the string to a (database tag, accession) pair with
           guess_id() and forwards the accession to get_Seq_by_acc()
           on the database registered under that tag.
 Example : $seq = $uni->get_Seq_by_acc("gb|A000012");
 Returns : whatever the delegate database's get_Seq_by_acc returns
 Args    : an accession string, optionally prefixed with a database
           name (see guess_id)

=cut

sub get_Seq_by_acc {
    my $self = shift;
    my ($str) = @_;

    # work out which registered database should answer this request
    my ($tag, $acc) = $self->guess_id($str);
    my $database = $self->{'db_hash'}->{$tag};

    return $database->get_Seq_by_acc($acc);
}
175
176
177
=head2 guess_id

 Title   : guess_id
 Usage   : my ($tag,$id) = $uni->guess_id("embl:HSHNRNPA");
 Function: Decides which internal database tag ('genbank', 'embl' or
           'swiss') an identifier string belongs to.

           If the string has the form <db><sep><id>, where <sep> is one
           of : | / ; the database name is matched case-insensitively:
             gb, genbank, ncbi           -> genbank
             embl, or starting with em   -> embl
             swiss, sptr, or starting
             with sw or sp               -> swiss
           Otherwise the tag is guessed from the id alone: an id
           containing '_' or looking like [QPR]...<digit> is taken to
           be swiss, anything else genbank.
 Example : my ($tag,$id) = $uni->guess_id('ROA1_HUMAN');  # swiss
 Returns : a two element list: (database tag, id)
 Args    : identifier string
 Throws  : if a database prefix is present but not recognised

=cut

sub guess_id {
    my ($self, $str) = @_;

    # Explicit "db<sep>id" form. \S+ is greedy, so the split happens at
    # the last separator in the string. The id is captured with \w+, so
    # anything after the first non-word character (e.g. a ".1" version
    # suffix) is not part of the returned id.
    if ( my ($db, $id) = $str =~ m{(\S+)[:|/;](\w+)} ) {
        my $tag;
        if ( $db =~ /gb|genbank|ncbi/i ) {
            $tag = 'genbank';
        } elsif ( $db =~ /embl|^em/i ) {
            $tag = 'embl';
        } elsif ( $db =~ /swiss|sptr|^sw|^sp/i ) {
            # every alternative is case-insensitive, and the common
            # "sp|ACC" SwissProt prefix is accepted as well
            $tag = 'swiss';
        } else {
            # throw for the moment
            $self->throw("Could not guess database type $db from $str");
        }
        return ($tag, $id);
    }

    # No separator: auto-guess from just the id. Everything that does
    # not look like a SwissProt id defaults to genbank.
    my $tag = ( $str =~ /_/ || $str =~ /^[QPR]\w+\d$/ ) ? 'swiss' : 'genbank';

    return ($tag, $str);
}
227
228
=head2 use_database

 Title   : use_database
 Usage   : $uni->use_database('embl',$embl);
 Function: Registers $database as the object that answers requests for
           the given tag, replacing any database already stored under
           that tag.
 Example : $uni->use_database('embl',
               Bio::Index::EMBL->new( -filename => '/some/index'));
 Returns : the database that was just stored
 Args    : tag name (e.g. 'embl', 'genbank', 'swiss'), database object

=cut

sub use_database {
    my $self = shift;
    my ($name, $database) = @_;

    # the assignment's value doubles as the return value
    return $self->{'db_hash'}{$name} = $database;
}
246
247