Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/DB/Universal.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 | |
2 # | |
3 # BioPerl module for Bio::DB::Universal | |
4 # | |
5 # Cared for by Ewan Birney <birney@ebi.ac.uk> | |
6 # | |
7 # Copyright Ewan Birney | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::DB::Universal - Artificial database that delegates to specific databases | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 $uni = Bio::DB::Universal->new(); | |
20 | |
21 # by default connects to web databases. We can also | |
22 # substitute local databases | |
23 | |
24 $embl = Bio::Index::EMBL->new( -filename => '/some/index/filename/locally/stored'); | |
25 $uni->use_database('embl',$embl); | |
26 | |
27 # treat it like a normal database. Recognises strings | |
28 # like gb|XXXXXX and embl:YYYYYY | |
29 | |
30 $seq1 = $uni->get_Seq_by_id("embl:HSHNRNPA"); | |
31 $seq2 = $uni->get_Seq_by_acc("gb|A000012"); | |
32 | |
33 # with no separator, tries to guess database. In this case the | |
34 # _ is considered to be indicative of swissprot | |
35 $seq3 = $uni->get_Seq_by_id('ROA1_HUMAN'); | |
36 | |
37 =head1 DESCRIPTION | |
38 | |
39 Artificial database that delegates to specific databases, with a | |
40 "smart" (well, smartish) guessing routine for what the ids are. No doubt | |
41 the smart routine can be made smarter. | |
42 | |
43 The hope is that you can make this database and just throw ids at it - | |
44 for most easy cases it will sort you out. Personally, I would be | |
45 making sure I knew where each id came from and putting it into its own | |
46 database first - but this is a quick and dirty solution. | |
47 | |
48 By default this connects to web orientated databases, with all the | |
49 reliability and network bandwidth costs this implies. However you can | |
50 substitute your own local databases - they could be Bio::Index | |
51 databases (DBM file and flat file) or bioperl-db based (MySQL based) | |
52 or biocorba-based (whatever you like behind the corba interface). | |
53 | |
54 Internally the tags for the databases are | |
55 | |
56 genbank - ncbi dna database | |
57 embl - ebi's dna database (these two share accession number space) | |
58 swiss - swissprot + sptrembl (EBI's protein database) | |
59 | |
60 We should extend this for RefSeq and other sequence databases which | |
61 are out there... ;) | |
62 | |
63 Inspired by Lincoln Stein, written by Ewan Birney. | |
64 | |
65 =head1 FEEDBACK | |
66 | |
67 =head2 Mailing Lists | |
68 | |
69 User feedback is an integral part of the evolution of this and other | |
70 Bioperl modules. Send your comments and suggestions preferably to one | |
71 of the Bioperl mailing lists. Your participation is much appreciated. | |
72 | |
73 bioperl-l@bio.perl.org | |
74 | |
75 =head2 Reporting Bugs | |
76 | |
77 Report bugs to the Bioperl bug tracking system to help us keep track | |
78 of the bugs and their resolution. Bug reports can be submitted via email | |
79 or the web: | |
80 | |
81 bioperl-bugs@bio.perl.org | |
82 http://bugzilla.bioperl.org/ | |
83 | |
84 =head1 AUTHOR - Ewan Birney | |
85 | |
86 Email birney@ebi.ac.uk | |
87 | |
88 Describe contact details here | |
89 | |
90 =head1 APPENDIX | |
91 | |
92 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
93 | |
94 =cut | |
95 | |
96 | |
97 # Let the code begin... | |
98 | |
99 | |
100 package Bio::DB::Universal; | |
101 use vars qw(@ISA); | |
102 use strict; | |
103 | |
104 # Object preamble - inherits from Bio::Root::Root | |
105 | |
106 use Bio::Root::Root; | |
107 use Bio::DB::RandomAccessI; | |
108 | |
109 use Bio::DB::GenBank; | |
110 use Bio::DB::SwissProt; | |
111 use Bio::DB::EMBL; | |
112 | |
113 | |
114 @ISA = qw(Bio::DB::RandomAccessI Bio::Root::Root); | |
# The constructor is defined here (not inherited) so that the default
# databases can be registered on every new object.

=head2 new

 Title   : new
 Usage   : $uni = Bio::DB::Universal->new();
 Function: Builds a Universal database and registers the default
           databases under the tags 'embl', 'genbank' and 'swiss'
 Example : $uni = Bio::DB::Universal->new();
           $seq = $uni->get_Seq_by_id('embl:HSHNRNPA');
 Returns : a new Bio::DB::Universal object
 Args    : none (only the class name is read from the argument list)


=cut

sub new {
    my ($class) = @_;

    my $self = bless { 'db_hash' => {} }, $class;

    # default databases - each one can be replaced later with use_database()

    $self->use_database('embl',    Bio::DB::EMBL->new);
    $self->use_database('genbank', Bio::DB::GenBank->new);

    # 'swiss' must delegate to the SwissProt database; binding it to
    # GenBank would send every protein id to the wrong database.
    $self->use_database('swiss',   Bio::DB::SwissProt->new);

    return $self;
}
133 | |
134 | |
=head2 get_Seq_by_id

 Title   : get_Seq_by_id
 Usage   : $seq = $uni->get_Seq_by_id('embl:HSHNRNPA');
 Function: Works out which registered database the string refers to
           (see guess_id) and forwards the lookup to that database's
           own get_Seq_by_id method
 Example : $seq = $uni->get_Seq_by_id('ROA1_HUMAN');
 Returns : whatever the selected database's get_Seq_by_id returns
 Args    : an id string, optionally prefixed with a database name
           and a separator, e.g. 'embl:HSHNRNPA'


=cut

sub get_Seq_by_id {
    my ($self, $str) = @_;

    # split the request into a database tag and the bare id
    my ($db_tag, $bare_id) = $self->guess_id($str);

    # look up the database registered under that tag and delegate to it
    my $backend = $self->{'db_hash'}->{$db_tag};

    return $backend->get_Seq_by_id($bare_id);
}
154 | |
155 | |
=head2 get_Seq_by_acc

 Title   : get_Seq_by_acc
 Usage   : $seq = $uni->get_Seq_by_acc('gb|A000012');
 Function: Works out which registered database the string refers to
           (see guess_id) and forwards the lookup to that database's
           own get_Seq_by_acc method
 Example : $seq = $uni->get_Seq_by_acc('embl:J00231');
 Returns : whatever the selected database's get_Seq_by_acc returns
 Args    : an accession string, optionally prefixed with a database
           name and a separator, e.g. 'gb|A000012'


=cut

sub get_Seq_by_acc {
    my ($self, $str) = @_;

    # split the request into a database tag and the bare accession
    my ($db_tag, $accession) = $self->guess_id($str);

    # look up the database registered under that tag and delegate to it
    my $backend = $self->{'db_hash'}->{$db_tag};

    return $backend->get_Seq_by_acc($accession);
}
175 | |
176 | |
177 | |
=head2 guess_id

 Title   : guess_id
 Usage   : ($tag,$id) = $uni->guess_id('embl:HSHNRNPA');
 Function: Decides which internal database tag ('genbank', 'embl' or
           'swiss') a string refers to.

           If the string has the form <db><separator><id>, where the
           separator is one of ':', '|', '/' or ';', the <db> part is
           matched case-insensitively against the known database
           names and abbreviations. Otherwise the tag is guessed from
           the shape of the id itself: an id containing '_', or one
           that starts with Q, P or R and ends in a digit, is taken
           to be 'swiss'; anything else defaults to 'genbank'.
 Example : ($tag,$id) = $uni->guess_id('ROA1_HUMAN'); # ('swiss','ROA1_HUMAN')
 Returns : a two element list: the database tag and the id to look up
 Args    : the id string to analyse
 Throws  : if a database prefix is present but is not recognised


=cut

sub guess_id {
    my ($self, $str) = @_;

    # Explicit form: "<db><separator><id>"
    if ( $str =~ m{(\S+)[:|/;](\w+)} ) {
        my ($db, $id) = ($1, $2);
        my $tag;

        # All names are matched case-insensitively so that, for example,
        # 'SPTR:Q12345' is routed the same way as 'sptr:Q12345'.
        if ( $db =~ /gb|genbank|ncbi/i ) {
            $tag = 'genbank';
        }
        elsif ( $db =~ /embl|^em/i ) {
            $tag = 'embl';
        }
        elsif ( $db =~ /swiss|^sw|sptr/i ) {
            $tag = 'swiss';
        }
        else {
            # throw for the moment
            $self->throw("Could not guess database type $db from $str");
        }

        return ($tag, $id);
    }

    # No separator: auto-guess from just the id. Only the two swissprot
    # shapes are distinctive; everything else defaults to genbank.
    my $looks_like_swiss = $str =~ /_/ || $str =~ /^[QPR]\w+\d$/;
    my $tag = $looks_like_swiss ? 'swiss' : 'genbank';

    return ($tag, $str);
}
227 | |
228 | |
=head2 use_database

 Title   : use_database
 Usage   : $uni->use_database('embl',$embl);
 Function: Registers a database object under the given tag, replacing
           any database previously registered under that tag
 Example : $embl = Bio::Index::EMBL->new( -filename => '/some/index/file');
           $uni->use_database('embl',$embl);
 Returns : the database object that was registered
 Args    : the tag name (e.g. 'genbank', 'embl' or 'swiss') and the
           database object to use for it


=cut

sub use_database {
    my ($self, $name, $database) = @_;

    # tag => database object lookup table used by the get_Seq_by_* methods
    my $registry = $self->{'db_hash'};

    return $registry->{$name} = $database;
}
246 | |
247 |