Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/DB/EMBL.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # | |
2 # $Id: EMBL.pm,v 1.12.2.1 2003/06/25 13:44:18 heikki Exp $ | |
3 # | |
4 # BioPerl module for Bio::DB::EMBL | |
5 # | |
6 # Cared for by Heikki Lehvaslaiho <Heikki@ebi.ac.uk> | |
7 # | |
8 # Copyright Jason Stajich | |
9 # | |
10 # You may distribute this module under the same terms as perl itself | |
11 | |
12 # POD documentation - main docs before the code | |
13 | |
14 =head1 NAME | |
15 | |
16 Bio::DB::EMBL - Database object interface for EMBL entry retrieval | |
17 | |
18 =head1 SYNOPSIS | |
19 | |
20 use Bio::DB::EMBL; | |
21 | |
22 $embl = new Bio::DB::EMBL; | |
23 | |
24 # remember that EMBL_ID does not equal GenBank_ID! | |
25 $seq = $embl->get_Seq_by_id('BUM'); # EMBL ID | |
26 print "cloneid is ", $seq->id, "\n"; | |
27 | |
28 # or changing to accession number and Fasta format ... | |
29 $embl->request_format('fasta'); | |
30 $seq = $embl->get_Seq_by_acc('J02231'); # EMBL ACC | |
31 print "cloneid is ", $seq->id, "\n"; | |
32 | |
33 # especially when using versions, you had better be prepared | |
34 # for not getting what you want | |
35 eval { | |
36 $seq = $embl->get_Seq_by_version('J02231.1'); # EMBL VERSION | |
37 }; | |
38 print "cloneid is ", $seq->id, "\n" unless $@; | |
39 | |
40 # or ... best when downloading very large files, prevents | |
41 # keeping all of the file in memory | |
42 | |
43 # also don't want features, just sequence so let's save bandwidth | |
44 # and request Fasta sequence | |
45 $embl = new Bio::DB::EMBL(-retrievaltype => 'tempfile' , | |
46 -format => 'fasta'); | |
47 my $seqio = $embl->get_Stream_by_batch(['AC013798', 'AC021953'] ); | |
48 while( my $clone = $seqio->next_seq ) { | |
49 print "cloneid is ", $clone->id, "\n"; | |
50 } | |
51 | |
52 =head1 DESCRIPTION | |
53 | |
54 Allows the dynamic retrieval of sequence objects L<Bio::Seq> from the | |
55 EMBL database using the dbfetch script at EBI: | |
56 L<http://www.ebi.ac.uk/cgi-bin/dbfetch>. | |
57 | |
58 In order to make changes transparent we have host type (currently only | |
59 ebi) and location (defaults to ebi) separated out. This allows later | |
60 additions of more servers in different geographical locations. | |
61 | |
62 The functionality of this module is inherited from L<Bio::DB::DBFetch> | |
63 which implements L<Bio::DB::WebDBSeqI>. | |
64 | |
65 =head1 FEEDBACK | |
66 | |
67 =head2 Mailing Lists | |
68 | |
69 User feedback is an integral part of the evolution of this and other | |
70 Bioperl modules. Send your comments and suggestions preferably to one | |
71 of the Bioperl mailing lists. Your participation is much appreciated. | |
72 | |
73 bioperl-l@bioperl.org - General discussion | |
74 http://bio.perl.org/MailList.html - About the mailing lists | |
75 | |
76 =head2 Reporting Bugs | |
77 | |
78 Report bugs to the Bioperl bug tracking system to help us keep track | |
79 of the bugs and their resolution. | |
80 Bug reports can be submitted via email or the web: | |
81 | |
82 bioperl-bugs@bio.perl.org | |
83 http://bugzilla.bioperl.org/ | |
84 | |
85 =head1 AUTHOR - Heikki Lehvaslaiho | |
86 | |
87 Email Heikki Lehvaslaiho E<lt>Heikki@ebi.ac.ukE<gt> | |
88 | |
89 =head1 APPENDIX | |
90 | |
91 The rest of the documentation details each of the object | |
92 methods. Internal methods are usually preceded with a _ | |
93 | |
94 =cut | |
95 | |
96 # Let the code begin... | |
97 | |
98 package Bio::DB::EMBL; | |
99 use strict; | |
100 use vars qw(@ISA $MODVERSION %HOSTS %FORMATMAP $DEFAULTFORMAT); | |
101 | |
102 $MODVERSION = '0.2'; | |
103 use Bio::DB::DBFetch; | |
104 use Bio::DB::RefSeq; | |
105 | |
106 @ISA = qw(Bio::DB::DBFetch); | |
107 | |
BEGIN {
    # Package-level lookup tables, filled in at compile time.
    # you can add your own here theoretically.

    # Location name => server host name for the dbfetch service.
    my %dbfetch_servers = ( 'ebi' => 'www.ebi.ac.uk' );

    # Host type => URL template (the %s slot takes the host name) plus
    # the table of known server locations.
    %HOSTS = (
        'dbfetch' => {
            baseurl => 'http://%s/cgi-bin/dbfetch?db=embl&style=raw',
            hosts   => \%dbfetch_servers,
        },
    );

    # Each supported format name maps onto itself.
    %FORMATMAP = map { $_ => $_ } qw(embl fasta);

    $DEFAULTFORMAT = 'embl';
}
123 | |
=head2 new

 Title   : new
 Usage   : $embl = Bio::DB::EMBL->new(@options)
 Function: Creates a new EMBL database handle
 Returns : New Bio::DB::EMBL handle
 Args    : -delay   number of seconds to delay between fetches (3s)

 NOTE: There are other options that are used internally.

=cut

sub new {
    my $class = shift;

    # The parent constructor receives the caller's options untouched.
    my $self = $class->SUPER::new(@_);

    # Start from empty tables, then install the package-level defaults
    # through the inherited accessors.
    @{$self}{ '_hosts', '_formatmap' } = ( {}, {} );
    $self->hosts( \%HOSTS );
    $self->formatmap( \%FORMATMAP );
    $self->{'_default_format'} = $DEFAULTFORMAT;

    return $self;
}
149 | |
150 | |
151 =head2 Bio::DB::WebDBSeqI methods | |
152 | |
153 Overriding WebDBSeqI method to help newbies to retrieve sequences. | |
154 EMBL database is all too often passed RefSeq accessions. This | |
155 redirects those calls. See L<Bio::DB::RefSeq>. | |
156 | |
157 | |
=head2 get_Stream_by_acc

 Title   : get_Stream_by_acc
 Usage   : $seqio = $db->get_Stream_by_acc([$acc1, $acc2]);
 Function: Gets a series of Seq objects by accession numbers
 Returns : a Bio::SeqIO stream object
 Args    : $ref : a reference to an array of accession numbers for
                  the desired sequence entries
 Note    : The accessions are first passed to _check_id(); when that
           returns a Bio::DB::RefSeq object the request is served by
           that object instead of this one.

=cut

sub get_Stream_by_acc {
    my ($self, $ids) = @_;

    # _check_id() may hand back an alternative database to query.
    my $redirect = $self->_check_id($ids);
    my $source =
        ( $redirect && $redirect->isa('Bio::DB::RefSeq') )
        ? $redirect
        : $self;

    return $source->get_seq_stream( '-uids' => $ids, '-mode' => 'single' );
}
179 | |
180 | |
=head2 _check_id

 Title   : _check_id
 Usage   : $newdb = $self->_check_id($ids);
 Function: Screens the requested accession(s) before a fetch. Throws
           for NT_ contigs and returns a RefSeq database object when
           a RefSeq-style accession (one matching /N._/) is present.
 Returns : A Bio::DB::RefSeq object when the request should be
           redirected, otherwise nothing (false)
 Args    : $ids : a single accession string, or a reference to an
                  array of accession strings
 Throws  : when any accession matches /NT_/

=cut

sub _check_id {
    my ($self, $ids) = @_;

    # Callers pass either one accession or a reference to an array of
    # them. Matching the reference itself would only ever see the
    # string "ARRAY(0x...)", so flatten it before inspecting anything.
    my @accessions = grep { defined } ( ref($ids) eq 'ARRAY' ? @{$ids} : $ids );

    # NT contigs can not be retrieved
    if ( grep { /NT_/ } @accessions ) {
        $self->throw("NT_ contigs are whole chromosome files which are not part of regular ".
                     "database distributions. Go to ftp://ftp.ncbi.nih.gov/genomes/.");
    }

    # Asking for a RefSeq from EMBL/GenBank
    my @refseq = grep { /N._/ } @accessions;
    if (@refseq) {
        my $label = join ', ', @refseq;
        $self->warn("[$label] is not a normal sequence database but a RefSeq entry.".
                    " Redirecting the request.\n")
            if $self->verbose >= 0;
        return Bio::DB::RefSeq->new(-verbose => $self->verbose);
    }

    # Nothing to redirect: an explicit false value for the caller's check.
    return;
}
207 | |
208 | |
209 1; |