comparison variant_effect_predictor/Bio/DB/EMBL.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 # $Id: EMBL.pm,v 1.12.2.1 2003/06/25 13:44:18 heikki Exp $
3 #
4 # BioPerl module for Bio::DB::EMBL
5 #
6 # Cared for by Heikki Lehvaslaiho <Heikki@ebi.ac.uk>
7 #
8 # Copyright Jason Stajich
9 #
10 # You may distribute this module under the same terms as perl itself
11
12 # POD documentation - main docs before the code
13
14 =head1 NAME
15
16 Bio::DB::EMBL - Database object interface for EMBL entry retrieval
17
18 =head1 SYNOPSIS
19
20 use Bio::DB::EMBL;
21
22 $embl = new Bio::DB::EMBL;
23
24 # remember that EMBL_ID does not equal GenBank_ID!
25 $seq = $embl->get_Seq_by_id('BUM'); # EMBL ID
26 print "cloneid is ", $seq->id, "\n";
27
28 # or changing to accession number and Fasta format ...
29 $embl->request_format('fasta');
30 $seq = $embl->get_Seq_by_acc('J02231'); # EMBL ACC
31 print "cloneid is ", $seq->id, "\n";
32
33 # especially when using versions, you had better be prepared
34 # for not getting what you want
35 eval {
36 $seq = $embl->get_Seq_by_version('J02231.1'); # EMBL VERSION
37 };
38 print "cloneid is ", $seq->id, "\n" unless $@;
39
40 # or ... best when downloading very large files, prevents
41 # keeping all of the file in memory
42
43 # also don't want features, just sequence so let's save bandwidth
44 # and request Fasta sequence
45 $embl = new Bio::DB::EMBL(-retrievaltype => 'tempfile' ,
46 -format => 'fasta');
47 my $seqio = $embl->get_Stream_by_batch(['AC013798', 'AC021953'] );
48 while( my $clone = $seqio->next_seq ) {
49 print "cloneid is ", $clone->id, "\n";
50 }
51
52 =head1 DESCRIPTION
53
54 Allows the dynamic retrieval of sequence objects L<Bio::Seq> from the
55 EMBL database using the dbfetch script at EBI:
56 L<http://www.ebi.ac.uk/cgi-bin/dbfetch>.
57
58 In order to make changes transparent we have host type (currently only
59 ebi) and location (defaults to ebi) separated out. This allows later
60 additions of more servers in different geographical locations.
61
62 The functionality of this module is inherited from L<Bio::DB::DBFetch>
63 which implements L<Bio::DB::WebDBSeqI>.
64
65 =head1 FEEDBACK
66
67 =head2 Mailing Lists
68
69 User feedback is an integral part of the evolution of this and other
70 Bioperl modules. Send your comments and suggestions preferably to one
71 of the Bioperl mailing lists. Your participation is much appreciated.
72
73 bioperl-l@bioperl.org - General discussion
74 http://bio.perl.org/MailList.html - About the mailing lists
75
76 =head2 Reporting Bugs
77
78 Report bugs to the Bioperl bug tracking system to help us keep track
79 of the bugs and their resolution.
80 Bug reports can be submitted via email or the web:
81
82 bioperl-bugs@bio.perl.org
83 http://bugzilla.bioperl.org/
84
85 =head1 AUTHOR - Heikki Lehvaslaiho
86
87 Email Heikki Lehvaslaiho E<lt>Heikki@ebi.ac.ukE<gt>
88
89 =head1 APPENDIX
90
91 The rest of the documentation details each of the object
92 methods. Internal methods are usually preceded with a _
93
94 =cut
95
96 # Let the code begin...
97
98 package Bio::DB::EMBL;
99 use strict;
100 use vars qw(@ISA $MODVERSION %HOSTS %FORMATMAP $DEFAULTFORMAT);
101
102 $MODVERSION = '0.2';
103 use Bio::DB::DBFetch;
104 use Bio::DB::RefSeq;
105
106 @ISA = qw(Bio::DB::DBFetch);
107
BEGIN {
    # Module-wide defaults; new() hands these to every freshly built object.

    # Format requested when the caller does not choose one.
    $DEFAULTFORMAT = 'embl';

    # Supported format names; each maps to itself.
    %FORMATMAP = (
        'embl'  => 'embl',
        'fasta' => 'fasta',
    );

    # Server table keyed by host type. Further entries can be added here.
    # NOTE(review): the %s in baseurl is presumably filled with one of the
    # names under 'hosts' by the parent class -- confirm in Bio::DB::DBFetch.
    %HOSTS = (
        'dbfetch' => {
            baseurl => 'http://%s/cgi-bin/dbfetch?db=embl&style=raw',
            hosts   => {
                'ebi' => 'www.ebi.ac.uk',
            },
        },
    );
}
123
=head2 new

 Title   : new
 Usage   : $embl = Bio::DB::EMBL->new(@options)
 Function: Creates a new EMBL handle
 Returns : New Bio::DB::EMBL handle
 Args    : -delay   number of seconds to delay between fetches (3s)

NOTE: There are other options that are used internally. All arguments
are passed unchanged to the parent class constructor.

=cut

sub new {
    my $class   = shift;
    my @options = @_;

    # The parent class builds the object from the caller's options.
    my $self = $class->SUPER::new(@options);

    # Start from empty tables, then install the module-wide defaults
    # through the accessors.
    @{$self}{ '_hosts', '_formatmap' } = ( {}, {} );
    $self->hosts( \%HOSTS );
    $self->formatmap( \%FORMATMAP );

    $self->{'_default_format'} = $DEFAULTFORMAT;

    return $self;
}
149
150
151 =head2 Bio::DB::WebDBSeqI methods
152
153 Overriding WebDBSeqI method to help newbies to retrieve sequences.
154 EMBL database is all too often passed RefSeq accessions. This
155 redirects those calls. See L<Bio::DB::RefSeq>.
156
157
=head2 get_Stream_by_acc

 Title   : get_Stream_by_acc
 Usage   : $seqio = $db->get_Stream_by_acc([$acc1, $acc2]);
 Function: Gets a series of Seq objects by accession numbers
 Returns : a Bio::SeqIO stream object
 Args    : $ref : a reference to an array of accession numbers for
                  the desired sequence entries
 Note    : The request is first passed to _check_id(). When that
           returns a Bio::DB::RefSeq handle, the stream is fetched
           through that handle instead of through this object.

=cut

sub get_Stream_by_acc {
    my ($self, $ids) = @_;

    # _check_id() returns a RefSeq handle when the request must be redirected.
    my $redirect = $self->_check_id($ids);

    # Pick the database that will actually serve the request.
    my $source = ( $redirect && $redirect->isa('Bio::DB::RefSeq') )
               ? $redirect
               : $self;

    return $source->get_seq_stream('-uids' => $ids, '-mode' => 'single');
}
179
180
=head2 _check_id

 Title   : _check_id
 Usage   : my $newdb = $self->_check_id($ids);
 Function: Screens the requested IDs before a fetch. IDs containing
           'NT_' are rejected; IDs matching /N._/ are treated as RefSeq
           entries, and a RefSeq database handle is returned so that
           the caller can redirect the request.
 Returns : A Bio::DB::RefSeq object if any ID looks like a RefSeq
           entry, otherwise nothing (undef in scalar context)
 Throws  : if any ID contains 'NT_'
 Args    : $ids : a single ID string, or a reference to an array of
                  ID strings

=cut

sub _check_id {
    my ($self, $ids) = @_;

    # Callers pass either one ID or an array reference of IDs. Matching
    # the reference itself would only test its "ARRAY(0x...)" string
    # form, so expand it to the individual IDs first.
    my @candidates = grep { defined }
                     ( ref($ids) eq 'ARRAY' ? @{$ids} : ($ids) );

    # NT contigs can not be retrieved
    if ( grep { /NT_/ } @candidates ) {
        $self->throw("NT_ contigs are whole chromosome files which are not part of regular ".
                     "database distributions. Go to ftp://ftp.ncbi.nih.gov/genomes/.");
    }

    # Asking for a RefSeq from EMBL/GenBank
    my @refseq_ids = grep { /N._/ } @candidates;
    return unless @refseq_ids;

    # A verbosity below zero silences the redirect notice.
    $self->warn("[" . join(', ', @refseq_ids) . "] is not a normal sequence database but a RefSeq entry.".
                " Redirecting the request.\n")
        if $self->verbose >= 0;

    return Bio::DB::RefSeq->new(-verbose => $self->verbose);
}
207
208
209 1;