#
# $Id: RefSeq.pm,v 1.5 2002/10/22 07:38:29 lapp Exp $
#
# BioPerl module for Bio::DB::RefSeq
#
# Cared for by Heikki Lehvaslaiho <Heikki@ebi.ac.uk>
#
# Copyright Jason Stajich
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::DB::RefSeq - Database object interface for RefSeq retrieval

=head1 SYNOPSIS

  use Bio::DB::RefSeq;

  $db = Bio::DB::RefSeq->new;

  # most of the time RefSeq_ID eq RefSeq acc
  $seq = $db->get_Seq_by_id('NM_006732'); # RefSeq ID
  print "accession is ", $seq->accession_number, "\n";

  # or changing to accession number and Fasta format ...
  $db->request_format('fasta');
  $seq = $db->get_Seq_by_acc('NM_006732'); # RefSeq ACC
  print "seq is ", $seq->seq, "\n";

  # especially when using versions, you had better be prepared
  # for not getting what you want
  eval {
      $seq = $db->get_Seq_by_version('NM_006732.1'); # RefSeq VERSION
  };
  print "accession is ", $seq->accession_number, "\n" unless $@;

  # or ... best when downloading very large files, prevents
  # keeping all of the file in memory

  # also don't want features, just sequence so let's save bandwidth
  # and request Fasta sequence
  $db = Bio::DB::RefSeq->new(-retrievaltype => 'tempfile',
                             -format        => 'fasta');
  my $seqio = $db->get_Stream_by_batch(['NM_006732', 'NM_005252'] );
  while( my $seq = $seqio->next_seq ) {
      print "seqid is ", $seq->id, "\n";
  }

=head1 DESCRIPTION

Allows the dynamic retrieval of sequence objects L<Bio::Seq> from the
RefSeq database using the dbfetch script at EBI:
L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>.

In order to make changes transparent we have host type (currently only
ebi) and location (defaults to ebi) separated out.  This allows later
additions of more servers in different geographical locations.

The functionality of this module is inherited from L<Bio::DB::DBFetch>
which implements L<Bio::DB::WebDBSeqI>.

This module retrieves entries from EBI although it
retrieves database entries produced at NCBI.  When read into bioperl
objects, the parser for GenBank format is used.  RefSeq is a
NONSTANDARD GenBank file so be ready for surprises.

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bio.perl.org/MailList.html      - About the mailing lists

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution.
Bug reports can be submitted via email or the web:

  bioperl-bugs@bio.perl.org
  http://bugzilla.bioperl.org/

=head1 AUTHOR - Heikki Lehvaslaiho

Email Heikki Lehvaslaiho E<lt>Heikki@ebi.ac.ukE<gt>

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut

# Let the code begin...

# Package preamble: declares the class, its package globals and its parent.
package Bio::DB::RefSeq;
use strict;

# Package globals; %HOSTS, %FORMATMAP and $DEFAULTFORMAT are filled in by the
# BEGIN block below and read by new().
use vars qw(@ISA $MODVERSION %HOSTS %FORMATMAP $DEFAULTFORMAT);

$MODVERSION = '0.1';
use Bio::DB::DBFetch;

# This class inherits from Bio::DB::DBFetch; new() delegates to it via SUPER.
@ISA = qw(Bio::DB::DBFetch);

# Fill the package-level configuration tables at compile time.
#   %HOSTS         - per host type ('dbfetch'): a URL template and the known
#                    host names keyed by location ('ebi').
#   %FORMATMAP     - maps a requested format name to the name sent to the
#                    server (an identity mapping for all three formats here).
#   $DEFAULTFORMAT - format copied into each object by new().
BEGIN {
    # you can add your own here theoretically.
    %HOSTS = (
        'dbfetch' => {
            # NOTE(review): the %s looks like a sprintf placeholder for one of
            # the host names below, filled in by the parent class - confirm.
            baseurl => 'http://%s/cgi-bin/dbfetch?db=refseq&style=raw',
            hosts   => {
                'ebi' => 'www.ebi.ac.uk'
            }
        }
    );
    %FORMATMAP = (
        'embl'    => 'embl',
        'genbank' => 'genbank',
        'fasta'   => 'fasta'
    );
    $DEFAULTFORMAT = 'genbank';
}

# Constructor.
#
# Args    : passed through unchanged to the parent class constructor.
# Returns : the object produced by the parent constructor, with the RefSeq
#           host table, format map and default format installed.
#
# hosts() and formatmap() are not defined in this file; presumably they are
# accessors inherited from the parent class - confirm.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    # Start from empty tables before handing the package-level ones to the
    # accessors.
    $self->{'_hosts'}     = {};
    $self->{'_formatmap'} = {};

    $self->hosts(\%HOSTS);
    $self->formatmap(\%FORMATMAP);
    $self->{'_default_format'} = $DEFAULTFORMAT;

    return $self;
}

# A module file must end by returning a true value.
1;