comparison variant_effect_predictor/Bio/Index/EMBL.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: EMBL.pm,v 1.21.2.1 2003/03/19 16:23:08 heikki Exp $
2 #
3 # BioPerl module for Bio::Index::EMBL
4 #
5 # Cared for by Ewan Birney <birney@sanger.ac.uk>
6 #
7 # You may distribute this module under the same terms as perl itself
8
9 # POD documentation - main docs before the code
10
11 =head1 NAME
12
13 Bio::Index::EMBL - Interface for indexing (multiple) EMBL/Swissprot
14 .dat files (ie flat file embl/swissprot format).
15
16 =head1 SYNOPSIS
17
18 # Complete code for making an index for several
19 # EMBL files
20 use Bio::Index::EMBL;
21 use strict;
22
23 my $Index_File_Name = shift;
24 my $inx = Bio::Index::EMBL->new('-filename' => $Index_File_Name,
25 '-write_flag' => 'WRITE');
26 $inx->make_index(@ARGV);
27
28 # Print out several sequences present in the index
29 # in Fasta format
30 use Bio::Index::EMBL;
31 use strict;
32
33 my $Index_File_Name = shift;
34 my $inx = Bio::Index::EMBL->new('-filename' => $Index_File_Name);
35 my $out = Bio::SeqIO->new('-format' => 'Fasta','-fh' => \*STDOUT);
36
37 foreach my $id (@ARGV) {
38 my $seq = $inx->fetch($id); # Returns Bio::Seq object
39 $out->write_seq($seq);
40 }
41
42 # alternatively
43
44 my $seq1 = $inx->get_Seq_by_id($id);
45 my $seq2 = $inx->get_Seq_by_acc($acc);
46
47 =head1 DESCRIPTION
48
49 Inherits functions for managing dbm files from Bio::Index::Abstract.pm,
50 and provides the basic functionality for indexing EMBL files, and
51 retrieving the sequence from them. Heavily snaffled from James Gilbert's
52 Fasta system. Note: for best results 'use strict'.
53
54 =head1 FEEDBACK
55
56 =head2 Mailing Lists
57
58 User feedback is an integral part of the evolution of this and other
59 Bioperl modules. Send your comments and suggestions preferably to one
60 of the Bioperl mailing lists. Your participation is much appreciated.
61
62 bioperl-l@bioperl.org - General discussion
63 http://bioperl.org/MailList.shtml - About the mailing lists
64
65
66
67 =head2 Reporting Bugs
68
69 Report bugs to the Bioperl bug tracking system to help us keep track
70 of the bugs and their resolution. Bug reports can be submitted via
71 email or the web:
72
73 bioperl-bugs@bio.perl.org
74 http://bugzilla.bioperl.org/
75
76 =head1 AUTHOR - Ewan Birney
77
78 Email - birney@sanger.ac.uk
79
80 =head1 APPENDIX
81
82 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
83
84 =cut
85
86
87 # Let's begin the code...
88
89
90 package Bio::Index::EMBL;
91
92 use vars qw($VERSION @ISA);
93 use strict;
94
95 use Bio::Index::AbstractSeq;
96 use Bio::Seq;
97
98 @ISA = qw(Bio::Index::AbstractSeq);
99
# Returns the stamp string that identifies what kind of index this class builds.
sub _type_stamp {
    my $stamp = '__EMBL_FLAT__';
    return $stamp;
}
103
# The version is assigned inside a BEGIN block, i.e. while the file is
# being compiled. This follows a fix suggested by Michael G Schwern
# <schwern@pobox.com> to get around a clash with the CPAN shell.
BEGIN { $VERSION = 0.1 }
112
# Returns the current value of the package variable $VERSION.
sub _version {
    my $current = $VERSION;
    return $current;
}
116
117 =head2 _index_file
118
119 Title : _index_file
120 Usage : $index->_index_file( $file_name, $i )
121 Function: Specialist function to index EMBL format files.
122 Is provided with a filename and an integer
123 by make_index in its SUPER class.
124 Example :
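125 Returns : 1 once the whole file has been read
126 Args : $file_name - name of the EMBL flat file to index
127 $i - index number of that file, stored with every record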
127
128 =cut
129
# Index one EMBL/Swissprot flat file.
#
# Reads $file line by line. At every end-of-entry line ("//") it calls
# add_record() once for the entry's ID and once for each accession number
# that differs from the ID. Every record is stored with the file number $i
# and the offset of the entry's ID line.
#
# Args    : $file - name of the flat file to read
#           $i    - index number of that file, passed through to add_record()
# Returns : 1 after the whole file has been read
# Throws  : through $self->throw() when the file cannot be opened, or when
#           an end-of-entry line is reached without a preceding ID line
# Warns   : through $self->warn() when an entry has no accession number
sub _index_file {
    my ( $self, $file, $i ) = @_;

    my $begin = 0;    # offset of the current entry's ID line
    my $id;           # ID of the current entry; undef until an ID line is seen
    my @accs;         # accession numbers collected for the current entry

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as a mode or a pipe, and the handle is closed automatically
    # if an exception leaves this sub early.
    open( my $embl_fh, '<', $file )
        or $self->throw("Can't open file for read : $file");

    # A named lexical is used instead of $_ so that the method calls made
    # inside the loop cannot clobber the line being examined.
    while ( my $line = <$embl_fh> ) {
        if ( $line =~ m{^//} ) {

            # End of entry: store everything collected for it.
            if ( !defined $id ) {
                $self->throw("Got to a end of entry line for an EMBL flat file with no parsed ID. Considering this a problem!");
                next;    # only reached if throw() returns
            }
            if ( !@accs ) {
                $self->warn("For id [$id] in embl flat file, got no accession number. Storing id index anyway");
            }

            $self->add_record( $id, $i, $begin );

            foreach my $acc (@accs) {
                next if $acc eq $id;    # already stored under the ID
                $self->add_record( $acc, $i, $begin );
            }

            # Start the next entry with a clean slate. Without this reset the
            # accessions of earlier entries would be stored again, pointing at
            # the offset of every later entry, and the missing-ID check above
            # could only ever trigger for the first entry of a file.
            $id   = undef;
            @accs = ();
        }
        elsif ( $line =~ /^ID\s+(\S+)/ ) {
            $id = $1;

            # The handle now sits just past the ID line, so stepping back by
            # the line's length gives the offset where the entry starts. No
            # I/O layer is applied to the handle, so length() counts bytes.
            $begin = tell($embl_fh) - length($line);
        }
        elsif ( $line =~ /^AC\s+(.*)/ ) {

            # Accessions are separated by semicolons and/or whitespace.
            # Splitting on all whitespace also discards a trailing carriage
            # return from CRLF files; grep drops any empty leading field.
            push @accs, grep { length } split /[;\s]+/, $1;
        }
    }

    close $embl_fh;
    return 1;
}
182
183 =head2 _file_format
184
185 Title : _file_format
186 Usage : Internal function for indexing system
187 Function: Provides file format for this database
188 Example :
189 Returns :
190 Args :
191
192
193 =cut
194
# Internal hook for the indexing system: names the sequence file format of
# this database. Any arguments are ignored; the answer is always 'EMBL'.
sub _file_format {
    return 'EMBL';
}
200
201
202
203 1;
204
205
206
207
208
209
210
211
212
213