comparison variant_effect_predictor/Bio/Index/GenBank.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 # $Id: GenBank.pm,v 1.10 2002/10/22 07:38:33 lapp Exp $
3 #
4 # BioPerl module for Bio::Index::GenBank
5 #
6 # Cared for by Ewan Birney <birney@sanger.ac.uk>
7 #
8 # You may distribute this module under the same terms as perl itself
9
10 # POD documentation - main docs before the code
11
12 =head1 NAME
13
14 Bio::Index::GenBank - Interface for indexing (multiple) GenBank
15 .seq files (ie flat file GenBank format).
16
17 =head1 SYNOPSIS
18
19 # Complete code for making an index for several
20 # GenBank files
21 use Bio::Index::GenBank;
22 use strict;
23
24 my $Index_File_Name = shift;
25 my $inx = Bio::Index::GenBank->new('-filename' => $Index_File_Name,
26 '-write_flag' => 'WRITE');
27 $inx->make_index(@ARGV);
28
29 # Print out several sequences present in the index
30 # in gcg format
31 use Bio::Index::GenBank;
32 use Bio::SeqIO;
33 use strict;
34
35 my $Index_File_Name = shift;
36 my $inx = Bio::Index::GenBank->new('-filename' => $Index_File_Name);
37 my $seqio = Bio::SeqIO->new(-format => 'gcg');
38 foreach my $id (@ARGV) {
39 my $seq = $inx->fetch($id); # Returns Bio::Seq object
40 $seqio->write_seq($seq);
41 }
42
43 # alternatively
44
45 my $seq1 = $inx->get_Seq_by_id($id);
46 my $seq2 = $inx->get_Seq_by_acc($acc);
47
48 =head1 DESCRIPTION
49
50 Inherits functions for managing dbm files from Bio::Index::Abstract.pm,
51 and provides the basic functionality for indexing GenBank files, and
52 retrieving the sequence from them. Heavily snaffled from James Gilbert's
53 Fasta system. Note: for best results 'use strict'.
54
55 =head1 FEEDBACK
56
57 =head2 Mailing Lists
58
59 User feedback is an integral part of the evolution of this and other
60 Bioperl modules. Send your comments and suggestions preferably to one
61 of the Bioperl mailing lists. Your participation is much appreciated.
62
63 bioperl-l@bioperl.org - General discussion
64 http://bioperl.org/MailList.shtml - About the mailing lists
65
66 =head2 Reporting Bugs
67
68 Report bugs to the Bioperl bug tracking system to help us keep track
69 of the bugs and their resolution. Bug reports can be submitted via
70 email or the web:
71
72 bioperl-bugs@bio.perl.org
73 http://bugzilla.bioperl.org/
74
75 =head1 AUTHOR - Ewan Birney
76
77 Email - birney@ebi.ac.uk
78
79 =head1 APPENDIX
80
81 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
82
83 =cut
84
85
86 # Let's begin the code...
87
88
89 package Bio::Index::GenBank;
90
91 use vars qw($VERSION @ISA);
92 use strict;
93
94 use Bio::Index::AbstractSeq;
95 use Bio::Seq;
96
97 @ISA = qw(Bio::Index::AbstractSeq);
98
# Returns the fixed stamp string that says what kind of index this class
# represents (a GenBank flat-file index).
sub _type_stamp {
    my $stamp = '__GenBank_FLAT__';
    return $stamp;
}
102
#
# $VERSION is assigned inside a BEGIN block, following a fix suggested by
# Michael G Schwern <schwern@pobox.com> to get around a clash with the
# CPAN shell.
#

BEGIN { $VERSION = 0.1; }
111
# Returns the current value of this module's $VERSION package variable.
sub _version {
    my $module_version = $VERSION;
    return $module_version;
}
115
116 =head2 _index_file
117
118 Title : _index_file
119 Usage : $index->_index_file( $file_name, $i )
120 Function: Specialist function to index GenBank format files.
121 Is provided with a filename and an integer
122 by make_index in its SUPER class.
123 Example :
124 Returns : 1 once the whole file has been read
125 Args : name of the file to index, index number of that file
126
127 =cut
128
# Index one GenBank flat file.
#
# Reads $file line by line. For every record (terminated by a line that
# starts with "//") it calls $self->add_record( $key, $i, $offset ) once for
# the LOCUS id and once for each distinct accession collected from the
# ACCESSION and VERSION lines ("GI:" prefix removed), where $offset is the
# byte position of the record's LOCUS line.
#
# Args    : $file - name of the file to index
#           $i    - index number of the file being indexed
# Returns : 1
# Throws  : via $self->throw if the file cannot be opened, or if an end of
#           entry line is reached for a record that had no LOCUS line.
sub _index_file {
    my( $self,
        $file,  # File name
        $i      # Index-number of file being indexed
      ) = @_;

    my $id;         # ID taken from the LOCUS line of the current record
    my @accs;       # Accessions of the current record; also put into the index
    my $begin = 0;  # Offset from start of file of the current record's LOCUS line

    # Three-argument open with a lexical handle: the file name can no longer
    # be interpreted as an open mode, and no package-global handle is left
    # behind. $! is reported so the caller can see why the open failed.
    open my $fh, '<', $file
        or $self->throw("Can't open file for read : $file ($!)");

    # Position of the start of the line about to be read. Remembering tell()
    # before each read avoids deriving the offset from length($line), which
    # is only right when one character is exactly one byte on disk.
    my $line_start = tell $fh;

    # Main indexing loop. A lexical $line is used so the caller's $_ is
    # left alone.
    while ( defined( my $line = <$fh> ) ) {
        if ( $line =~ m{^//} ) {
            if ( !defined $id ) {
                $self->throw("Got to a end of entry line for an GenBank flat file with no parsed ID. Considering this a problem!");
                next;   # only reached if throw() returns instead of dying
            }
            if ( !@accs ) {
                $self->warn("For id [$id] in GenBank flat file, got no accession number. Storing id index anyway");
            }

            $self->add_record($id, $i, $begin);

            foreach my $acc (@accs) {
                if ( $acc ne $id ) {
                    $self->add_record($acc, $i, $begin);
                }
            }

            # Forget the finished record completely. Without resetting $id,
            # a following record that lacks a LOCUS line would be indexed
            # under the previous record's ID and offset instead of being
            # reported by the check above.
            $id   = undef;
            @accs = ();
        } elsif ( $line =~ /^LOCUS\s+(\S+)/ ) {
            $id    = $1;
            $begin = $line_start;
        } elsif ( $line =~ /^ACCESSION(.*)/ ) {   # ignore ? if there.
            @accs = ( $1 =~ /\s*(\S+)/g );
        } elsif ( $line =~ /^VERSION(.*)/ ) {
            # Named $version rather than $a, so the sort variable $a is
            # not shadowed.
            my $version = $1;
            $version =~ s/^\s+//;
            $version =~ s/\s+$//;
            $version =~ s/GI\://;
            push @accs, split( /\s+/, $version );
        }
    }
    continue {
        $line_start = tell $fh;
    }

    close $fh;
    return 1;
}
188
189 =head2 _file_format
190
191 Title : _file_format
192 Usage : Internal function for indexing system
193 Function: Provides file format for this database
194 Example :
195 Returns : the string 'GenBank'
196 Args : none (any arguments passed are ignored)
197
198
199 =cut
200
# Internal function for the indexing system: names the file format of this
# database. Always returns the string 'GenBank'; arguments are not examined.
sub _file_format {
    return 'GenBank';
}
206
207 1;