comparison variant_effect_predictor/Bio/Index/Swissprot.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1
2 #
3 # $Id: Swissprot.pm,v 1.11 2002/10/22 07:38:33 lapp Exp $
4 #
5 # BioPerl module for Bio::Index::Swissprot
6 #
7 # Cared for by Ewan Birney <birney@sanger.ac.uk>
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Index::Swissprot - Interface for indexing (multiple) Swissprot
16 .dat files (ie flat file swissprot format).
17
18 =head1 SYNOPSIS
19
20 # Complete code for making an index for several
21 # Swissprot files
22 use Bio::Index::Swissprot;
23 use strict;
24
25 my $Index_File_Name = shift;
26 my $inx = Bio::Index::Swissprot->new('-filename' => $Index_File_Name,
27 '-write_flag' => 'WRITE');
28 $inx->make_index(@ARGV);
29
30 # Print out several sequences present in the index
31 # in gcg format
32 use Bio::Index::Swissprot;
33 use Bio::SeqIO;
34 use strict;
35
36 my $out = Bio::SeqIO->new( '-format' => 'gcg', '-fh' => \*STDOUT );
37 my $Index_File_Name = shift;
38 my $inx = Bio::Index::Swissprot->new('-filename' => $Index_File_Name);
39
40 foreach my $id (@ARGV) {
41 my $seq = $inx->fetch($id); # Returns Bio::Seq object
42 $out->write_seq($seq);
43 }
44
45 # alternatively
46
47 my $seq1 = $inx->get_Seq_by_id($id);
48 my $seq2 = $inx->get_Seq_by_acc($acc);
49
50 =head1 DESCRIPTION
51
52 Inherits functions for managing dbm files from Bio::Index::Abstract.pm,
53 and provides the basic functionality for indexing Swissprot files, and
54 retrieving the sequence from them. Heavily snaffled from James Gilbert's
55 Fasta system. Note: for best results 'use strict'.
56
57 =head1 FEEDBACK
58
59 =head2 Mailing Lists
60
61 User feedback is an integral part of the evolution of this and other
62 Bioperl modules. Send your comments and suggestions preferably to one
63 of the Bioperl mailing lists. Your participation is much appreciated.
64
65 bioperl-l@bioperl.org - General discussion
66 http://bio.perl.org/MailList.html - About the mailing lists
67
68 =head2 Reporting Bugs
69
70 Report bugs to the Bioperl bug tracking system to help us keep track
71 of the bugs and their resolution. Bug reports can be submitted via
72 email or the web:
73
74 bioperl-bugs@bio.perl.org
75 http://bugzilla.bioperl.org/
76
77 =head1 AUTHOR - Ewan Birney
78
79 Email - birney@sanger.ac.uk
80 (Swissprot adaption: lorenz@ist.org)
81
82 =head1 APPENDIX
83
84 The rest of the documentation details each of the object methods.
85 Internal methods are usually preceded with a _
86
87 =cut
88
89
90 # Let's begin the code...
91
92
93 package Bio::Index::Swissprot;
94
95 use vars qw($VERSION @ISA);
96 use strict;
97
98 use Bio::Index::AbstractSeq;
99 use Bio::Seq;
100
101 @ISA = qw(Bio::Index::AbstractSeq);
102
# Tell the indexing framework what kind of index this class builds.
# Returns the literal type-stamp string for Swissprot flat-file indexes.
sub _type_stamp {
    my $stamp = '__Swissprot_FLAT__';
    return $stamp;
}
106
107 #
108 # Suggested fix by Michael G Schwern <schwern@pobox.com> to
109 # get around a clash with CPAN shell...
110 #
111
112 BEGIN {
113 $VERSION = 0.1;
114 }
115
# Report the version of this indexing module, i.e. the current value of
# the package-level $VERSION.
sub _version {
    my $current = $VERSION;
    return $current;
}
119
120 =head2 _index_file
121
122 Title : _index_file
123 Usage : $index->_index_file( $file_name, $i )
124 Function: Specialist function to index Swissprot format files.
125 Is provided with a filename and an integer
126 by make_index in its SUPER class.
127 Example :
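128 Returns : 1 once the whole file has been scanned
129 Args : $file_name - name of the Swissprot file to index,
129          $i - index number of that file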
130
131 =cut
132
# Index one Swissprot flat file.
#
# Scans $file line by line. An entry starts at its "ID" line and ends at a
# "//" line. When the terminator is reached, the entry's ID and every
# accession collected from its "AC" lines are registered through
# add_record(), together with the file number $i and the byte offset of the
# entry's ID line.
#
# Args    : $file - name of the Swissprot flat file to read
#           $i    - index number of that file, passed through to add_record()
# Returns : 1 once the whole file has been scanned
# Throws  : via $self->throw() when the file cannot be opened, or when a
#           "//" line is reached before any ID line was seen for the entry
# Note    : an entry that is not closed by "//" before end of file is never
#           recorded.
sub _index_file {
    my ( $self, $file, $i ) = @_;

    # Three-argument open with a lexical handle: the file name is taken
    # literally, so leading/trailing whitespace or mode characters such as
    # '>' or '|' in $file can no longer change what gets opened.
    open( my $fh, '<', $file )
        or $self->throw("Can't open file for read : $file ($!)");

    my $begin = 0;    # Byte offset of the ID line of the entry being read.
    my $id;           # ID of the entry being read; undef between entries.
    my @accs;         # Accessions seen so far for the entry being read.

    # Main indexing loop. A named lexical is used for the line so that the
    # caller's $_ is left untouched.
    while (1) {

        # Remember where this line starts *before* reading it, rather than
        # deriving it afterwards from tell() minus the line length.
        my $line_start = tell($fh);

        my $line = <$fh>;
        last unless defined $line;

        if ( $line =~ /^\/\// ) {

            # End of entry.
            if ( !defined $id ) {
                $self->throw("Got to a end of entry line for an Swissprot flat file with no parsed ID. Considering this a problem!");
                next;    # Only reached if throw() returns.
            }
            if ( !@accs ) {
                $self->warn("For id [$id] in Swissprot flat file, got no accession number. Storing id index anyway");
            }

            $self->add_record( $id, $i, $begin );

            # Accessions point at the same entry; skip one equal to the ID
            # so the same key is not added twice.
            foreach my $acc (@accs) {
                next if $acc eq $id;
                $self->add_record( $acc, $i, $begin );
            }

            # Reset per-entry state.
            @accs = ();
            $id   = undef;
        }
        elsif ( $line =~ /^ID\s+(\S+)/ ) {
            $id    = $1;
            $begin = $line_start;
        }
        elsif ( $line =~ /^AC(.*)/ ) {

            # An AC line holds one or more ';'-terminated accessions.
            my $ac_text = $1;
            push @accs, ( $ac_text =~ /\s*(\S+);/g );
        }
    }

    close $fh;
    return 1;
}
187
188 =head2 _file_format
189
190 Title : _file_format
191 Usage : Internal function for indexing system
192 Function: Provides file format for this database
193 Example :
194 Returns : The string 'swiss'
195 Args : none
196
197
198 =cut
199
# Name of the sequence file format handled by this index. Any arguments
# (the invocant included) are accepted but not used.
sub _file_format {
    return 'swiss';
}
205
206
207
208 1;
209
210
211
212
213
214
215
216
217
218