comparison variant_effect_predictor/Bio/Index/Fasta.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 # $Id: Fasta.pm,v 1.20 2002/10/22 07:38:33 lapp Exp $
3 #
4 # BioPerl module for Bio::Index::Fasta
5 #
6 # Cared for by James Gilbert <jgrg@sanger.ac.uk>
7 #
8 # You may distribute this module under the same terms as perl itself
9
10 # POD documentation - main docs before the code
11
12 =head1 NAME
13
14 Bio::Index::Fasta - Interface for indexing (multiple) fasta files
15
16 =head1 SYNOPSIS
17
18 # Complete code for making an index for several
19 # fasta files
20 use Bio::Index::Fasta;
21 use strict;
22
23 my $Index_File_Name = shift;
24 my $inx = Bio::Index::Fasta->new(
25 '-filename' => $Index_File_Name,
26 '-write_flag' => 1);
27 $inx->make_index(@ARGV);
28
29 # Print out several sequences present in the index
30 # in Fasta format
31 use Bio::Index::Fasta;
32 use strict;
33
34 my $Index_File_Name = shift;
35 my $inx = Bio::Index::Fasta->new('-filename' => $Index_File_Name);
36 my $out = Bio::SeqIO->new('-format' => 'Fasta','-fh' => \*STDOUT);
37
38 foreach my $id (@ARGV) {
39 my $seq = $inx->fetch($id); # Returns Bio::Seq object
40 $out->write_seq($seq);
41 }
42
43 # or, alternatively
44
45 my $seq = $inx->get_Seq_by_id($id); #identical to fetch
46
47 =head1 DESCRIPTION
48
49 Inherits functions for managing dbm files from Bio::Index::Abstract.pm,
50 and provides the basic functionality for indexing fasta files, and
51 retrieving the sequence from them. Note: for best results 'use strict'.
52
53 Bio::Index::Fasta supports the Bio::DB::BioSeqI interface, meaning
54 it can be used as a Sequence database for other parts of bioperl
55
56 =head1 FEEDBACK
57
58 =head2 Mailing Lists
59
60 User feedback is an integral part of the evolution of this and other
61 Bioperl modules. Send your comments and suggestions preferably to one
62 of the Bioperl mailing lists. Your participation is much appreciated.
63
64 bioperl-l@bioperl.org - General discussion
65 http://bioperl.org/MailList.shtml - About the mailing lists
66
67 =head2 Reporting Bugs
68
69 Report bugs to the Bioperl bug tracking system to help us keep track
70 of the bugs and their resolution. Bug reports can be submitted via
71 email or the web:
72
73 bioperl-bugs@bio.perl.org
74 http://bugzilla.bioperl.org/
75
76 =head1 AUTHOR - James Gilbert
77
78 Email - jgrg@sanger.ac.uk
79
80 =head1 APPENDIX
81
82 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
83
84 =cut
85
86
87 # Let the code begin...
88
89
90 package Bio::Index::Fasta;
91
92 use vars qw($VERSION @ISA);
93 use strict;
94
95 use Bio::Index::AbstractSeq;
96 use Bio::Seq;
97
98 @ISA = qw(Bio::Index::AbstractSeq);
99
100 #
101 # Suggested fix by Michael G Schwern <schwern@pobox.com> to
102 # get around a clash with CPAN shell...
103 #
104
# Assign the module version inside a BEGIN block, i.e. while the file is
# still being compiled.
BEGIN { $VERSION = 0.2 }
108
# Report the version number stored in this package's $VERSION.
sub _version {
    my $version = $VERSION;
    return $version;
}
112
113 =head2 _file_format
114
115 Title : _file_format
116 Function: The file format for this package, which is needed
117 by the SeqIO system when reading the sequence.
118 Returns : 'Fasta'
119
120 =cut
121
# Name of the sequence file format handled by this index class.
sub _file_format {
    my $format = 'Fasta';
    return $format;
}
125
126
127
128 =head2 _index_file
129
130 Title : _index_file
131 Usage : $index->_index_file( $file_name, $i )
132 Function: Specialist function to index FASTA format files.
133 Is provided with a filename and an integer
134 by make_index in its SUPER class.
135 Example :
136 Returns : 1 once the whole file has been read
137 Args : file name, index-number of the file being indexed
138
139 =cut
140
# Index one FASTA file: for every header line (a line starting with '>'),
# pass the line to the configured id_parser and add one index record per
# returned ID.
#
# Args    : $file - name of the FASTA file to read
#           $i    - index-number of the file, stored with each record
# Returns : 1 once the whole file has been scanned
# Errors  : calls $self->throw if the file cannot be opened for reading
sub _index_file {
    my( $self,
        $file,  # File name
        $i,     # Index-number of file being indexed
        ) = @_;

    my $id_parser = $self->id_parser;

    # Three-argument open with an explicit read mode: characters such as
    # '>' or '|' in $file are taken literally instead of being interpreted
    # as an open mode, and the handle is lexical rather than a global
    # bareword.
    open my $fasta_fh, '<', $file
        or $self->throw("Can't open file for read : $file ($!)");

    # Main indexing loop
    while (<$fasta_fh>) {
        next unless /^>/;

        # tell() is the position just past the header line; subtracting the
        # line length gives the offset of the '>', and adding one gives the
        # position of the first character after the '>'.
        my $begin = tell($fasta_fh) - length($_) + 1;

        foreach my $id ($id_parser->($_)) {
            $self->add_record($id, $i, $begin);
        }
    }

    close $fasta_fh;
    return 1;
}
172
173 =head2 id_parser
174
175 Title : id_parser
176 Usage : $index->id_parser( CODE )
177 Function: Stores or returns the code used by record_id to
178 parse the ID for record from a string. Useful
179 for (for instance) specifying a different
180 parser for different flavours of FASTA file.
181 Returns \&default_id_parser (see below) if not
182 set. If you supply your own id_parser
183 subroutine, then it should expect a fasta
184 description line. An entry will be added to
185 the index for each string in the list returned.
186 Example : $index->id_parser( \&my_id_parser )
187 Returns : ref to CODE if called without arguments
188 Args : CODE
189
190 =cut
191
# Combined setter/getter for the ID-parsing callback.
# A true $code argument is stored on the object; the stored callback is
# returned, or a reference to default_id_parser when none has been stored.
sub id_parser {
    my $self = shift;
    my $code = shift;

    $self->{'_id_parser'} = $code if $code;

    my $parser = $self->{'_id_parser'};
    return $parser ? $parser : \&default_id_parser;
}
200
201
202
203 =head2 default_id_parser
204
205 Title : default_id_parser
206 Usage : $id = default_id_parser( $header )
207 Function: The default Fasta ID parser for Fasta.pm
208 Returns $1 from applying the regexp /^>\s*(\S+)/
209 to $header.
210 Returns : ID string
211 Args : a fasta header line string
212
213 =cut
214
# Default ID parser: extract the first run of non-whitespace characters
# following the leading '>' of a FASTA header line. Returns nothing
# (empty list / undef) when the line does not match.
sub default_id_parser {
    my $header = $_[0];

    return unless $header =~ /^>\s*(\S+)/;
    return $1;
}
222
223 1;