comparison variant_effect_predictor/Bio/Index/Fastq.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 #
3 # BioPerl module for Bio::Index::Fastq
4 #
5 # Cared for by Tony Cox <avc@sanger.ac.uk>
6 #
7 # You may distribute this module under the same terms as perl itself
8
9 # POD documentation - main docs before the code
10
11 =head1 NAME
12
13 Bio::Index::Fastq - Interface for indexing (multiple) fastq files
14
15 =head1 SYNOPSIS
16
17 # Complete code for making an index for several
18 # fastq files
19 use Bio::Index::Fastq;
20 use strict;
21
22 my $Index_File_Name = shift;
23 my $inx = Bio::Index::Fastq->new(
24 '-filename' => $Index_File_Name,
25 '-write_flag' => 1);
26 $inx->make_index(@ARGV);
27
28 # Print out several sequences present in the index
29 # in Fastq format
30 use Bio::Index::Fastq;
31 use strict;
32
33 my $Index_File_Name = shift;
34 my $inx = Bio::Index::Fastq->new('-filename' => $Index_File_Name);
35 my $out = Bio::SeqIO->new('-format' => 'Fastq','-fh' => \*STDOUT);
36
37 foreach my $id (@ARGV) {
38 my $seq = $inx->fetch($id); # Returns Bio::Seq::SeqWithQuality object
39 $out->write_seq($seq);
40 }
41
42 # or, alternatively
43
44 my $seq = $inx->get_Seq_by_id($id); #identical to fetch
45
46 =head1 DESCRIPTION
47
48 Inherits functions for managing dbm files from Bio::Index::Abstract.pm,
49 and provides the basic functionality for indexing fastq files, and
50 retrieving the sequence from them. Note: for best results 'use strict'.
51
52 Bio::Index::Fastq supports the Bio::DB::BioSeqI interface, meaning
53 it can be used as a Sequence database for other parts of bioperl
54
55 =head1 FEED_BACK
56
57 =head2 Mailing Lists
58
59 User feedback is an integral part of the evolution of this and other
60 Bioperl modules. Send your comments and suggestions preferably to one
61 of the Bioperl mailing lists. Your participation is much appreciated.
62
63 bioperl-l@bioperl.org - General discussion
64 http://bioperl.org/MailList.shtml - About the mailing lists
65
66 =head2 Reporting Bugs
67
68 Report bugs to the Bioperl bug tracking system to help us keep track
69 of the bugs and their resolution. Bug reports can be submitted via
70 email or the web:
71
72 bioperl-bugs@bio.perl.org
73 http://bugzilla.bioperl.org/
74
75 =head1 AUTHOR - Tony Cox
76
77 Email - avc@sanger.ac.uk
78
79 =head1 APPENDIX
80
81 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
82
83 =cut
84
85
86 # Let the code begin...
87
88
89 package Bio::Index::Fastq;
90
91 use vars qw($VERSION @ISA);
92 use strict;
93
94 use Bio::Index::AbstractSeq;
95 use Bio::Seq;
96
97 @ISA = qw(Bio::Index::AbstractSeq);
98
99 #
100 # Suggested fix by Michael G Schwern <schwern@pobox.com> to
101 # get around a clash with CPAN shell...
102 #
103
# Assign the module version at compile time (work-around for a clash with
# the CPAN shell, following a suggestion by Michael G Schwern).
BEGIN { $VERSION = 0.2 }
107
# Accessor for the version number of this index implementation.
# Returns : the current value of the package variable $VERSION
sub _version {
    my $version = $VERSION;
    return $version;
}
111
112 =head2 _file_format
113
114 Title : _file_format
115 Function: The file format for this package, which is needed
116 by the SeqIO system when reading the sequence.
117 Returns : 'Fastq'
118
119 =cut
120
# Name of the sequence file format handled by this index class.
# Returns : the string 'Fastq'
sub _file_format {
    my $format = 'Fastq';
    return $format;
}
124
125
126
127 =head2 _index_file
128
129 Title : _index_file
130 Usage : $index->_index_file( $file_name, $i )
131 Function: Specialist function to index FASTQ format files.
132 Is provided with a filename and an integer
133 by make_index in its SUPER class.
134 Example :
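135 Returns : The number of entries added to the index for this file
136 Args : The file name and the index-number of the file being indexed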
137
138 =cut
139
# Index one FASTQ file. For every record header found, each ID returned by
# the ID parser is registered with add_record together with the file number
# and the byte offset of the first character after the leading '@'.
#
# Records are walked structurally (header, sequence lines, '+' separator,
# quality lines) rather than by testing every line for a leading '@':
# '@' is a legal quality character, so a quality line may start with it
# and must not be mistaken for the header of a new record.
#
# Args    : $file - name of the FASTQ file to read
#           $i    - index-number of the file being indexed
# Returns : the number of entries added to the index
# Throws  : through $self->throw when the file cannot be opened
sub _index_file {
    my ( $self, $file, $i ) = @_;

    my $id_parser = $self->id_parser;
    my $count     = 0;

    # Three-argument open on a lexical handle: the file name can never be
    # interpreted as an open mode or a pipe, and the handle cannot clash
    # with a package-level FASTQ handle used elsewhere.
    open( my $fh, '<', $file )
        or $self->throw("Can't open file for read : $file");

    # Main indexing loop. $line always holds the most recently read line,
    # so tell($fh) is positioned just past it whenever a header is handled.
    my $line = <$fh>;
    while ( defined $line ) {

        # Anything outside a record that is not a header is skipped.
        if ( $line !~ /^@/ ) {
            $line = <$fh>;
            next;
        }

        # $begin is the position of the first character after the '@'
        my $begin = tell($fh) - length($line) + 1;
        foreach my $id ( $id_parser->($line) ) {
            $self->add_record( $id, $i, $begin );
            $count++;
        }

        # Sequence section: runs up to the '+' separator line. A line
        # starting with '@' here cannot be sequence data; it means the
        # record was truncated and a new header has started.
        my $seq_len = 0;
        while ( defined( $line = <$fh> ) ) {
            last if $line =~ /^[+@]/;
            ( my $residues = $line ) =~ s/\s+//g;
            $seq_len += length $residues;
        }

        # End of file, or a new header that the outer loop must process.
        next unless defined $line && $line =~ /^\+/;

        # Quality section: consume lines until as many quality characters
        # as sequence characters have been read, whatever they start with.
        my $qual_len = 0;
        while ( $qual_len < $seq_len && defined( $line = <$fh> ) ) {
            ( my $quals = $line ) =~ s/[\r\n]+\z//;
            $qual_len += length $quals;
        }

        $line = <$fh> if defined $line;
    }

    close $fh;
    return ($count);
}
170
171 =head2 id_parser
172
173 Title : id_parser
174 Usage : $index->id_parser( CODE )
175 Function: Stores or returns the code used by record_id to
176 parse the ID for record from a string. Useful
177 for (for instance) specifying a different
178 parser for different flavours of FASTQ file.
179 Returns \&default_id_parser (see below) if not
180 set. If you supply your own id_parser
181 subroutine, then it should expect a fastq
182 description line. An entry will be added to
183 the index for each string in the list returned.
184 Example : $index->id_parser( \&my_id_parser )
185 Returns : ref to CODE if called without arguments
186 Args : CODE
187
188 =cut
189
# Get or set the code reference used to extract IDs from a header line.
#
# Args    : $code - optional CODE reference; when true it is stored on
#                   the object under the '_id_parser' key
# Returns : the stored parser, or \&default_id_parser when none is set
sub id_parser {
    my $self = shift;
    my $code = shift;

    $self->{'_id_parser'} = $code if $code;

    my $parser = $self->{'_id_parser'};
    return $parser ? $parser : \&default_id_parser;
}
198
199
200
201 =head2 default_id_parser
202
203 Title : default_id_parser
204 Usage : $id = default_id_parser( $header )
205 Function: The default Fastq ID parser for Fastq.pm
206 Returns $1 from applying the regexp /^@\s*(\S+)/
207 to $header.
208 Returns : ID string
209 Args : a fastq header line string
210
211 =cut
212
# Default ID parser: extracts the first run of non-whitespace characters
# that follows the leading '@' of a fastq header line.
#
# Args    : a fastq header line string
# Returns : the ID string, or nothing (empty list / undef) when the line
#           does not match
sub default_id_parser {
    my ($header) = @_;

    return unless $header =~ /^@\s*(\S+)/;
    return $1;
}
220
221 1;