0
|
1 # $Id: IO.pm,v 1.14 2002/11/04 09:07:45 heikki Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Variation::IO
|
|
4 #
|
|
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
|
|
6 #
|
|
7 # Copyright Heikki Lehvaslaiho
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10 #
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Variation::IO - Handler for sequence variation IO Formats
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use Bio::Variation::IO;
|
|
20
|
|
21 $in = Bio::Variation::IO->new(-file => "inputfilename" , '-format' => 'flat');
|
|
22 $out = Bio::Variation::IO->new(-file => ">outputfilename" , '-format' => 'xml');
|
|
23 # note: we quote -format to keep older perl's from complaining.
|
|
24
|
|
25 while ( my $seq = $in->next() ) {
|
|
26 $out->write($seq);
|
|
27 }
|
|
28
|
|
29 or
|
|
30
|
|
31 use Bio::Variation::IO;
|
|
32
|
|
33 #input file format can be read from the file extension (dat|xml)
|
|
34 $in = Bio::Variation::IO->newFh(-file => "inputfilename");
|
|
35 $out = Bio::Variation::IO->newFh('-format' => 'xml');
|
|
36
|
|
37 # World's shortest flat<->xml format converter:
|
|
38 print $out $_ while <$in>;
|
|
39
|
|
40 =head1 DESCRIPTION
|
|
41
|
|
42 Bio::Variation::IO is a handler module for the formats in the Variation IO set (eg,
|
|
43 Bio::Variation::IO::flat). It is the officially sanctioned way of getting at
|
|
44 the format objects, which most people should use.
|
|
45
|
|
46 The structure, conventions and most of the code are inherited from
|
|
47 L<Bio::SeqIO> module. The main difference is that instead of using
|
|
48 methods next_seq and write_seq, you drop '_seq' from the method names.
|
|
49
|
|
50 The idea is that you request a stream object for a particular format.
|
|
51 All the stream objects have a notion of an internal file that is read
|
|
52 from or written to. A particular SeqIO object instance is configured
|
|
53 for either input or output. A specific example of a stream object is
|
|
54 the Bio::Variation::IO::flat object.
|
|
55
|
|
56 Each stream object has functions
|
|
57
|
|
58 $stream->next();
|
|
59
|
|
60 and
|
|
61
|
|
62 $stream->write($seqDiff);
|
|
63
|
|
64 also
|
|
65
|
|
66 $stream->type() # returns 'INPUT' or 'OUTPUT'
|
|
67
|
|
68 As an added bonus, you can recover a filehandle that is tied to the
|
|
69 SeqIO object, allowing you to use the standard E<lt>E<gt> and print operations
|
|
70 to read and write sequence objects:
|
|
71
|
|
72 use Bio::Variation::IO;
|
|
73
|
|
74 $stream = Bio::Variation::IO->newFh(-format => 'flat'); # read from standard input
|
|
75
|
|
76 while ( $seq = <$stream> ) {
|
|
77 # do something with $seq
|
|
78 }
|
|
79
|
|
80 and
|
|
81
|
|
82 print $stream $seq; # when stream is in output mode
|
|
83
|
|
84 This makes the simplest ever reformatter
|
|
85
|
|
86 #!/usr/local/bin/perl
|
|
87
|
|
88 $format1 = shift;
|
|
89 $format2 = shift || die "Usage: reformat format1 format2 < input > output";
|
|
90
|
|
91 use Bio::Variation::IO;
|
|
92
|
|
93 $in = Bio::Variation::IO->newFh(-format => $format1 );
|
|
94 $out = Bio::Variation::IO->newFh(-format => $format2 );
|
|
95 #note: you might want to quote -format to keep older perl's from complaining.
|
|
96
|
|
97 print $out $_ while <$in>;
|
|
98
|
|
99
|
|
100 =head1 CONSTRUCTORS
|
|
101
|
|
102 =head2 Bio::Variation::IO-E<gt>new()
|
|
103
|
|
104 $seqIO = Bio::Variation::IO->new(-file => 'filename', -format=>$format);
|
|
105 $seqIO = Bio::Variation::IO->new(-fh => \*FILEHANDLE, -format=>$format);
|
|
106 $seqIO = Bio::Variation::IO->new(-format => $format);
|
|
107
|
|
108 The new() class method constructs a new Bio::Variation::IO object. The
|
|
109 returned object can be used to retrieve or print Bio::Variation::SeqDiff objects. new()
|
|
110 accepts the following parameters:
|
|
111
|
|
112 =over 4
|
|
113
|
|
114 =item -file
|
|
115
|
|
116 A file path to be opened for reading or writing. The usual Perl
|
|
117 conventions apply:
|
|
118
|
|
119 'file' # open file for reading
|
|
120 '>file' # open file for writing
|
|
121 '>>file' # open file for appending
|
|
122 '+<file' # open file read/write
|
|
123 'command |' # open a pipe from the command
|
|
124 '| command' # open a pipe to the command
|
|
125
|
|
126 =item -fh
|
|
127
|
|
128 You may provide new() with a previously-opened filehandle. For
|
|
129 example, to read from STDIN:
|
|
130
|
|
131 $seqIO = Bio::Variation::IO->new(-fh => \*STDIN);
|
|
132
|
|
133 Note that you must pass filehandles as references to globs.
|
|
134
|
|
135 If neither a filehandle nor a filename is specified, then the module
|
|
136 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
|
|
137 semantics.
|
|
138
|
|
139 =item -format
|
|
140
|
|
141 Specify the format of the file. Supported formats include:
|
|
142
|
|
143 flat pseudo EMBL format
|
|
144 xml seqvar xml format
|
|
145
|
|
146 If no format is specified and a filename is given, then the module
|
|
147 will attempt to deduce it from the filename. If this is unsuccessful,
|
|
148 the 'flat' format is assumed.
|
|
149
|
|
150 The format name is case insensitive. 'FLAT', 'Flat' and 'flat' are
|
|
151 all supported.
|
|
152
|
|
153 =back
|
|
154
|
|
155 =head2 Bio::Variation::IO-E<gt>newFh()
|
|
156
|
|
157 $fh = Bio::Variation::IO->newFh(-fh => \*FILEHANDLE, -format=>$format);
|
|
158 $fh = Bio::Variation::IO->newFh(-format => $format);
|
|
159 # etc.
|
|
160
|
|
161 #e.g.
|
|
162 $out = Bio::Variation::IO->newFh( '-FORMAT' => 'flat');
|
|
163 print $out $seqDiff;
|
|
164
|
|
165 This constructor behaves like new(), but returns a tied filehandle
|
|
166 rather than a Bio::Variation::IO object. You can read sequences from this
|
|
167 object using the familiar E<lt>E<gt> operator, and write to it using print().
|
|
168 The usual array and $_ semantics work. For example, you can read all
|
|
169 sequence objects into an array like this:
|
|
170
|
|
171 @mutations = <$fh>;
|
|
172
|
|
173 Other operations, such as read(), sysread(), write(), close(), and printf()
|
|
174 are not supported.
|
|
175
|
|
176 =head1 OBJECT METHODS
|
|
177
|
|
178 See below for more detailed summaries. The main methods are:
|
|
179
|
|
180 =head2 $sequence = $seqIO-E<gt>next()
|
|
181
|
|
182 Fetch the next sequence from the stream.
|
|
183
|
|
184 =head2 $seqIO-E<gt>write($sequence [,$another_sequence,...])
|
|
185
|
|
186 Write the specified sequence(s) to the stream.
|
|
187
|
|
188 =head2 TIEHANDLE(), READLINE(), PRINT()
|
|
189
|
|
190 These provide the tie interface. See L<perltie> for more details.
|
|
191
|
|
192 =head1 FEEDBACK
|
|
193
|
|
194 =head2 Mailing Lists
|
|
195
|
|
196 User feedback is an integral part of the evolution of this and other
|
|
197 Bioperl modules. Send your comments and suggestions preferably to the
|
|
198 Bioperl mailing lists. Your participation is much appreciated.
|
|
199
|
|
200 bioperl-l@bioperl.org - General discussion
|
|
201 http://bio.perl.org/MailList.html - About the mailing lists
|
|
202
|
|
203 =head2 Reporting Bugs
|
|
204
|
|
205 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
206 of the bugs and their resolution. Bug reports can be submitted via
|
|
207 email or the web:
|
|
208
|
|
209 bioperl-bugs@bio.perl.org
|
|
210 http://bugzilla.bioperl.org/
|
|
211
|
|
212 =head1 AUTHOR - Heikki Lehvaslaiho
|
|
213
|
|
214 Email: heikki@ebi.ac.uk
|
|
215 Address:
|
|
216
|
|
217 EMBL Outstation, European Bioinformatics Institute
|
|
218 Wellcome Trust Genome Campus, Hinxton
|
|
219 Cambs. CB10 1SD, United Kingdom
|
|
220
|
|
221
|
|
222 =head1 APPENDIX
|
|
223
|
|
224 The rest of the documentation details each of the object
|
|
225 methods. Internal methods are usually preceded with a _
|
|
226
|
|
227 =cut
|
|
228
|
|
229 # Let the code begin...
|
|
230
|
|
package Bio::Variation::IO;

use strict;
use vars qw(@ISA $VERSION);

use Bio::SeqIO;

# $VERSION has to be a package variable: a lexical ("my") version number is
# invisible to UNIVERSAL::VERSION, so Bio::Variation::IO->VERSION returned
# undef and "use Bio::Variation::IO 1.0" could never be satisfied.
$VERSION = 1.0;

# Methods not overridden in this package are inherited from Bio::SeqIO.
@ISA = 'Bio::SeqIO';
|
|
240
|
|
241 =head2 new
|
|
242
|
|
243 Title : new
|
|
244 Usage : $stream = Bio::Variation::IO->new(-file => $filename, -format => 'Format')
|
|
245 Function: Returns a new seqstream
|
|
246 Returns : A Bio::Variation::IO::<format> stream object for the requested format
|
|
247 Args : -file => $filename
|
|
248 -format => format
|
|
249 -fh => filehandle to attach to
|
|
250
|
|
251 =cut
|
|
252
|
|
253
|
|
# Factory constructor: decide which format is wanted, load the matching
# Bio::Variation::IO::<format> module and hand construction over to it.
#
# Args    : -file, -format, -fh (parameter names are case insensitive)
# Returns : whatever "Bio::Variation::IO::<format>"->new(%param) returns,
#           or nothing (undef in scalar context, empty list in list
#           context) when the format module could not be loaded
sub new {
    my ($class, %param) = @_;

    # Add a lower-cased alias for every key so that '-FORMAT' and '-format'
    # are treated alike; the original spellings stay in %param as well.
    @param{ map { lc $_ } keys %param } = values %param;

    # Precedence: explicit -format, then a guess from the file name (or the
    # first command-line argument), then the 'flat' default.
    my $format = $param{'-format'}
        || $class->_guess_format( $param{'-file'} || $ARGV[0] )
        || 'flat';
    $format = lc $format;    # format names are case insensitive

    # Bare return rather than "return undef", which would hand a
    # one-element (and therefore true) list to list-context callers.
    return unless $class->_load_format_module($format);
    return "Bio::Variation::IO::$format"->new(%param);
}
|
|
267
|
|
268
|
|
# Load the Bio::Variation::IO::<format> module on demand.
#
# Args    : $format - format name, appended to "Bio::Variation::IO::"
# Returns : the value returned by $class->_load_module() on success; undef
#           when loading raised an exception, in which case a diagnostic
#           is printed to STDERR
sub _load_format_module {
    my $class  = shift;
    my $format = shift;

    my $loaded;
    eval { $loaded = $class->_load_module( "Bio::Variation::IO::" . $format ); };

    # Nothing went wrong: pass the loader's result straight back.
    return $loaded unless $@;

    # The loader died; report it but leave the decision to the caller.
    my $report = <<END;
$class: $format cannot be found
Exception $@
For more information about the IO system please see the IO docs.
This includes ways of checking for formats at compile time, not run time
END
    print STDERR $report;
    return $loaded;
}
|
|
287
|
|
288 =head2 next
|
|
289
|
|
290 Title : next
|
|
291 Usage : $seqDiff = $stream->next
|
|
292 Function: reads the next $seqDiff object from the stream
|
|
293 Returns : a Bio::Variation::SeqDiff object
|
|
294 Args :
|
|
295
|
|
296 =cut
|
|
297
|
|
# Generic reader stub: calling it on a plain Bio::Variation::IO object
# always raises an exception through throw(). Presumably the format
# modules provide the real implementation -- confirm against them.
sub next {
    my $self = shift;
    $self->throw(
        "Sorry, you cannot read from a generic Bio::Variation::IO object."
    );
}
|
|
302
|
|
# Compatibility shim for callers that use the Bio::SeqIO method name.
# Emits a warning pointing at 'next' and then delegates to it, mirroring
# what write_seq() does for 'write'.
#
# Args    : passed through to next()
# Returns : whatever $self->next() returns
sub next_seq {
    my ($self, $seq) = @_;

    # This used to throw(), which made the delegation below unreachable;
    # warn() keeps the hint while letting the call actually succeed.
    $self->warn("These are not sequence objects. Use method 'next' instead of 'next_seq'.");
    return $self->next($seq);
}
|
|
308
|
|
309 =head2 write
|
|
310
|
|
311 Title : write
|
|
312 Usage : $stream->write($seq)
|
|
313 Function: writes the $seq object into the stream
|
|
314 Returns : 1 for success and 0 for error
|
|
315 Args : Bio::Variation::SeqDiff object
|
|
316
|
|
317 =cut
|
|
318
|
|
# Generic writer stub: calling it on a plain Bio::Variation::IO object
# always raises an exception through throw(). Presumably the format
# modules provide the real implementation -- confirm against them.
sub write {
    my $self = shift;
    $self->throw(
        "Sorry, you cannot write to a generic Bio::Variation::IO object."
    );
}
|
|
323
|
|
# Compatibility shim for callers that use the Bio::SeqIO method name:
# warns that 'write' is the method to use, then forwards the object to it.
#
# Args    : the object to write
# Returns : whatever $self->write() returns
sub write_seq {
    my $self = shift;
    my $seq  = shift;

    $self->warn(
        "These are not sequence objects. Use method 'write' instead of 'write_seq'."
    );
    return $self->write($seq);
}
|
|
329
|
|
330 =head2 _guess_format
|
|
331
|
|
332 Title : _guess_format
|
|
333 Usage : $obj->_guess_format($filename)
|
|
334 Function:
|
|
335 Example :
|
|
336 Returns : guessed format of filename (lower case)
|
|
337 Args :
|
|
338
|
|
339 =cut
|
|
340
|
|
# Guess the variation format from a file name's extension.
#
# Args    : $filename - file name to inspect (may be undef or empty)
# Returns : 'flat' for *.dat, 'xml' for *.xml (both case insensitive);
#           nothing (undef in scalar context) when there is no file name
#           or the extension is not recognised
sub _guess_format {
    my ($class, $filename) = @_;

    # A lexical is used instead of assigning to the global $_: that
    # assignment overwrote the caller's $_, and inside a for/map loop it
    # even modified the element $_ was aliased to.
    return unless $filename;

    return 'flat' if $filename =~ /\.dat$/i;
    return 'xml'  if $filename =~ /\.xml$/i;

    # Explicit "no guess" rather than leaking the failed match result.
    return;
}
|
|
347
|
|
348
|
|
349 1;
|