comparison variant_effect_predictor/Bio/Variation/IO.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: IO.pm,v 1.14 2002/11/04 09:07:45 heikki Exp $
2 #
3 # BioPerl module for Bio::Variation::IO
4 #
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
6 #
7 # Copyright Heikki Lehvaslaiho
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Variation::IO - Handler for sequence variation IO Formats
16
17 =head1 SYNOPSIS
18
19 use Bio::Variation::IO;
20
21 $in = Bio::Variation::IO->new(-file => "inputfilename" , '-format' => 'flat');
22 $out = Bio::Variation::IO->new(-file => ">outputfilename" , '-format' => 'xml');
23 # note: we quote -format to keep older perl's from complaining.
24
25 while ( my $seq = $in->next() ) {
26 $out->write($seq);
27 }
28
29 or
30
31 use Bio::Variation::IO;
32
33 #input file format can be read from the file extension (dat|xml)
34 $in = Bio::Variation::IO->newFh(-file => "inputfilename");
35 $out = Bio::Variation::IO->newFh('-format' => 'xml');
36
37 # World's shortest flat<->xml format converter:
38 print $out $_ while <$in>;
39
40 =head1 DESCRIPTION
41
42 Bio::Variation::IO is a handler module for the formats in the Variation IO set (eg,
43 Bio::Variation::IO::flat). It is the officially sanctioned way of getting at
44 the format objects, which most people should use.
45
46 The structure, conventions and most of the code are inherited from
47 the L<Bio::SeqIO> module. The main difference is that instead of using
48 methods next_seq and write_seq, you drop '_seq' from the method names.
49
50 The idea is that you request a stream object for a particular format.
51 All the stream objects have a notion of an internal file that is read
52 from or written to. A particular SeqIO object instance is configured
53 for either input or output. A specific example of a stream object is
54 the Bio::Variation::IO::flat object.
55
56 Each stream object has functions
57
58 $stream->next();
59
60 and
61
62 $stream->write($seqDiff);
63
64 also
65
66 $stream->type() # returns 'INPUT' or 'OUTPUT'
67
68 As an added bonus, you can recover a filehandle that is tied to the
69 SeqIO object, allowing you to use the standard E<lt>E<gt> and print operations
70 to read and write sequence objects:
71
72 use Bio::Variation::IO;
73
74 $stream = Bio::Variation::IO->newFh(-format => 'flat'); # read from standard input
75
76 while ( $seq = <$stream> ) {
77 # do something with $seq
78 }
79
80 and
81
82 print $stream $seq; # when stream is in output mode
83
84 This makes the simplest ever reformatter
85
86 #!/usr/local/bin/perl
87
88 $format1 = shift;
89 $format2 = shift || die "Usage: reformat format1 format2 < input > output";
90
91 use Bio::Variation::IO;
92
93 $in = Bio::Variation::IO->newFh(-format => $format1 );
94 $out = Bio::Variation::IO->newFh(-format => $format2 );
95 #note: you might want to quote -format to keep older perl's from complaining.
96
97 print $out $_ while <$in>;
98
99
100 =head1 CONSTRUCTORS
101
102 =head2 Bio::Variation::IO-E<gt>new()
103
104 $seqIO = Bio::Variation::IO->new(-file => 'filename', -format=>$format);
105 $seqIO = Bio::Variation::IO->new(-fh => \*FILEHANDLE, -format=>$format);
106 $seqIO = Bio::Variation::IO->new(-format => $format);
107
108 The new() class method constructs a new Bio::Variation::IO object. The
109 returned object can be used to retrieve or print Bio::Variation::SeqDiff
110 objects. new() accepts the following parameters:
111
112 =over 4
113
114 =item -file
115
116 A file path to be opened for reading or writing. The usual Perl
117 conventions apply:
118
119 'file' # open file for reading
120 '>file' # open file for writing
121 '>>file' # open file for appending
122 '+<file' # open file read/write
123 'command |' # open a pipe from the command
124 '| command' # open a pipe to the command
125
126 =item -fh
127
128 You may provide new() with a previously-opened filehandle. For
129 example, to read from STDIN:
130
131 $seqIO = Bio::Variation::IO->new(-fh => \*STDIN);
132
133 Note that you must pass filehandles as references to globs.
134
135 If neither a filehandle nor a filename is specified, then the module
136 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
137 semantics.
138
139 =item -format
140
141 Specify the format of the file. Supported formats include:
142
143 flat pseudo EMBL format
144 xml seqvar xml format
145
146 If no format is specified and a filename is given, then the module
147 will attempt to deduce it from the filename. If this is unsuccessful,
148 the 'flat' format is assumed.
149
150 The format name is case insensitive. 'FLAT', 'Flat' and 'flat' are
151 all supported.
152
153 =back
154
155 =head2 Bio::Variation::IO-E<gt>newFh()
156
157 $fh = Bio::Variation::IO->newFh(-fh => \*FILEHANDLE, -format=>$format);
158 $fh = Bio::Variation::IO->newFh(-format => $format);
159 # etc.
160
161 #e.g.
162 $out = Bio::Variation::IO->newFh( '-FORMAT' => 'flat');
163 print $out $seqDiff;
164
165 This constructor behaves like new(), but returns a tied filehandle
166 rather than a Bio::Variation::IO object. You can read sequences from this
167 object using the familiar E<lt>E<gt> operator, and write to it using print().
168 The usual array and $_ semantics work. For example, you can read all
169 sequence objects into an array like this:
170
171 @mutations = <$fh>;
172
173 Other operations, such as read(), sysread(), write(), close(), and printf()
174 are not supported.
175
176 =head1 OBJECT METHODS
177
178 See below for more detailed summaries. The main methods are:
179
180 =head2 $sequence = $seqIO-E<gt>next()
181
182 Fetch the next sequence from the stream.
183
184 =head2 $seqIO-E<gt>write($sequence [,$another_sequence,...])
185
186 Write the specified sequence(s) to the stream.
187
188 =head2 TIEHANDLE(), READLINE(), PRINT()
189
190 These provide the tie interface. See L<perltie> for more details.
191
192 =head1 FEEDBACK
193
194 =head2 Mailing Lists
195
196 User feedback is an integral part of the evolution of this and other
197 Bioperl modules. Send your comments and suggestions preferably to the
198 Bioperl mailing lists. Your participation is much appreciated.
199
200 bioperl-l@bioperl.org - General discussion
201 http://bio.perl.org/MailList.html - About the mailing lists
202
203 =head2 Reporting Bugs
204
205 Report bugs to the Bioperl bug tracking system to help us keep track
206 of the bugs and their resolution. Bug reports can be submitted via
207 email or the web:
208
209 bioperl-bugs@bio.perl.org
210 http://bugzilla.bioperl.org/
211
212 =head1 AUTHOR - Heikki Lehvaslaiho
213
214 Email: heikki@ebi.ac.uk
215 Address:
216
217 EMBL Outstation, European Bioinformatics Institute
218 Wellcome Trust Genome Campus, Hinxton
219 Cambs. CB10 1SD, United Kingdom
220
221
222 =head1 APPENDIX
223
224 The rest of the documentation details each of the object
225 methods. Internal methods are usually preceded with a _
226
227 =cut
228
229 # Let the code begin...
230
package Bio::Variation::IO;

use strict;
use vars qw(@ISA $VERSION);

use Bio::SeqIO;

# $VERSION is a package variable rather than a file lexical, so that the
# version can be queried from outside this file, e.g. through
# Bio::Variation::IO->VERSION or $Bio::Variation::IO::VERSION.
$VERSION = 1.0;

# Every method not defined in this file is inherited from Bio::SeqIO.
@ISA = 'Bio::SeqIO';
240
241 =head2 new
242
243 Title : new
244 Usage : $stream = Bio::Variation::IO->new(-file => $filename, -format => 'Format')
245 Function: Returns a new seqstream
246 Returns : A Bio::Variation::IO::Handler initialised with the appropriate format
247 Args : -file => $filename
248 -format => format
249 -fh => filehandle to attach to
250
251 =cut
252
253
# Factory constructor: selects a format and hands construction over to the
# matching Bio::Variation::IO::<format> class.
#
# Args    : key/value pairs; keys are matched case-insensitively
#             -format => format name (any capitalization)
#             -file   => file name, also used to guess the format
#           all pairs are forwarded to the format class constructor
# Returns : whatever Bio::Variation::IO::<format>->new returns, or undef
#           when _load_format_module reports failure
sub new {
    my ($class, %param) = @_;

    # Store every argument a second time under its lowercased key, so that
    # '-FORMAT', '-Format' and '-format' are all found by the lookups below.
    my @given = keys %param;
    @param{ map { lc($_) } @given } = @param{@given};

    # Precedence: explicit -format, then a guess from the file name (or from
    # the first command line argument), then the 'flat' default.
    my $format = $param{'-format'};
    $format ||= $class->_guess_format( $param{-file} || $ARGV[0] );
    $format ||= 'flat';
    $format = lc $format;    # format names are case insensitive

    $class->_load_format_module($format) or return undef;

    my $handler = "Bio::Variation::IO::$format";
    return $handler->new(%param);
}
267
268
# Loads the module that implements the given format.
#
# Args    : $format - format name, appended to 'Bio::Variation::IO::'
# Returns : the value returned by $class->_load_module; undef when loading
#           raised an exception, in which case a diagnostic is printed to
#           STDERR
sub _load_format_module {
    my ($class, $format) = @_;

    # eval yields undef when _load_module dies, so $loaded doubles as the
    # failure indicator handed back to the caller.
    my $loaded = eval { $class->_load_module("Bio::Variation::IO::" . $format) };

    print STDERR <<END if $@;
$class: $format cannot be found
Exception $@
For more information about the IO system please see the IO docs.
This includes ways of checking for formats at compile time, not run time
END

    return $loaded;
}
287
288 =head2 next
289
290 Title : next
291 Usage : $seqDiff = $stream->next
292 Function: reads the next $seqDiff object from the stream
293 Returns : a Bio::Variation::SeqDiff object
294 Args :
295
296 =cut
297
# Generic reader: this class has no format of its own, so reading always
# raises an exception through throw().
# NOTE(review): presumably the format classes override this - confirm.
#
# Args    : none that are used
# Returns : nothing useful; the value of throw() if it ever returns
sub next {
    my $stream = shift;
    return $stream->throw("Sorry, you cannot read from a generic Bio::Variation::IO object.");
}
302
# Compatibility alias for callers that use the Bio::SeqIO method name.
#
# The previous version threw unconditionally, which left the delegation to
# next() unreachable. It now behaves like write_seq: it warns about the
# misnamed method and then forwards the call.
#
# Args    : $seq - passed through to next() unchanged
# Returns : whatever next() returns
sub next_seq {
    my ($self, $seq) = @_;
    $self->warn("These are not sequence objects. Use method 'next' instead of 'next_seq'.");
    return $self->next($seq);
}
308
309 =head2 write
310
311 Title : write
312 Usage : $stream->write($seq)
313 Function: writes the $seq object into the stream
314 Returns : 1 for success and 0 for error
315 Args : Bio::Variation::SeqDiff object
316
317 =cut
318
# Generic writer: this class has no format of its own, so writing always
# raises an exception through throw().
# NOTE(review): presumably the format classes override this - confirm.
#
# Args    : none that are used
# Returns : nothing useful; the value of throw() if it ever returns
sub write {
    my $stream = shift;
    return $stream->throw("Sorry, you cannot write to a generic Bio::Variation::IO object.");
}
323
# Compatibility alias for callers that use the Bio::SeqIO method name:
# warns about the misnamed method, then forwards to write().
#
# Args    : $seq - the single object to write; passed to write() unchanged
# Returns : whatever write() returns
sub write_seq {
    my $stream = shift;
    my $seq    = shift;

    $stream->warn("These are not sequence objects. Use method 'write' instead of 'write_seq'.");

    return $stream->write($seq);
}
329
330 =head2 _guess_format
331
332 Title : _guess_format
333 Usage : $obj->_guess_format($filename)
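334 Function: guesses the file format from the filename extension
335 Example : $format = $obj->_guess_format('mutations.dat'); # 'flat'
336 Returns : guessed format of filename (lower case)
337 Args : filename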
338
339 =cut
340
# Guesses the format name from a file name extension.
#
# The file name is held in a lexical. The previous version assigned it to
# the global $_, which overwrote the caller's $_ and died when $_ was
# aliased to a read-only value.
#
# Args    : $filename - file name, optionally with an open-mode prefix such
#           as '>'; only the extension at the end is inspected
# Returns : 'flat' for *.dat, 'xml' for *.xml (case insensitive); nothing
#           (undef in scalar context, empty list in list context) when no
#           file name is given or the extension is not recognized
sub _guess_format {
    my ($class, $filename) = @_;

    return unless $filename;
    return 'flat' if $filename =~ /\.dat$/i;
    return 'xml'  if $filename =~ /\.xml$/i;

    # Unknown extension: return explicitly instead of leaking the value of
    # the last failed match.
    return;
}
347
348
349 1;