Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Variation/IO.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: IO.pm,v 1.14 2002/11/04 09:07:45 heikki Exp $ | |
2 # | |
3 # BioPerl module for Bio::Variation::IO | |
4 # | |
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # | |
7 # Copyright Heikki Lehvaslaiho | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Variation::IO - Handler for sequence variation IO Formats | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use Bio::Variation::IO; | |
20 | |
21 $in = Bio::Variation::IO->new(-file => "inputfilename" , '-format' => 'flat'); | |
22 $out = Bio::Variation::IO->new(-file => ">outputfilename" , '-format' => 'xml'); | |
23 # note: we quote -format to keep older perls from complaining. | |
24 | |
25 while ( my $seq = $in->next() ) { | |
26 $out->write($seq); | |
27 } | |
28 | |
29 or | |
30 | |
31 use Bio::Variation::IO; | |
32 | |
33 #input file format can be read from the file extension (dat|xml) | |
34 $in = Bio::Variation::IO->newFh(-file => "inputfilename"); | |
35 $out = Bio::Variation::IO->newFh('-format' => 'xml'); | |
36 | |
37 # World's shortest flat<->xml format converter: | |
38 print $out $_ while <$in>; | |
39 | |
40 =head1 DESCRIPTION | |
41 | |
42 Bio::Variation::IO is a handler module for the formats in the Variation IO set (eg, | |
43 Bio::Variation::IO::flat). It is the officially sanctioned way of getting at | |
44 the format objects, which most people should use. | |
45 | |
46 The structure, conventions and most of the code are inherited from | |
47 L<Bio::SeqIO> module. The main difference is that instead of using | |
48 methods next_seq and write_seq, you drop '_seq' from the method names. | |
49 | |
50 The idea is that you request a stream object for a particular format. | |
51 All the stream objects have a notion of an internal file that is read | |
52 from or written to. A particular SeqIO object instance is configured | |
53 for either input or output. A specific example of a stream object is | |
54 the Bio::Variation::IO::flat object. | |
55 | |
56 Each stream object has functions | |
57 | |
58 $stream->next(); | |
59 | |
60 and | |
61 | |
62 $stream->write($seqDiff); | |
63 | |
64 also | |
65 | |
66 $stream->type() # returns 'INPUT' or 'OUTPUT' | |
67 | |
68 As an added bonus, you can recover a filehandle that is tied to the | |
69 SeqIO object, allowing you to use the standard E<lt>E<gt> and print operations | |
70 to read and write sequence objects: | |
71 | |
72 use Bio::Variation::IO; | |
73 | |
74 $stream = Bio::Variation::IO->newFh(-format => 'flat'); # read from standard input | |
75 | |
76 while ( $seq = <$stream> ) { | |
77 # do something with $seq | |
78 } | |
79 | |
80 and | |
81 | |
82 print $stream $seq; # when stream is in output mode | |
83 | |
84 This makes the simplest ever reformatter | |
85 | |
86 #!/usr/local/bin/perl | |
87 | |
88 $format1 = shift; | |
89 $format2 = shift || die "Usage: reformat format1 format2 < input > output"; | |
90 | |
91 use Bio::Variation::IO; | |
92 | |
93 $in = Bio::Variation::IO->newFh(-format => $format1 ); | |
94 $out = Bio::Variation::IO->newFh(-format => $format2 ); | |
95 #note: you might want to quote -format to keep older perls from complaining. | |
96 | |
97 print $out $_ while <$in>; | |
98 | |
99 | |
100 =head1 CONSTRUCTORS | |
101 | |
102 =head2 Bio::Variation::IO-E<gt>new() | |
103 | |
104 $seqIO = Bio::Variation::IO->new(-file => 'filename', -format=>$format); | |
105 $seqIO = Bio::Variation::IO->new(-fh => \*FILEHANDLE, -format=>$format); | |
106 $seqIO = Bio::Variation::IO->new(-format => $format); | |
107 | |
108 The new() class method constructs a new Bio::Variation::IO object. The | |
109 returned object can be used to retrieve or print BioSeq objects. new() | |
110 accepts the following parameters: | |
111 | |
112 =over 4 | |
113 | |
114 =item -file | |
115 | |
116 A file path to be opened for reading or writing. The usual Perl | |
117 conventions apply: | |
118 | |
119 'file' # open file for reading | |
120 '>file' # open file for writing | |
121 '>>file' # open file for appending | |
122 '+<file' # open file read/write | |
123 'command |' # open a pipe from the command | |
124 '| command' # open a pipe to the command | |
125 | |
126 =item -fh | |
127 | |
128 You may provide new() with a previously-opened filehandle. For | |
129 example, to read from STDIN: | |
130 | |
131 $seqIO = Bio::Variation::IO->new(-fh => \*STDIN); | |
132 | |
133 Note that you must pass filehandles as references to globs. | |
134 | |
135 If neither a filehandle nor a filename is specified, then the module | |
136 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt> | |
137 semantics. | |
138 | |
139 =item -format | |
140 | |
141 Specify the format of the file. Supported formats include: | |
142 | |
143 flat pseudo EMBL format | |
144 xml seqvar xml format | |
145 | |
146 If no format is specified and a filename is given, then the module | |
147 will attempt to deduce it from the filename. If this is unsuccessful, | |
148 the 'flat' format is assumed. | |
149 | |
150 The format name is case insensitive. 'FLAT', 'Flat' and 'flat' are | |
151 all supported. | |
152 | |
153 =back | |
154 | |
155 =head2 Bio::Variation::IO-E<gt>newFh() | |
156 | |
157 $fh = Bio::Variation::IO->newFh(-fh => \*FILEHANDLE, -format=>$format); | |
158 $fh = Bio::Variation::IO->newFh(-format => $format); | |
159 # etc. | |
160 | |
161 #e.g. | |
162 $out = Bio::Variation::IO->newFh( '-FORMAT' => 'flat'); | |
163 print $out $seqDiff; | |
164 | |
165 This constructor behaves like new(), but returns a tied filehandle | |
166 rather than a Bio::Variation::IO object. You can read sequences from this | |
167 object using the familiar E<lt>E<gt> operator, and write to it using print(). | |
168 The usual array and $_ semantics work. For example, you can read all | |
169 sequence objects into an array like this: | |
170 | |
171 @mutations = <$fh>; | |
172 | |
173 Other operations, such as read(), sysread(), write(), close(), and printf() | |
174 are not supported. | |
175 | |
176 =head1 OBJECT METHODS | |
177 | |
178 See below for more detailed summaries. The main methods are: | |
179 | |
180 =head2 $sequence = $seqIO-E<gt>next() | |
181 | |
182 Fetch the next sequence from the stream. | |
183 | |
184 =head2 $seqIO-E<gt>write($sequence [,$another_sequence,...]) | |
185 | |
186 Write the specified sequence(s) to the stream. | |
187 | |
188 =head2 TIEHANDLE(), READLINE(), PRINT() | |
189 | |
190 These provide the tie interface. See L<perltie> for more details. | |
191 | |
192 =head1 FEEDBACK | |
193 | |
194 =head2 Mailing Lists | |
195 | |
196 User feedback is an integral part of the evolution of this and other | |
197 Bioperl modules. Send your comments and suggestions preferably to the | |
198 Bioperl mailing lists. Your participation is much appreciated. | |
199 | |
200 bioperl-l@bioperl.org - General discussion | |
201 http://bio.perl.org/MailList.html - About the mailing lists | |
202 | |
203 =head2 Reporting Bugs | |
204 | |
205 Report bugs to the Bioperl bug tracking system to help us keep track | |
206 of the bugs and their resolution. Bug reports can be submitted via | |
207 email or the web: | |
208 | |
209 bioperl-bugs@bio.perl.org | |
210 http://bugzilla.bioperl.org/ | |
211 | |
212 =head1 AUTHOR - Heikki Lehvaslaiho | |
213 | |
214 Email: heikki@ebi.ac.uk | |
215 Address: | |
216 | |
217 EMBL Outstation, European Bioinformatics Institute | |
218 Wellcome Trust Genome Campus, Hinxton | |
219 Cambs. CB10 1SD, United Kingdom | |
220 | |
221 | |
222 =head1 APPENDIX | |
223 | |
224 The rest of the documentation details each of the object | |
225 methods. Internal methods are usually preceded with a _ | |
226 | |
227 =cut | |
228 | |
229 # Let the code begin... | |
230 | |
package Bio::Variation::IO;

use strict;
use vars qw(@ISA $VERSION);

use Bio::SeqIO;

# $VERSION must be a package variable, not a "my" lexical: only a package
# variable is visible to Bio::Variation::IO->VERSION and to version checks
# such as "use Bio::Variation::IO 1.0".
$VERSION = 1.0;

# All stream behaviour not defined in this file is inherited from Bio::SeqIO.
@ISA = 'Bio::SeqIO';
240 | |
241 =head2 new | |
242 | |
243 Title : new | |
244 Usage : $stream = Bio::Variation::IO->new(-file => $filename, -format => 'Format') | |
245 Function: Returns a new seqstream | |
246 Returns : A Bio::Variation::IO::Handler initialised with the appropriate format | |
247 Args : -file => $filename | |
248 -format => format | |
249 -fh => filehandle to attach to | |
250 | |
251 =cut | |
252 | |
253 | |
# Factory constructor: works out which format is wanted, loads the matching
# Bio::Variation::IO::<format> module and returns whatever that module's
# own new() returns.
#
# Args    : key/value pairs; keys are matched case-insensitively
#             -format => format name (case-insensitive)
#             -file   => file name, also used to guess the format
#           every parameter is passed on to the format module's new()
# Returns : the object built by the format module; on a failed module load,
#           undef in scalar context and an empty list in list context
sub new {
    my ($class, %param) = @_;

    # Add a lower-cased copy of every key so '-FORMAT', '-Format', ... are
    # all found below. The original spellings are kept and passed on too.
    @param{ map { lc $_ } keys %param } = values %param;

    # Precedence: explicit -format, then a guess from -file (or the first
    # command-line argument), then the 'flat' default.
    my $format = $param{'-format'}
        || $class->_guess_format( $param{-file} || $ARGV[0] )
        || 'flat';
    $format = lc $format;    # module names are lower case

    # Bare return: a failed load must be false in list context as well,
    # which "return undef" (a one-element list) is not.
    return unless $class->_load_format_module($format);
    return "Bio::Variation::IO::$format"->new(%param);
}
267 | |
268 | |
# Loads the module implementing one format.
#
# Args    : $format - format name; it is appended to "Bio::Variation::IO::"
#           to build the module name
# Returns : whatever $class->_load_module() returned, or undef when that
#           call raised an exception (a diagnostic then goes to STDERR)
sub _load_format_module {
    my ($class, $format) = @_;
    my $module = "Bio::Variation::IO::$format";

    # eval yields undef when _load_module dies, so $loaded is only ever
    # set by a call that completed.
    my $loaded = eval { $class->_load_module($module) };

    if ($@) {
        # Built as one string so print receives a single argument.
        my $report = "$class: $format cannot be found\n"
            . "Exception $@\n"
            . "For more information about the IO system please see the IO docs.\n"
            . "This includes ways of checking for formats at compile time, not run time\n";
        print STDERR $report;
    }

    return $loaded;
}
287 | |
288 =head2 next | |
289 | |
290 Title : next | |
291 Usage : $seqDiff = $stream->next | |
292 Function: reads the next $seqDiff object from the stream | |
293 Returns : a Bio::Variation::SeqDiff object | |
294 Args : | |
295 | |
296 =cut | |
297 | |
# Generic placeholder: always reports, via throw(), that this class cannot
# read. Presumably overridden by the format modules such as
# Bio::Variation::IO::flat - confirm against those modules.
sub next {
    my $self = shift;
    return $self->throw("Sorry, you cannot read from a generic Bio::Variation::IO object.");
}
302 | |
# Compatibility alias for next(): warns that 'next' is the proper method
# name and then delegates to it, mirroring write_seq().
#
# This used to throw unconditionally, which left the delegation below
# unreachable.
# NOTE(review): Bio::SeqIO's tied-handle READLINE appears to go through
# next_seq, in which case throwing here would break <$fh> reads - confirm.
#
# Args    : forwarded unchanged to next()
# Returns : whatever next() returns
sub next_seq {
    my ($self, @args) = @_;
    $self->warn("These are not sequence objects. Use method 'next' instead of 'next_seq'.");
    return $self->next(@args);
}
308 | |
309 =head2 write | |
310 | |
311 Title : write | |
312 Usage : $stream->write($seq) | |
313 Function: writes the $seq object into the stream | |
314 Returns : 1 for success and 0 for error | |
315 Args : Bio::Variation::SeqDiff object | |
316 | |
317 =cut | |
318 | |
# Generic placeholder: always reports, via throw(), that this class cannot
# write. Presumably overridden by the format modules such as
# Bio::Variation::IO::flat - confirm against those modules.
sub write {
    my $self = shift;
    return $self->throw("Sorry, you cannot write to a generic Bio::Variation::IO object.");
}
323 | |
# Compatibility alias for write(): warns that 'write' is the proper method
# name and then delegates to it.
#
# Every object passed in is forwarded. Earlier only the first one reached
# write() and any others were silently dropped.
#
# Args    : one or more objects, forwarded unchanged to write()
# Returns : whatever write() returns
sub write_seq {
    my ($self, @seqs) = @_;
    $self->warn("These are not sequence objects. Use method 'write' instead of 'write_seq'.");
    return $self->write(@seqs);
}
329 | |
330 =head2 _guess_format | |
331 | |
332 Title : _guess_format | |
333 Usage : $obj->_guess_format($filename) | |
334 Function: | |
335 Example : | |
336 Returns : guessed format of filename (lower case) | |
337 Args : | |
338 | |
339 =cut | |
340 | |
# Guesses the format from a file name extension (case-insensitive):
#   .dat => 'flat'
#   .xml => 'xml'
#
# Args    : $filename - file name to inspect; may be missing or empty
# Returns : the format name, or nothing (undef in scalar context, empty
#           list in list context) when the name is false or the extension
#           is not recognised
sub _guess_format {
    # A lexical is used rather than assigning to the global $_: that would
    # clobber the caller's $_ and dies outright when $_ is aliased to a
    # read-only value, e.g. inside "for (1) { ... }".
    my ($class, $filename) = @_;

    return unless $filename;
    return 'flat' if $filename =~ /\.dat$/i;
    return 'xml'  if $filename =~ /\.xml$/i;
    return;    # explicit "no guess" instead of an implicit fall-through value
}
347 | |
348 | |
349 1; |