Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Utils/IO.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 package Bio::EnsEMBL::Utils::IO; | |
2 | |
3 =pod | |
4 | |
5 =head1 LICENSE | |
6 | |
7 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
8 Genome Research Limited. All rights reserved. | |
9 | |
10 This software is distributed under a modified Apache license. | |
11 For license details, please see | |
12 | |
13 http://www.ensembl.org/info/about/code_licence.html | |
14 | |
15 =head1 CONTACT | |
16 | |
17 Please email comments or questions to the public Ensembl | |
18 developers list at <dev@ensembl.org>. | |
19 | |
20 Questions may also be sent to the Ensembl help desk at | |
21 <helpdesk@ensembl.org>. | |
22 | |
23 =cut | |
24 | |
25 =pod | |
26 | |
27 =head1 NAME | |
28 | |
29 Bio::EnsEMBL::Utils::IO | |
30 | |
31 =head1 SYNOPSIS | |
32 | |
33 use Bio::EnsEMBL::Utils::IO qw/slurp work_with_file slurp_to_array fh_to_array/; | |
34 #or | |
35 # use Bio::EnsEMBL::Utils::IO qw/:slurp/; #brings in any method starting with slurp | |
36 # use Bio::EnsEMBL::Utils::IO qw/:array/; #brings in any method which ends with _array | |
37 # use Bio::EnsEMBL::Utils::IO qw/:gz/; #brings all methods which start with gz_ | |
38 # use Bio::EnsEMBL::Utils::IO qw/:all/; #brings all methods in | |
39 | |
40 #As a scalar | |
41 my $file_contents = slurp('/my/file/location.txt'); | |
42 print length($file_contents); | |
43 | |
44 #As a ref | |
45 my $file_contents_ref = slurp('/my/file/location.txt', 1); | |
46 print length($$file_contents_ref); | |
47 | |
48 #Sending it to an array | |
49 my $array = slurp_to_array('/my/location'); | |
50 work_with_file('/my/location', 'r', sub { | |
51 $array = process_to_array($_[0], sub { | |
52 #Gives us input line by line | |
53 return "INPUT: $_"; | |
54 }); | |
55 }); | |
56 | |
57 #Simplified vesion but without the post processing | |
58 $array = fh_to_array($fh); | |
59 | |
60 #Sending this back out to another file | |
61 work_with_file('/my/file/newlocation.txt', 'w', sub { | |
62 my ($fh) = @_; | |
63 print $fh $$file_contents_ref; | |
64 return; | |
65 }); | |
66 | |
67 #Gzipping the data to another file | |
68 gz_work_with_file('/my/file.gz', 'w', sub { | |
69 my ($fh) = @_; | |
70 print $fh $$file_contents_ref; | |
71 return; | |
72 }); | |
73 | |
74 #Working with a set of lines manually | |
75 work_with_file('/my/file', 'r', sub { | |
76 my ($fh) = @_; | |
77 iterate_lines($fh, sub { | |
78 my ($line) = @_; | |
79 print $line; #Send the line in the file back out | |
80 return; | |
81 }); | |
82 return; | |
83 }); | |
84 | |
85 #Doing the same in one go | |
86 iterate_file('/my/file', sub { | |
87 my ($line) = @_; | |
88 print $line; #Send the line in the file back out | |
89 return; | |
90 }); | |
91 | |
92 #Move all data from one file handle to another. Bit like a copy | |
93 move_data($src_fh, $trg_fh); | |
94 | |
95 =head1 DESCRIPTION | |
96 | |
97 A collection of subroutines aimed to helping IO based operations | |
98 | |
99 =head1 METHODS | |
100 | |
101 See subroutines. | |
102 | |
103 =head1 MAINTAINER | |
104 | |
105 $Author: ady $ | |
106 | |
107 =head1 VERSION | |
108 | |
109 $Revision: 1.10 $ | |
110 | |
111 =cut | |
112 | |
113 use strict; | |
114 use warnings; | |
115 | |
116 use base qw(Exporter); | |
117 | |
118 our $GZIP_OK = 0; | |
119 our @EXPORT_OK = qw/slurp slurp_to_array fh_to_array process_to_array work_with_file gz_slurp gz_slurp_to_array gz_work_with_file iterate_file iterate_lines move_data/; | |
120 our %EXPORT_TAGS = ( | |
121 all => [@EXPORT_OK], | |
122 slurp => [qw/slurp slurp_to_array gz_slurp gz_slurp_to_array/], | |
123 array => [qw/fh_to_array process_to_array slurp_to_array gz_slurp_to_array/], | |
124 gz => [qw/gz_slurp gz_slurp_to_array gz_work_with_file/], | |
125 iterate => [qw/iterate_file iterate_lines/], | |
126 ); | |
127 use Bio::EnsEMBL::Utils::Exception qw(throw); | |
128 use Bio::EnsEMBL::Utils::Scalar qw(:assert); | |
129 use IO::File; | |
130 eval { | |
131 require IO::Compress::Gzip; | |
132 require IO::Uncompress::Gunzip; | |
133 $GZIP_OK = 1; | |
134 }; | |
135 | |
136 =head2 slurp() | |
137 | |
138 Arg [1] : string $file | |
139 Arg [2] : boolean; $want_ref | |
140 Arg [3] : boolean; $binary | |
141 Indicates if we want to return a scalar reference | |
142 Description : Forces the contents of a file into a scalar. This is the | |
143 fastest way to get a file into memory in Perl. You can also | |
144 get a scalar reference back to avoid copying the file contents | |
145 in Scalar references. If the input file is binary then specify | |
146 with the binary flag | |
147 Returntype : Scalar or reference of the file contents depending on arg 2 | |
148 Example : my $contents = slurp('/tmp/file.txt'); | |
149 Exceptions : If the file did not exist or was not readable | |
150 Status : Stable | |
151 | |
152 =cut | |
153 | |
154 sub slurp { | |
155 my ($file, $want_ref, $binary) = @_; | |
156 my $contents = q{}; | |
157 work_with_file($file, 'r', sub { | |
158 my ($fh) = @_; | |
159 binmode($fh) if $binary; | |
160 my $size_left = -s $file; | |
161 while( $size_left > 0 ) { | |
162 my $read_cnt = sysread($fh, $contents, $size_left, length($contents)); | |
163 unless( $read_cnt ) { | |
164 throw "read error in file $file: $!" ; | |
165 last; | |
166 } | |
167 $size_left -= $read_cnt ; | |
168 } | |
169 return; | |
170 }); | |
171 return ($want_ref) ? \$contents : $contents; | |
172 } | |
173 | |
174 =head2 gz_slurp() | |
175 | |
176 Arg [1] : string $file | |
177 Arg [2] : boolean; $want_ref Indicates if we want to return a scalar reference | |
178 Arg [3] : boolean; $binary | |
179 Arg [4] : HashRef arguments to pass into IO compression layers | |
180 Description : Forces the contents of a file into a scalar. This is the | |
181 fastest way to get a file into memory in Perl. You can also | |
182 get a scalar reference back to avoid copying the file contents | |
183 in Scalar references. If the input file is binary then specify | |
184 with the binary flag | |
185 Returntype : Scalar or reference of the file contents depending on arg 2 | |
186 Example : my $contents = slurp('/tmp/file.txt.gz'); | |
187 Exceptions : If the file did not exist or was not readable | |
188 Status : Stable | |
189 | |
190 =cut | |
191 | |
192 sub gz_slurp { | |
193 my ($file, $want_ref, $binary, $args) = @_; | |
194 my $contents; | |
195 gz_work_with_file($file, 'r', sub { | |
196 my ($fh) = @_; | |
197 local $/ = undef; | |
198 binmode($fh) if $binary; | |
199 $contents = <$fh>; | |
200 return; | |
201 }, $args); | |
202 return ($want_ref) ? \$contents : $contents; | |
203 } | |
204 | |
205 =head2 slurp_to_array() | |
206 | |
207 Arg [1] : string $file | |
208 Arg [2] : boolean $chomp | |
209 Description : Sends the contents of the given file into an ArrayRef | |
210 Returntype : ArrayRef | |
211 Example : my $contents_array = slurp_to_array('/tmp/file.txt'); | |
212 Exceptions : If the file did not exist or was not readable | |
213 Status : Stable | |
214 | |
215 =cut | |
216 | |
217 sub slurp_to_array { | |
218 my ($file, $chomp) = @_; | |
219 my $contents; | |
220 work_with_file($file, 'r', sub { | |
221 my ($fh) = @_; | |
222 $contents = fh_to_array($fh, $chomp); | |
223 return; | |
224 }); | |
225 return $contents; | |
226 } | |
227 | |
228 =head2 gz_slurp_to_array() | |
229 | |
230 Arg [1] : string $file | |
231 Arg [2] : boolean $chomp | |
232 Arg [3] : HashRef arguments to pass into IO compression layers | |
233 Description : Sends the contents of the given gzipped file into an ArrayRef | |
234 Returntype : ArrayRef | |
235 Example : my $contents_array = slurp_to_array('/tmp/file.txt.gz'); | |
236 Exceptions : If the file did not exist or was not readable | |
237 Status : Stable | |
238 | |
239 =cut | |
240 | |
241 sub gz_slurp_to_array { | |
242 my ($file, $chomp, $args) = @_; | |
243 my $contents; | |
244 gz_work_with_file($file, 'r', sub { | |
245 my ($fh) = @_; | |
246 $contents = fh_to_array($fh, $chomp); | |
247 return; | |
248 }, $args); | |
249 return $contents; | |
250 } | |
251 | |
252 =head2 fh_to_array() | |
253 | |
254 Arg [1] : Glob/IO::Handle $fh | |
255 Arg [2] : boolean $chomp | |
256 Description : Sends the contents of the given filehandle into an ArrayRef. | |
257 Will perform chomp on each line if specified. If you require | |
258 any more advanced line based processing then see | |
259 L<process_to_array>. | |
260 Returntype : ArrayRef | |
261 Example : my $contents_array = fh_to_array($fh); | |
262 Exceptions : None | |
263 Status : Stable | |
264 | |
265 =cut | |
266 | |
267 sub fh_to_array { | |
268 my ($fh, $chomp) = @_; | |
269 if($chomp) { | |
270 return process_to_array($fh, sub { | |
271 my ($line) = @_; | |
272 chomp($line); | |
273 return $line; | |
274 }); | |
275 } | |
276 my @contents = <$fh>; | |
277 return \@contents; | |
278 } | |
279 | |
280 =head2 process_to_array | |
281 | |
282 Arg [1] : Glob/IO::Handle $fh | |
283 Arg [2] : CodeRef $callback | |
284 Description : Sends the contents of the given file handle into an ArrayRef | |
285 via the processing callback. Assumes line based input. | |
286 Returntype : ArrayRef | |
287 Example : my $array = process_to_array($fh, sub { return "INPUT: $_"; }); | |
288 Exceptions : If the fh did not exist or if a callback was not given. | |
289 Status : Stable | |
290 | |
291 =cut | |
292 | |
293 sub process_to_array { | |
294 my ($fh, $callback) = @_; | |
295 assert_file_handle($fh, 'FileHandle'); | |
296 assert_ref($callback, 'CODE', 'callback'); | |
297 my @contents; | |
298 iterate_lines($fh, sub { | |
299 my ($line) = @_; | |
300 push(@contents, $callback->($line)); | |
301 return; | |
302 }); | |
303 return \@contents; | |
304 } | |
305 | |
306 =head2 iterate_lines | |
307 | |
308 Arg [1] : Glob/IO::Handle $fh | |
309 Arg [2] : CodeRef $callback | |
310 Description : Iterates through each line from the given file handle and | |
311 hands them to the callback one by one | |
312 Returntype : None | |
313 Example : iterate_lines($fh, sub { print "INPUT: $_"; }); | |
314 Exceptions : If the fh did not exist or if a callback was not given. | |
315 Status : Stable | |
316 | |
317 =cut | |
318 | |
319 sub iterate_lines { | |
320 my ($fh, $callback) = @_; | |
321 assert_file_handle($fh, 'FileHandle'); | |
322 assert_ref($callback, 'CODE', 'callback'); | |
323 while( my $line = <$fh> ) { | |
324 $callback->($line); | |
325 } | |
326 return; | |
327 } | |
328 | |
329 =head2 iterate_file | |
330 | |
331 Arg [1] : string $file | |
332 Arg [3] : CodeRef the callback which is used to iterate the lines in | |
333 the file | |
334 Description : Iterates through each line from the given file and | |
335 hands them to the callback one by one | |
336 Returntype : None | |
337 Example : iterate_file('/my/file', sub { print "INPUT: $_"; }); | |
338 Exceptions : If the file did not exist or if a callback was not given. | |
339 Status : Stable | |
340 | |
341 =cut | |
342 | |
343 | |
344 sub iterate_file { | |
345 my ($file, $callback) = @_; | |
346 work_with_file($file, 'r', sub { | |
347 my ($fh) = @_; | |
348 iterate_lines($fh, $callback); | |
349 return; | |
350 }); | |
351 return; | |
352 } | |
353 | |
354 | |
355 | |
356 =head2 work_with_file() | |
357 | |
358 Arg [1] : string $file | |
359 Arg [2] : string; $mode | |
360 Supports all modes specified by the C<open()> function as well as those | |
361 supported by IO::File | |
362 Arg [3] : CodeRef the callback which is given the open file handle as | |
363 its only argument | |
364 Description : Performs the nitty gritty of checking if a file handle is open | |
365 and closing the resulting filehandle down. | |
366 Returntype : None | |
367 Example : work_with_file('/tmp/out.txt', 'w', sub { | |
368 my ($fh) = @_; | |
369 print $fh 'hello'; | |
370 return; | |
371 }); | |
372 Exceptions : If we could not work with the file due to permissions | |
373 Status : Stable | |
374 | |
375 =cut | |
376 | |
377 sub work_with_file { | |
378 my ($file, $mode, $callback) = @_; | |
379 throw "We need a file name to open" if ! $file; | |
380 throw "We need a mode to open the requested file with" if ! $mode; | |
381 assert_ref($callback, 'CODE', 'callback'); | |
382 my $fh = IO::File->new($file, $mode) or | |
383 throw "Cannot open '${file}' in mode '${mode}': $!"; | |
384 $callback->($fh); | |
385 close($fh) or throw "Cannot close FH from ${file}: $!"; | |
386 return; | |
387 } | |
388 | |
389 =head2 gz_work_with_file() | |
390 | |
391 Arg [1] : string $file | |
392 Arg [2] : string; $mode | |
393 Supports modes like C<r>, C<w>, C<\>> and C<\<> | |
394 Arg [3] : CodeRef the callback which is given the open file handle as | |
395 its only argument | |
396 Arg [4] : HashRef used to pass options into the IO | |
397 compression/uncompression modules | |
398 Description : Performs the nitty gritty of checking if a file handle is open | |
399 and closing the resulting filehandle down. | |
400 Returntype : None | |
401 Example : work_with_file('/tmp/out.txt.gz', 'w', sub { | |
402 my ($fh) = @_; | |
403 print $fh 'hello'; | |
404 return; | |
405 }); | |
406 Exceptions : If we could not work with the file due to permissions | |
407 Status : Stable | |
408 | |
409 =cut | |
410 | |
411 sub gz_work_with_file { | |
412 my ($file, $mode, $callback, $args) = @_; | |
413 throw "IO::Compress was not available"if ! $GZIP_OK; | |
414 throw "We need a file name to open" if ! $file; | |
415 throw "We need a mode to open the requested file with" if ! $mode; | |
416 assert_ref($callback, 'CODE', 'callback'); | |
417 $args ||= {}; | |
418 | |
419 my $fh; | |
420 { | |
421 no warnings qw/once/; | |
422 if($mode =~ '>$' || $mode eq 'w') { | |
423 $args->{Append} = 1 if $mode =~ />>$/; | |
424 $fh = IO::Compress::Gzip->new($file, %$args) or throw "Cannot open '$file' for writing: $IO::Compress::Gzip::GzipError"; | |
425 } | |
426 elsif($mode eq '<' || $mode eq 'r') { | |
427 $fh = IO::Uncompress::Gunzip->new($file, %$args) or throw "Cannot open '$file' for writing: $IO::Uncompress::Gunzip::GunzipError"; | |
428 } | |
429 else { | |
430 throw "Could not decipher a mode from '$mode'"; | |
431 } | |
432 }; | |
433 $callback->($fh); | |
434 close($fh) or throw "Cannot close FH from ${file}: $!"; | |
435 return; | |
436 } | |
437 | |
438 =head2 move_data | |
439 | |
440 Arg [1] : FileHandle $src_fh | |
441 Arg [2] : FileHandle $trg_fh | |
442 Arg [3] : int $buffer. Defaults to 8KB | |
443 Description : Moves data from the given source filehandle to the target one | |
444 using a 8KB buffer or user specified buffer | |
445 Returntype : None | |
446 Example : move_data($src_fh, $trg_fh, 16*1024); # copy in 16KB chunks | |
447 Exceptions : If inputs were not as expected | |
448 | |
449 =cut | |
450 | |
451 sub move_data { | |
452 my ($src_fh, $trg_fh, $buffer_size) = @_; | |
453 assert_file_handle($src_fh, 'SourceFileHandle'); | |
454 assert_file_handle($trg_fh, 'TargetFileHandle'); | |
455 | |
456 $buffer_size ||= 8192; #Default 8KB | |
457 my $buffer; | |
458 while(1) { | |
459 my $read = sysread($src_fh, $buffer, $buffer_size); | |
460 if(! defined $read) { | |
461 throw "Error whilst reading from filehandle: $!"; | |
462 } | |
463 if($read == 0) { | |
464 last; | |
465 } | |
466 my $written = syswrite($trg_fh, $buffer); | |
467 if(!defined $written) { | |
468 throw "Error whilst writing to filehandle: $!"; | |
469 } | |
470 } | |
471 return; | |
472 } | |
473 | |
474 1; |