comparison variant_effect_predictor/Bio/EnsEMBL/Utils/IO.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 package Bio::EnsEMBL::Utils::IO;
2
3 =pod
4
5 =head1 LICENSE
6
7 Copyright (c) 1999-2012 The European Bioinformatics Institute and
8 Genome Research Limited. All rights reserved.
9
10 This software is distributed under a modified Apache license.
11 For license details, please see
12
13 http://www.ensembl.org/info/about/code_licence.html
14
15 =head1 CONTACT
16
17 Please email comments or questions to the public Ensembl
18 developers list at <dev@ensembl.org>.
19
20 Questions may also be sent to the Ensembl help desk at
21 <helpdesk@ensembl.org>.
22
23 =cut
24
25 =pod
26
27 =head1 NAME
28
29 Bio::EnsEMBL::Utils::IO
30
31 =head1 SYNOPSIS
32
33 use Bio::EnsEMBL::Utils::IO qw/slurp work_with_file slurp_to_array fh_to_array/;
34 #or
35 # use Bio::EnsEMBL::Utils::IO qw/:slurp/; #brings in any method starting with slurp
36 # use Bio::EnsEMBL::Utils::IO qw/:array/; #brings in any method which ends with _array
37 # use Bio::EnsEMBL::Utils::IO qw/:gz/; #brings all methods which start with gz_
38 # use Bio::EnsEMBL::Utils::IO qw/:all/; #brings all methods in
39
40 #As a scalar
41 my $file_contents = slurp('/my/file/location.txt');
42 print length($file_contents);
43
44 #As a ref
45 my $file_contents_ref = slurp('/my/file/location.txt', 1);
46 print length($$file_contents_ref);
47
48 #Sending it to an array
49 my $array = slurp_to_array('/my/location');
50 work_with_file('/my/location', 'r', sub {
51 $array = process_to_array($_[0], sub {
52 #Gives us input line by line
53 return "INPUT: $_";
54 });
55 });
56
57 #Simplified vesion but without the post processing
58 $array = fh_to_array($fh);
59
60 #Sending this back out to another file
61 work_with_file('/my/file/newlocation.txt', 'w', sub {
62 my ($fh) = @_;
63 print $fh $$file_contents_ref;
64 return;
65 });
66
67 #Gzipping the data to another file
68 gz_work_with_file('/my/file.gz', 'w', sub {
69 my ($fh) = @_;
70 print $fh $$file_contents_ref;
71 return;
72 });
73
74 #Working with a set of lines manually
75 work_with_file('/my/file', 'r', sub {
76 my ($fh) = @_;
77 iterate_lines($fh, sub {
78 my ($line) = @_;
79 print $line; #Send the line in the file back out
80 return;
81 });
82 return;
83 });
84
85 #Doing the same in one go
86 iterate_file('/my/file', sub {
87 my ($line) = @_;
88 print $line; #Send the line in the file back out
89 return;
90 });
91
92 #Move all data from one file handle to another. Bit like a copy
93 move_data($src_fh, $trg_fh);
94
95 =head1 DESCRIPTION
96
97 A collection of subroutines aimed to helping IO based operations
98
99 =head1 METHODS
100
101 See subroutines.
102
103 =head1 MAINTAINER
104
105 $Author: ady $
106
107 =head1 VERSION
108
109 $Revision: 1.10 $
110
111 =cut
112
113 use strict;
114 use warnings;
115
116 use base qw(Exporter);
117
118 our $GZIP_OK = 0;
119 our @EXPORT_OK = qw/slurp slurp_to_array fh_to_array process_to_array work_with_file gz_slurp gz_slurp_to_array gz_work_with_file iterate_file iterate_lines move_data/;
120 our %EXPORT_TAGS = (
121 all => [@EXPORT_OK],
122 slurp => [qw/slurp slurp_to_array gz_slurp gz_slurp_to_array/],
123 array => [qw/fh_to_array process_to_array slurp_to_array gz_slurp_to_array/],
124 gz => [qw/gz_slurp gz_slurp_to_array gz_work_with_file/],
125 iterate => [qw/iterate_file iterate_lines/],
126 );
127 use Bio::EnsEMBL::Utils::Exception qw(throw);
128 use Bio::EnsEMBL::Utils::Scalar qw(:assert);
129 use IO::File;
130 eval {
131 require IO::Compress::Gzip;
132 require IO::Uncompress::Gunzip;
133 $GZIP_OK = 1;
134 };
135
136 =head2 slurp()
137
138 Arg [1] : string $file
139 Arg [2] : boolean; $want_ref
140 Arg [3] : boolean; $binary
141 Indicates if we want to return a scalar reference
142 Description : Forces the contents of a file into a scalar. This is the
143 fastest way to get a file into memory in Perl. You can also
144 get a scalar reference back to avoid copying the file contents
145 in Scalar references. If the input file is binary then specify
146 with the binary flag
147 Returntype : Scalar or reference of the file contents depending on arg 2
148 Example : my $contents = slurp('/tmp/file.txt');
149 Exceptions : If the file did not exist or was not readable
150 Status : Stable
151
152 =cut
153
154 sub slurp {
155 my ($file, $want_ref, $binary) = @_;
156 my $contents = q{};
157 work_with_file($file, 'r', sub {
158 my ($fh) = @_;
159 binmode($fh) if $binary;
160 my $size_left = -s $file;
161 while( $size_left > 0 ) {
162 my $read_cnt = sysread($fh, $contents, $size_left, length($contents));
163 unless( $read_cnt ) {
164 throw "read error in file $file: $!" ;
165 last;
166 }
167 $size_left -= $read_cnt ;
168 }
169 return;
170 });
171 return ($want_ref) ? \$contents : $contents;
172 }
173
174 =head2 gz_slurp()
175
176 Arg [1] : string $file
177 Arg [2] : boolean; $want_ref Indicates if we want to return a scalar reference
178 Arg [3] : boolean; $binary
179 Arg [4] : HashRef arguments to pass into IO compression layers
180 Description : Forces the contents of a file into a scalar. This is the
181 fastest way to get a file into memory in Perl. You can also
182 get a scalar reference back to avoid copying the file contents
183 in Scalar references. If the input file is binary then specify
184 with the binary flag
185 Returntype : Scalar or reference of the file contents depending on arg 2
186 Example : my $contents = slurp('/tmp/file.txt.gz');
187 Exceptions : If the file did not exist or was not readable
188 Status : Stable
189
190 =cut
191
192 sub gz_slurp {
193 my ($file, $want_ref, $binary, $args) = @_;
194 my $contents;
195 gz_work_with_file($file, 'r', sub {
196 my ($fh) = @_;
197 local $/ = undef;
198 binmode($fh) if $binary;
199 $contents = <$fh>;
200 return;
201 }, $args);
202 return ($want_ref) ? \$contents : $contents;
203 }
204
205 =head2 slurp_to_array()
206
207 Arg [1] : string $file
208 Arg [2] : boolean $chomp
209 Description : Sends the contents of the given file into an ArrayRef
210 Returntype : ArrayRef
211 Example : my $contents_array = slurp_to_array('/tmp/file.txt');
212 Exceptions : If the file did not exist or was not readable
213 Status : Stable
214
215 =cut
216
217 sub slurp_to_array {
218 my ($file, $chomp) = @_;
219 my $contents;
220 work_with_file($file, 'r', sub {
221 my ($fh) = @_;
222 $contents = fh_to_array($fh, $chomp);
223 return;
224 });
225 return $contents;
226 }
227
228 =head2 gz_slurp_to_array()
229
230 Arg [1] : string $file
231 Arg [2] : boolean $chomp
232 Arg [3] : HashRef arguments to pass into IO compression layers
233 Description : Sends the contents of the given gzipped file into an ArrayRef
234 Returntype : ArrayRef
235 Example : my $contents_array = slurp_to_array('/tmp/file.txt.gz');
236 Exceptions : If the file did not exist or was not readable
237 Status : Stable
238
239 =cut
240
241 sub gz_slurp_to_array {
242 my ($file, $chomp, $args) = @_;
243 my $contents;
244 gz_work_with_file($file, 'r', sub {
245 my ($fh) = @_;
246 $contents = fh_to_array($fh, $chomp);
247 return;
248 }, $args);
249 return $contents;
250 }
251
252 =head2 fh_to_array()
253
254 Arg [1] : Glob/IO::Handle $fh
255 Arg [2] : boolean $chomp
256 Description : Sends the contents of the given filehandle into an ArrayRef.
257 Will perform chomp on each line if specified. If you require
258 any more advanced line based processing then see
259 L<process_to_array>.
260 Returntype : ArrayRef
261 Example : my $contents_array = fh_to_array($fh);
262 Exceptions : None
263 Status : Stable
264
265 =cut
266
267 sub fh_to_array {
268 my ($fh, $chomp) = @_;
269 if($chomp) {
270 return process_to_array($fh, sub {
271 my ($line) = @_;
272 chomp($line);
273 return $line;
274 });
275 }
276 my @contents = <$fh>;
277 return \@contents;
278 }
279
280 =head2 process_to_array
281
282 Arg [1] : Glob/IO::Handle $fh
283 Arg [2] : CodeRef $callback
284 Description : Sends the contents of the given file handle into an ArrayRef
285 via the processing callback. Assumes line based input.
286 Returntype : ArrayRef
287 Example : my $array = process_to_array($fh, sub { return "INPUT: $_"; });
288 Exceptions : If the fh did not exist or if a callback was not given.
289 Status : Stable
290
291 =cut
292
293 sub process_to_array {
294 my ($fh, $callback) = @_;
295 assert_file_handle($fh, 'FileHandle');
296 assert_ref($callback, 'CODE', 'callback');
297 my @contents;
298 iterate_lines($fh, sub {
299 my ($line) = @_;
300 push(@contents, $callback->($line));
301 return;
302 });
303 return \@contents;
304 }
305
306 =head2 iterate_lines
307
308 Arg [1] : Glob/IO::Handle $fh
309 Arg [2] : CodeRef $callback
310 Description : Iterates through each line from the given file handle and
311 hands them to the callback one by one
312 Returntype : None
313 Example : iterate_lines($fh, sub { print "INPUT: $_"; });
314 Exceptions : If the fh did not exist or if a callback was not given.
315 Status : Stable
316
317 =cut
318
319 sub iterate_lines {
320 my ($fh, $callback) = @_;
321 assert_file_handle($fh, 'FileHandle');
322 assert_ref($callback, 'CODE', 'callback');
323 while( my $line = <$fh> ) {
324 $callback->($line);
325 }
326 return;
327 }
328
329 =head2 iterate_file
330
331 Arg [1] : string $file
332 Arg [3] : CodeRef the callback which is used to iterate the lines in
333 the file
334 Description : Iterates through each line from the given file and
335 hands them to the callback one by one
336 Returntype : None
337 Example : iterate_file('/my/file', sub { print "INPUT: $_"; });
338 Exceptions : If the file did not exist or if a callback was not given.
339 Status : Stable
340
341 =cut
342
343
344 sub iterate_file {
345 my ($file, $callback) = @_;
346 work_with_file($file, 'r', sub {
347 my ($fh) = @_;
348 iterate_lines($fh, $callback);
349 return;
350 });
351 return;
352 }
353
354
355
356 =head2 work_with_file()
357
358 Arg [1] : string $file
359 Arg [2] : string; $mode
360 Supports all modes specified by the C<open()> function as well as those
361 supported by IO::File
362 Arg [3] : CodeRef the callback which is given the open file handle as
363 its only argument
364 Description : Performs the nitty gritty of checking if a file handle is open
365 and closing the resulting filehandle down.
366 Returntype : None
367 Example : work_with_file('/tmp/out.txt', 'w', sub {
368 my ($fh) = @_;
369 print $fh 'hello';
370 return;
371 });
372 Exceptions : If we could not work with the file due to permissions
373 Status : Stable
374
375 =cut
376
377 sub work_with_file {
378 my ($file, $mode, $callback) = @_;
379 throw "We need a file name to open" if ! $file;
380 throw "We need a mode to open the requested file with" if ! $mode;
381 assert_ref($callback, 'CODE', 'callback');
382 my $fh = IO::File->new($file, $mode) or
383 throw "Cannot open '${file}' in mode '${mode}': $!";
384 $callback->($fh);
385 close($fh) or throw "Cannot close FH from ${file}: $!";
386 return;
387 }
388
389 =head2 gz_work_with_file()
390
391 Arg [1] : string $file
392 Arg [2] : string; $mode
393 Supports modes like C<r>, C<w>, C<\>> and C<\<>
394 Arg [3] : CodeRef the callback which is given the open file handle as
395 its only argument
396 Arg [4] : HashRef used to pass options into the IO
397 compression/uncompression modules
398 Description : Performs the nitty gritty of checking if a file handle is open
399 and closing the resulting filehandle down.
400 Returntype : None
401 Example : work_with_file('/tmp/out.txt.gz', 'w', sub {
402 my ($fh) = @_;
403 print $fh 'hello';
404 return;
405 });
406 Exceptions : If we could not work with the file due to permissions
407 Status : Stable
408
409 =cut
410
411 sub gz_work_with_file {
412 my ($file, $mode, $callback, $args) = @_;
413 throw "IO::Compress was not available"if ! $GZIP_OK;
414 throw "We need a file name to open" if ! $file;
415 throw "We need a mode to open the requested file with" if ! $mode;
416 assert_ref($callback, 'CODE', 'callback');
417 $args ||= {};
418
419 my $fh;
420 {
421 no warnings qw/once/;
422 if($mode =~ '>$' || $mode eq 'w') {
423 $args->{Append} = 1 if $mode =~ />>$/;
424 $fh = IO::Compress::Gzip->new($file, %$args) or throw "Cannot open '$file' for writing: $IO::Compress::Gzip::GzipError";
425 }
426 elsif($mode eq '<' || $mode eq 'r') {
427 $fh = IO::Uncompress::Gunzip->new($file, %$args) or throw "Cannot open '$file' for writing: $IO::Uncompress::Gunzip::GunzipError";
428 }
429 else {
430 throw "Could not decipher a mode from '$mode'";
431 }
432 };
433 $callback->($fh);
434 close($fh) or throw "Cannot close FH from ${file}: $!";
435 return;
436 }
437
438 =head2 move_data
439
440 Arg [1] : FileHandle $src_fh
441 Arg [2] : FileHandle $trg_fh
442 Arg [3] : int $buffer. Defaults to 8KB
443 Description : Moves data from the given source filehandle to the target one
444 using a 8KB buffer or user specified buffer
445 Returntype : None
446 Example : move_data($src_fh, $trg_fh, 16*1024); # copy in 16KB chunks
447 Exceptions : If inputs were not as expected
448
449 =cut
450
451 sub move_data {
452 my ($src_fh, $trg_fh, $buffer_size) = @_;
453 assert_file_handle($src_fh, 'SourceFileHandle');
454 assert_file_handle($trg_fh, 'TargetFileHandle');
455
456 $buffer_size ||= 8192; #Default 8KB
457 my $buffer;
458 while(1) {
459 my $read = sysread($src_fh, $buffer, $buffer_size);
460 if(! defined $read) {
461 throw "Error whilst reading from filehandle: $!";
462 }
463 if($read == 0) {
464 last;
465 }
466 my $written = syswrite($trg_fh, $buffer);
467 if(!defined $written) {
468 throw "Error whilst writing to filehandle: $!";
469 }
470 }
471 return;
472 }
473
474 1;