Mercurial > repos > mahtabm > ensembl
view variant_effect_predictor/Bio/EnsEMBL/Utils/IO.pm @ 3:d30fa12e4cc5 default tip
Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author | devteam <devteam@galaxyproject.org> |
---|---|
date | Mon, 13 Jan 2014 10:38:30 -0500 |
parents | 1f6dce3d34e0 |
children |
line wrap: on
line source
package Bio::EnsEMBL::Utils::IO; =pod =head1 LICENSE Copyright (c) 1999-2012 The European Bioinformatics Institute and Genome Research Limited. All rights reserved. This software is distributed under a modified Apache license. For license details, please see http://www.ensembl.org/info/about/code_licence.html =head1 CONTACT Please email comments or questions to the public Ensembl developers list at <dev@ensembl.org>. Questions may also be sent to the Ensembl help desk at <helpdesk@ensembl.org>. =cut =pod =head1 NAME Bio::EnsEMBL::Utils::IO =head1 SYNOPSIS use Bio::EnsEMBL::Utils::IO qw/slurp work_with_file slurp_to_array fh_to_array/; #or # use Bio::EnsEMBL::Utils::IO qw/:slurp/; #brings in any method starting with slurp # use Bio::EnsEMBL::Utils::IO qw/:array/; #brings in any method which ends with _array # use Bio::EnsEMBL::Utils::IO qw/:gz/; #brings all methods which start with gz_ # use Bio::EnsEMBL::Utils::IO qw/:all/; #brings all methods in #As a scalar my $file_contents = slurp('/my/file/location.txt'); print length($file_contents); #As a ref my $file_contents_ref = slurp('/my/file/location.txt', 1); print length($$file_contents_ref); #Sending it to an array my $array = slurp_to_array('/my/location'); work_with_file('/my/location', 'r', sub { $array = process_to_array($_[0], sub { #Gives us input line by line return "INPUT: $_"; }); }); #Simplified vesion but without the post processing $array = fh_to_array($fh); #Sending this back out to another file work_with_file('/my/file/newlocation.txt', 'w', sub { my ($fh) = @_; print $fh $$file_contents_ref; return; }); #Gzipping the data to another file gz_work_with_file('/my/file.gz', 'w', sub { my ($fh) = @_; print $fh $$file_contents_ref; return; }); #Working with a set of lines manually work_with_file('/my/file', 'r', sub { my ($fh) = @_; iterate_lines($fh, sub { my ($line) = @_; print $line; #Send the line in the file back out return; }); return; }); #Doing the same in one go iterate_file('/my/file', sub { my ($line) = @_; print $line; #Send the line in the file back out return; }); #Move all data from one file handle to another. Bit like a copy move_data($src_fh, $trg_fh); =head1 DESCRIPTION A collection of subroutines aimed to helping IO based operations =head1 METHODS See subroutines. =head1 MAINTAINER $Author: ady $ =head1 VERSION $Revision: 1.10 $ =cut use strict; use warnings; use base qw(Exporter); our $GZIP_OK = 0; our @EXPORT_OK = qw/slurp slurp_to_array fh_to_array process_to_array work_with_file gz_slurp gz_slurp_to_array gz_work_with_file iterate_file iterate_lines move_data/; our %EXPORT_TAGS = ( all => [@EXPORT_OK], slurp => [qw/slurp slurp_to_array gz_slurp gz_slurp_to_array/], array => [qw/fh_to_array process_to_array slurp_to_array gz_slurp_to_array/], gz => [qw/gz_slurp gz_slurp_to_array gz_work_with_file/], iterate => [qw/iterate_file iterate_lines/], ); use Bio::EnsEMBL::Utils::Exception qw(throw); use Bio::EnsEMBL::Utils::Scalar qw(:assert); use IO::File; eval { require IO::Compress::Gzip; require IO::Uncompress::Gunzip; $GZIP_OK = 1; }; =head2 slurp() Arg [1] : string $file Arg [2] : boolean; $want_ref Arg [3] : boolean; $binary Indicates if we want to return a scalar reference Description : Forces the contents of a file into a scalar. This is the fastest way to get a file into memory in Perl. You can also get a scalar reference back to avoid copying the file contents in Scalar references. If the input file is binary then specify with the binary flag Returntype : Scalar or reference of the file contents depending on arg 2 Example : my $contents = slurp('/tmp/file.txt'); Exceptions : If the file did not exist or was not readable Status : Stable =cut sub slurp { my ($file, $want_ref, $binary) = @_; my $contents = q{}; work_with_file($file, 'r', sub { my ($fh) = @_; binmode($fh) if $binary; my $size_left = -s $file; while( $size_left > 0 ) { my $read_cnt = sysread($fh, $contents, $size_left, length($contents)); unless( $read_cnt ) { throw "read error in file $file: $!" ; last; } $size_left -= $read_cnt ; } return; }); return ($want_ref) ? \$contents : $contents; } =head2 gz_slurp() Arg [1] : string $file Arg [2] : boolean; $want_ref Indicates if we want to return a scalar reference Arg [3] : boolean; $binary Arg [4] : HashRef arguments to pass into IO compression layers Description : Forces the contents of a file into a scalar. This is the fastest way to get a file into memory in Perl. You can also get a scalar reference back to avoid copying the file contents in Scalar references. If the input file is binary then specify with the binary flag Returntype : Scalar or reference of the file contents depending on arg 2 Example : my $contents = slurp('/tmp/file.txt.gz'); Exceptions : If the file did not exist or was not readable Status : Stable =cut sub gz_slurp { my ($file, $want_ref, $binary, $args) = @_; my $contents; gz_work_with_file($file, 'r', sub { my ($fh) = @_; local $/ = undef; binmode($fh) if $binary; $contents = <$fh>; return; }, $args); return ($want_ref) ? \$contents : $contents; } =head2 slurp_to_array() Arg [1] : string $file Arg [2] : boolean $chomp Description : Sends the contents of the given file into an ArrayRef Returntype : ArrayRef Example : my $contents_array = slurp_to_array('/tmp/file.txt'); Exceptions : If the file did not exist or was not readable Status : Stable =cut sub slurp_to_array { my ($file, $chomp) = @_; my $contents; work_with_file($file, 'r', sub { my ($fh) = @_; $contents = fh_to_array($fh, $chomp); return; }); return $contents; } =head2 gz_slurp_to_array() Arg [1] : string $file Arg [2] : boolean $chomp Arg [3] : HashRef arguments to pass into IO compression layers Description : Sends the contents of the given gzipped file into an ArrayRef Returntype : ArrayRef Example : my $contents_array = slurp_to_array('/tmp/file.txt.gz'); Exceptions : If the file did not exist or was not readable Status : Stable =cut sub gz_slurp_to_array { my ($file, $chomp, $args) = @_; my $contents; gz_work_with_file($file, 'r', sub { my ($fh) = @_; $contents = fh_to_array($fh, $chomp); return; }, $args); return $contents; } =head2 fh_to_array() Arg [1] : Glob/IO::Handle $fh Arg [2] : boolean $chomp Description : Sends the contents of the given filehandle into an ArrayRef. Will perform chomp on each line if specified. If you require any more advanced line based processing then see L<process_to_array>. Returntype : ArrayRef Example : my $contents_array = fh_to_array($fh); Exceptions : None Status : Stable =cut sub fh_to_array { my ($fh, $chomp) = @_; if($chomp) { return process_to_array($fh, sub { my ($line) = @_; chomp($line); return $line; }); } my @contents = <$fh>; return \@contents; } =head2 process_to_array Arg [1] : Glob/IO::Handle $fh Arg [2] : CodeRef $callback Description : Sends the contents of the given file handle into an ArrayRef via the processing callback. Assumes line based input. Returntype : ArrayRef Example : my $array = process_to_array($fh, sub { return "INPUT: $_"; }); Exceptions : If the fh did not exist or if a callback was not given. Status : Stable =cut sub process_to_array { my ($fh, $callback) = @_; assert_file_handle($fh, 'FileHandle'); assert_ref($callback, 'CODE', 'callback'); my @contents; iterate_lines($fh, sub { my ($line) = @_; push(@contents, $callback->($line)); return; }); return \@contents; } =head2 iterate_lines Arg [1] : Glob/IO::Handle $fh Arg [2] : CodeRef $callback Description : Iterates through each line from the given file handle and hands them to the callback one by one Returntype : None Example : iterate_lines($fh, sub { print "INPUT: $_"; }); Exceptions : If the fh did not exist or if a callback was not given. Status : Stable =cut sub iterate_lines { my ($fh, $callback) = @_; assert_file_handle($fh, 'FileHandle'); assert_ref($callback, 'CODE', 'callback'); while( my $line = <$fh> ) { $callback->($line); } return; } =head2 iterate_file Arg [1] : string $file Arg [3] : CodeRef the callback which is used to iterate the lines in the file Description : Iterates through each line from the given file and hands them to the callback one by one Returntype : None Example : iterate_file('/my/file', sub { print "INPUT: $_"; }); Exceptions : If the file did not exist or if a callback was not given. Status : Stable =cut sub iterate_file { my ($file, $callback) = @_; work_with_file($file, 'r', sub { my ($fh) = @_; iterate_lines($fh, $callback); return; }); return; } =head2 work_with_file() Arg [1] : string $file Arg [2] : string; $mode Supports all modes specified by the C<open()> function as well as those supported by IO::File Arg [3] : CodeRef the callback which is given the open file handle as its only argument Description : Performs the nitty gritty of checking if a file handle is open and closing the resulting filehandle down. Returntype : None Example : work_with_file('/tmp/out.txt', 'w', sub { my ($fh) = @_; print $fh 'hello'; return; }); Exceptions : If we could not work with the file due to permissions Status : Stable =cut sub work_with_file { my ($file, $mode, $callback) = @_; throw "We need a file name to open" if ! $file; throw "We need a mode to open the requested file with" if ! $mode; assert_ref($callback, 'CODE', 'callback'); my $fh = IO::File->new($file, $mode) or throw "Cannot open '${file}' in mode '${mode}': $!"; $callback->($fh); close($fh) or throw "Cannot close FH from ${file}: $!"; return; } =head2 gz_work_with_file() Arg [1] : string $file Arg [2] : string; $mode Supports modes like C<r>, C<w>, C<\>> and C<\<> Arg [3] : CodeRef the callback which is given the open file handle as its only argument Arg [4] : HashRef used to pass options into the IO compression/uncompression modules Description : Performs the nitty gritty of checking if a file handle is open and closing the resulting filehandle down. Returntype : None Example : work_with_file('/tmp/out.txt.gz', 'w', sub { my ($fh) = @_; print $fh 'hello'; return; }); Exceptions : If we could not work with the file due to permissions Status : Stable =cut sub gz_work_with_file { my ($file, $mode, $callback, $args) = @_; throw "IO::Compress was not available"if ! $GZIP_OK; throw "We need a file name to open" if ! $file; throw "We need a mode to open the requested file with" if ! $mode; assert_ref($callback, 'CODE', 'callback'); $args ||= {}; my $fh; { no warnings qw/once/; if($mode =~ '>$' || $mode eq 'w') { $args->{Append} = 1 if $mode =~ />>$/; $fh = IO::Compress::Gzip->new($file, %$args) or throw "Cannot open '$file' for writing: $IO::Compress::Gzip::GzipError"; } elsif($mode eq '<' || $mode eq 'r') { $fh = IO::Uncompress::Gunzip->new($file, %$args) or throw "Cannot open '$file' for writing: $IO::Uncompress::Gunzip::GunzipError"; } else { throw "Could not decipher a mode from '$mode'"; } }; $callback->($fh); close($fh) or throw "Cannot close FH from ${file}: $!"; return; } =head2 move_data Arg [1] : FileHandle $src_fh Arg [2] : FileHandle $trg_fh Arg [3] : int $buffer. Defaults to 8KB Description : Moves data from the given source filehandle to the target one using a 8KB buffer or user specified buffer Returntype : None Example : move_data($src_fh, $trg_fh, 16*1024); # copy in 16KB chunks Exceptions : If inputs were not as expected =cut sub move_data { my ($src_fh, $trg_fh, $buffer_size) = @_; assert_file_handle($src_fh, 'SourceFileHandle'); assert_file_handle($trg_fh, 'TargetFileHandle'); $buffer_size ||= 8192; #Default 8KB my $buffer; while(1) { my $read = sysread($src_fh, $buffer, $buffer_size); if(! defined $read) { throw "Error whilst reading from filehandle: $!"; } if($read == 0) { last; } my $written = syswrite($trg_fh, $buffer); if(!defined $written) { throw "Error whilst writing to filehandle: $!"; } } return; } 1;