Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/DBFile/FileAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::DBFile::FileAdaptor - Base Adaptor for direct file access

=head1 DESCRIPTION

Basic wrapper class to provide access to file based data.

This is primarily aimed at indexed Collection(.col) files which are optimised for Slice
based queries.  Collections store fixed width/windowed data as BLOBS.  This makes
it possible to seek to a required location given slice coordinates and read only
the required amount of data covering the slice.

Currently only works as hybrid DBAdaptor e.g. ResultFeatureAdaptor which inherits both from
here and BaseFeatureAdaptor.

=cut



package Bio::EnsEMBL::DBFile::FileAdaptor;

use strict;
use warnings;

use Fcntl qw(SEEK_END);
use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate);


=head2 get_filehandle

  Arg[1]     : string  - filepath
  Arg[2]     : HASHREF - Optional params, see open_file
  Example    : my $fh = $self->get_filehandle($filepath, {-binmode => 1});
  Description: Gets and caches a simple file handle.  The handle is opened
               on first request and stored in $self->{file_cache}; later
               calls for the same path return the cached handle.
               initialise_filehandle is called after a successful open
               for reading ('<').  The params hash passed by the caller
               is not modified.
  Returntype : GLOB/undef - filehandle
  Exceptions : warns if cache entry exists but is not defined
               throws (via open_file) if the file cannot be opened
  Caller     : general
  Status     : at risk

=cut

sub get_filehandle{
  my ($self, $filepath, $params_hash) = @_;

  # Work on a shallow copy so the default operator is never written
  # back into the caller's hash.
  my %params = %{ $params_hash || {} };

  # Same defaulting rule as open_file: a missing or false operator means read.
  $params{-file_operator} ||= '<';
  my $file_op = $params{-file_operator};

  if(! exists $self->{file_cache}{$filepath}{filehandle}){
    # Fully qualified call: forces use of this class' open_file rather than
    # any same-named sub reachable through a parallel inheritance path.
    my $fh = $self->Bio::EnsEMBL::DBFile::FileAdaptor::open_file($filepath, \%params);

    if(defined $fh){
      $self->{file_cache}{$filepath}{filehandle} = $fh;
      # Only handles opened for reading are initialised.
      $self->initialise_filehandle($filepath) if($file_op eq '<');
    }
  }
  elsif(! defined $self->{file_cache}{$filepath}{filehandle}){
    # This may be one of several read/seek errors which will have already been warned
    warn "Encountered an error with file handle for $filepath\n";
  }
  # TODO: check the requested operator against the one used for the cached
  # handle, to make sure we aren't handing back a handle opened with a
  # different operator.

  return $self->{file_cache}{$filepath}{filehandle};
}


=head2 open_file

  Arg[1]     : string  - filepath
  Arg[2]     : HASHREF - Optional params:
                          -binmode       => 0|1, # Boolean i.e. treat file as binary
                          -file_operator => '>'  # One of '<', '>' or '>>'. Default is '<'
  Example    : my $fh = $self->open_file($filepath, {-binmode => 1, -file_operator => '>'});
  Description: Opens a file for reading, writing or appending.  The file
               path is always treated as a literal file name (three
               argument open), so characters such as '<', '>', '|' or
               surrounding whitespace in the path are never interpreted
               as part of the open mode.
  Returntype : GLOB - filehandle
  Exceptions : throws if file operator unsupported
               throws if file open fails
               throws if failed to set binmode
  Caller     : general
  Status     : at risk

=cut

sub open_file{
  my ($self, $filepath, $params_hash) = @_;

  # TODO: validate params_hash keys (rearrange will not warn/throw for
  # invalid keys).  Octal permissions would require FileHandle,
  # see EFGUtils::open_file.

  my $file_op = $params_hash->{-file_operator} || '<';

  if(($file_op ne '<') &&
     ($file_op ne '>') &&
     ($file_op ne '>>')){
    throw("Cannot perform open with unsupported operator:\t${file_op}${filepath}");
  }

  # Three argument open keeps the mode and the path separate.
  my $fh;
  my $success = open($fh, $file_op, $filepath);

  # Failures throw rather than warn.  Callers which only want to test for
  # the presence of a file should wrap the call in an eval.
  if(! $success){
    throw("Failed to open:\t$filepath\n$!\n");
  }
  elsif($params_hash->{-binmode}){
    $success = binmode $fh;

    if(! $success){
      throw("Failed to set binmode:\t$filepath\n$!");
    }
  }

  return $fh;
}


=head2 validate_file_length

  Arg[1]     : string  - filepath
  Arg[2]     : int     - expected length in bytes
  Arg[3]     : boolean - Optional, open the file in binmode
  Example    : $self->validate_file_length($filepath, $expected_length);
  Description: Utility method which can be used during file creation.
               Opens the file, seeks to its end and compares the byte
               offset reached with the expected length.
  Returntype : None
  Exceptions : throws (via open_file) if file open fails
               throws if seeking to the end of the file fails
               throws if file is not expected length
  Caller     : general
  Status     : at risk - change to seek to account for 'logical characters'

=cut

sub validate_file_length{
  my ($self, $filepath, $expected_length, $binmode) = @_;

  # The file handle cache is not used here as the handle is rarely
  # wanted afterwards.

  # Fully qualified call: this previously picked up EFGUtils::open_file, a
  # plain sub imported in a parallel inheritance path, without any
  # redefinition warning.
  my $fh = $self->Bio::EnsEMBL::DBFile::FileAdaptor::open_file($filepath, {-binmode => $binmode});

  # sysseek always reports the position in bytes.
  # TODO: change to seek if lengths in logical characters are required.
  my $seeked_bytes = sysseek($fh, 0, SEEK_END);
  my $seek_error   = $!; # capture before close can overwrite it
  close($fh);

  if(! defined $seeked_bytes){
    throw("Failed to seek to end of file:\t$filepath\n$seek_error\n");
  }

  # sysseek returns the string "0 but true" for position zero; force a
  # plain number so an empty file is reported as 0 in the messages below.
  $seeked_bytes += 0;

  if($seeked_bytes < $expected_length){
    throw("File is shorter($seeked_bytes) than expected($expected_length):\t$filepath\n");
  }
  elsif($seeked_bytes > $expected_length){
    throw("File is longer($seeked_bytes) than expected($expected_length):\t$filepath\n");
  }

  return;
}





### STUB/TEMPLATE METHODS ###
#
# If required these should be over-ridden in the
# descendant FileAdaptor e.g. CollectionAdaptor.
# Listed here for visibility (rather than
# using 'can').


# Hook called by get_filehandle after a file has been opened for reading.
# The base implementation does nothing.
sub initialise_filehandle{
  return;
}



1;