Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/DBSQL/DataFileAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
package Bio::EnsEMBL::DBSQL::DataFileAdaptor;

=pod

=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=head1 NAME

Bio::EnsEMBL::DBSQL::DataFileAdaptor

=head1 SYNOPSIS

  my $dfa = $dba->get_DataFileAdaptor();
  my $file = $dfa->fetch_by_dbID(1);
  my $files = $dfa->fetch_all();

  my $logic_name_files = $dfa->fetch_all_by_logic_name('bam_alignments');

=head1 DESCRIPTION

Provides a database wrapper to store the locations of files and to pull these
records back out. DataFile objects can only provide basic information but they
can return an intended external database adaptor which can be used to
parse the information. This system assumes nothing about the file just that
your parser can access it.

Files are supported over any protocol your parser supports and locations can be
made absolute, built on the fly or versioned.

=head1 METHODS

=cut

use strict;
use warnings;

use base qw/Bio::EnsEMBL::DBSQL::BaseAdaptor/;

use Bio::EnsEMBL::DataFile;
use Bio::EnsEMBL::DBSQL::BaseAdaptor;
use Bio::EnsEMBL::Utils::Exception qw/throw warning deprecate/;
use Bio::EnsEMBL::Utils::Scalar qw/:assert/;

# Process-wide base path shared by every instance of this adaptor.
my $GLOBAL_BASE_PATH;

=head2 global_base_path

  Arg[1]      : String; base path
  Example     : Bio::EnsEMBL::DBSQL::DataFileAdaptor->global_base_path('/base/path');
  Description : Stores a global value to be used when building data file paths.
                Calling it without a (true) argument returns the current value
                and leaves it untouched.
  Returntype  : String
  Exceptions  : None

=cut

sub global_base_path {
  my ($class, $base_path) = @_;
  return $GLOBAL_BASE_PATH unless $base_path;
  $GLOBAL_BASE_PATH = $base_path;
  return $GLOBAL_BASE_PATH;
}

=head2 get_base_path

  Arg[1]      : String; (optional) base path
  Example     : $dfa->get_base_path();
  Description : If given the path it will return that path; if not it consults
                $self->global_base_path() for a value. As a last resort
                it will look at the meta table for an entry keyed by
                B<data_file.base_path>
  Returntype  : String
  Exceptions  : Thrown if nothing is found after consulting all three locations

=cut

sub get_base_path {
  my ($self, $path) = @_;
  return $path if defined $path;
  my $global_base_path = $self->global_base_path();
  return $global_base_path if defined $global_base_path;
  my $meta_base_path = $self->db()->get_MetaContainer()->single_value_by_key('data_file.base_path', 1);
  return $meta_base_path if defined $meta_base_path;
  throw "No base path discovered. Either provide a path, set a global using global_base_path() or specify 'data_file.base_path' in meta";
}

=head2 DataFile_to_extension

  Deprecated
  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : my $ext = $dfa->DataFile_to_extension($bam_df);
  Description : Returns an expected extension for the given DataFile type
  Returntype  : Scalar of the expected file extension
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_extension {
  my ($self, $df) = @_;
  deprecate("Use DataFile_to_extensions() instead");
  my $extensions = $self->DataFile_to_extensions($df);
  return $extensions->[0];
}

=head2 DataFile_to_extensions

  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : my $exts = $dfa->DataFile_to_extensions($bam_df);
  Description : Returns all expected extensions for the given DataFile type. The
                first returned is the default extension
  Returntype  : ArrayRef
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_extensions {
  my ($self, $df) = @_;
  my $type = $df->file_type();
  my $extensions = {
    BAM     => ['bam', 'bam.bai'],
#    BIGBED  => 'bb',
    BIGWIG  => ['bw'],
    VCF     => ['vcf.gz', 'vcf.gz.tbi'],
  }->{$type};
  throw sprintf(q{No extensions found for the type '%s'}, $type ) if ! $extensions;
  return $extensions;
}


=head2 DataFile_to_adaptor

  Arg[1]      : Bio::EnsEMBL::DataFile
  Arg[2]      : (optional) base path
  Example     : my $bam = $dfa->DataFile_to_adaptor($bam_df);
  Description : Returns an adaptor instance which will access the given DataFile
  Returntype  : Scalar actual return depends upon the given file type
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_adaptor {
  my ($self, $df, $base) = @_;
  my $type = $df->file_type();
  # Each handler loads its parser class lazily so that only the external
  # library needed for the requested file type has to be installed.
  my $dispatch = {
    BAM     => sub {
      require Bio::EnsEMBL::ExternalData::BAM::BAMAdaptor;
      return Bio::EnsEMBL::ExternalData::BAM::BAMAdaptor->new($df->path($base));
    },
    BIGBED  => sub {
      require Bio::EnsEMBL::ExternalData::BigFile::BigBedAdaptor;
      return Bio::EnsEMBL::ExternalData::BigFile::BigBedAdaptor->new($df->path($base));
    },
    BIGWIG  => sub {
      require Bio::EnsEMBL::ExternalData::BigFile::BigWigAdaptor;
      return Bio::EnsEMBL::ExternalData::BigFile::BigWigAdaptor->new($df->path($base));
    },
    VCF     => sub {
      require Bio::EnsEMBL::ExternalData::VCF::VCFAdaptor;
      return Bio::EnsEMBL::ExternalData::VCF::VCFAdaptor->new($df->path($base));
    },
  }->{$type};
  throw sprintf(q{No handler found for the type '%s'}, $type ) if ! $dispatch;
  return $dispatch->();
}

=head2 fetch_all_by_logic_name

  Args [1]    : String $logic_name for the linked analysis
  Example     : my $dfs = $dfa->fetch_all_by_logic_name('bam_alignments');
  Description : Returns all DataFile entries linked to the given analysis
                logic name
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : Thrown if logic name does not exist

=cut

sub fetch_all_by_logic_name {
  my ($self, $logic_name) = @_;
  my $analysis = $self->db()->get_AnalysisAdaptor()->fetch_by_logic_name($logic_name);
  throw "No analysis found for logic_name '${logic_name}'" if ! $analysis;
  return $self->fetch_all_by_Analysis($analysis);
}

=head2 fetch_all_by_Analysis

  Args [1]    : Bio::EnsEMBL::Analysis $analysis to look up by
  Example     : my $dfs = $dfa->fetch_all_by_Analysis($analysis);
  Description : Returns all DataFile entries linked to the given analysis
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : None

=cut

sub fetch_all_by_Analysis {
  my ($self, $analysis) = @_;
  assert_ref($analysis, 'Bio::EnsEMBL::Analysis', 'analysis');
  $self->bind_param_generic_fetch($analysis->dbID(), SQL_INTEGER);
  return $self->generic_fetch('df.analysis_id =?');
}

=head2 fetch_all_by_CoordSystem

  Args [1]    : Bio::EnsEMBL::CoordSystem $coord_system to look up by
  Example     : my $dfs = $dfa->fetch_all_by_CoordSystem($cs);
  Description : Returns all DataFile entries linked to the given coordinate
                system. Does B<not> support I<toplevel>
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : None

=cut

sub fetch_all_by_CoordSystem {
  my ($self, $cs) = @_;
  assert_ref($cs, 'Bio::EnsEMBL::CoordSystem', 'coord_system');
  $self->bind_param_generic_fetch($cs->dbID(), SQL_INTEGER);
  return $self->generic_fetch('df.coord_system_id =?');
}

=head2 fetch_by_name_and_type

  Arg[1]      : String $name; value matched against data_file.name
  Arg[2]      : String $type; value matched against data_file.file_type
  Example     : my $df = $dfa->fetch_by_name_and_type('my_file', 'BAM');
  Description : Returns the first DataFile whose name and file type match the
                given values
  Returntype  : Bio::EnsEMBL::DataFile, or nothing (empty return) if no row
                matched
  Exceptions  : None raised directly by this method

=cut

sub fetch_by_name_and_type {
  my ($self, $name, $type) = @_;
  $self->bind_param_generic_fetch($name, SQL_VARCHAR);
  $self->bind_param_generic_fetch($type, SQL_VARCHAR);
  my $results = $self->generic_fetch('df.name =? and df.file_type =?');
  return $results->[0] if @{$results};
  return;
}

=head2 generic_fetch

  Arg[1]      : String $constraint; (optional) SQL fragment appended to the
                WHERE clause with AND. Placeholders in it are filled from the
                values queued through bind_param_generic_fetch()
  Example     : my $dfs = $dfa->generic_fetch('df.analysis_id =?');
  Description : Runs the data_file query, always restricted to the species of
                the attached database, and builds one DataFile per row. Any
                queued bind parameters are consumed by the call.
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : None raised directly by this method

=cut

sub generic_fetch {
  my ($self, $constraint) = @_;
  $constraint ||= q{};

  my $sql = <<'SQL';
select df.data_file_id, df.coord_system_id, df.analysis_id, df.name, df.version_lock, df.absolute, df.url, df.file_type
from data_file df
join coord_system cs using (coord_system_id)
where cs.species_id =?
SQL
  # The heredoc ends in a newline so the extra clause starts on its own line.
  $sql .= 'AND '.$constraint if $constraint;

  my $params = $self->bind_param_generic_fetch();
  if(defined $params) {
    # Clear the queued parameters so they cannot leak into the next fetch.
    # We keep working with the array we were just handed.
    $self->{'_bind_param_generic_fetch'} = undef;
  }
  else {
    $params = [];
  }
  # species_id fills the first placeholder, ahead of any constraint values.
  unshift(@{$params}, $self->db()->species_id());

  my $csa = $self->db()->get_CoordSystemAdaptor();
  my $aa = $self->db()->get_AnalysisAdaptor();

  return $self->dbc()->sql_helper()->execute(-SQL => $sql, -PARAMS => $params, -CALLBACK => sub {
    my ($row) = @_;
    my ($data_file_id, $coord_system_id, $analysis_id, $name, $version_lock, $absolute, $url, $file_type) = @{$row};
    my $hash = {
      dbID          => $data_file_id,
      adaptor       => $self,
      coord_system  => $csa->fetch_by_dbID($coord_system_id),
      analysis      => $aa->fetch_by_dbID($analysis_id),
      name          => $name,
      version_lock  => $version_lock,
      absolute      => $absolute,
      file_type     => $file_type,
    };
    # Only set url when the column holds a true value.
    $hash->{url} = $url if $url;
    return Bio::EnsEMBL::DataFile->new_fast($hash);
  });
}

# Validates that the DataFile is linked to an analysis and a coordinate system
# and returns the bind parameters shared by the INSERT and UPDATE statements,
# in column order.
sub _data_file_bind_params {
  my ($self, $df) = @_;

  throw 'Analysis is not defined for this data file' if ! defined $df->analysis();
  throw 'Coord system is not defined for this data file' if ! defined $df->coord_system();

  return [
    [$df->coord_system()->dbID(), SQL_INTEGER],
    [$df->analysis()->dbID(), SQL_INTEGER],
    [$df->name(), SQL_VARCHAR],
    [$df->version_lock(), SQL_INTEGER],
    [$df->absolute(), SQL_INTEGER],
    [$df->url(), SQL_VARCHAR],
    [$df->file_type(), SQL_VARCHAR],
  ];
}

=head2 store

  Arg[1]      : Bio::EnsEMBL::DataFile $df
  Example     : my $dbID = $dfa->store($df);
  Description : Inserts the given DataFile into the data_file table unless it
                is already stored in this database, then records the new dbID
                and this adaptor on the object
  Returntype  : Scalar; the dbID of the DataFile
  Exceptions  : Thrown if the argument is not a DataFile or if it has no
                analysis or coordinate system

=cut

sub store {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  if ($df->is_stored($self->db())) {
    return $df->dbID();
  }

  my $params = $self->_data_file_bind_params($df);

  my $sql = <<'SQL';
INSERT INTO data_file (coord_system_id, analysis_id, name, version_lock, absolute, url, file_type)
VALUES (?,?,?,?,?,?,?)
SQL
  $self->dbc()->sql_helper()->execute_update(-SQL => $sql, -PARAMS => $params, -CALLBACK => sub {
    # Pick up the generated key from inside the update callback.
    $df->dbID($self->last_insert_id());
    return;
  });
  $df->adaptor($self);

  return $df->dbID();
}

=head2 update

  Arg[1]      : Bio::EnsEMBL::DataFile $df
  Example     : $dfa->update($df);
  Description : Writes the current state of the DataFile back to its row. A
                DataFile which is not yet stored in this database is passed to
                store() instead
  Returntype  : None
  Exceptions  : Thrown if the argument is not a DataFile or if it has no
                analysis or coordinate system

=cut

sub update {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  if (! $df->is_stored($self->db())) {
    $self->store($df);
    return;
  }

  # Same validation as store(): fail with a clear message rather than a
  # method call on an undefined analysis or coordinate system.
  my $params = $self->_data_file_bind_params($df);
  push(@{$params}, [$df->dbID(), SQL_INTEGER]);

  my $sql = <<'SQL';
UPDATE data_file SET coord_system_id =?, analysis_id=?, name=?, version_lock=?, absolute=?, url=?, file_type=?
WHERE data_file_id =?
SQL
  $self->dbc()->sql_helper()->execute_update(-SQL => $sql, -PARAMS => $params);
  return;
}

=head2 delete

  Arg[1]      : Bio::EnsEMBL::DataFile $df
  Example     : $dfa->delete($df);
  Description : Removes the row of the given DataFile from the data_file table
  Returntype  : None
  Exceptions  : Thrown if the argument is not a DataFile or if it is not
                stored in this database

=cut

sub delete {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  if (! $df->is_stored($self->db())) {
    throw "Cannot delete the data file if it has not already been stored in this database";
  }

  $self->dbc()->sql_helper()->execute_update(
    -SQL => 'DELETE from data_file where data_file_id =?',
    -PARAMS => [[$df->dbID(), SQL_INTEGER]],
  );

  return;
}

# Table name and alias pair for this adaptor.
sub _tables {
  my ($self) = @_;
  return (
    [qw/data_file df/]
  );
}

1;