Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/DataFileAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 package Bio::EnsEMBL::DBSQL::DataFileAdaptor; | |
| 2 | |
| 3 =pod | |
| 4 | |
| 5 =head1 LICENSE | |
| 6 | |
| 7 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 8 Genome Research Limited. All rights reserved. | |
| 9 | |
| 10 This software is distributed under a modified Apache license. | |
| 11 For license details, please see | |
| 12 | |
| 13 http://www.ensembl.org/info/about/code_licence.html | |
| 14 | |
| 15 =head1 CONTACT | |
| 16 | |
| 17 Please email comments or questions to the public Ensembl | |
| 18 developers list at <dev@ensembl.org>. | |
| 19 | |
| 20 Questions may also be sent to the Ensembl help desk at | |
| 21 <helpdesk@ensembl.org>. | |
| 22 | |
| 23 =head1 NAME | |
| 24 | |
| 25 Bio::EnsEMBL::DBSQL::DataFileAdaptor | |
| 26 | |
| 27 =head1 SYNOPSIS | |
| 28 | |
| 29 my $dfa = $dba->get_DataFileAdaptor(); | |
| 30 my $file = $dfa->fetch_by_dbID(1); | |
| 31 my $files = $dfa->fetch_all(); | |
| 32 | |
| 33 my $logic_name_files = $dfa->fetch_all_by_logic_name('bam_alignments'); | |
| 34 | |
| 35 =head1 DESCRIPTION | |
| 36 | |
| 37 Provides a database wrapper to store the locations of files and to pull these | |
| 38 records back out. DataFile objects can only provide basic information but they | |
| 39 can return an intended external database adaptor which can be used to | |
| 40 parse the information. This system assumes nothing about the file, only that | |
| 41 your parser can access it. | |
| 42 | |
| 43 Files are supported over any protocol your parser supports and locations can be | |
| 44 made absolute, built on the fly or versioned. | |
| 45 | |
| 46 =head1 METHODS | |
| 47 | |
| 48 =cut | |
| 49 | |
| 50 use strict; | |
| 51 use warnings; | |
| 52 | |
| 53 use base qw/Bio::EnsEMBL::DBSQL::BaseAdaptor/; | |
| 54 | |
| 55 use Bio::EnsEMBL::DataFile; | |
| 56 use Bio::EnsEMBL::DBSQL::BaseAdaptor; | |
| 57 use Bio::EnsEMBL::Utils::Exception qw/throw warning deprecate/; | |
| 58 use Bio::EnsEMBL::Utils::Scalar qw/:assert/; | |
| 59 | |
| 60 my $GLOBAL_BASE_PATH; | |
| 61 | |
=head2 global_base_path

  Arg[1]      : String; (optional) base path to remember
  Example     : Bio::EnsEMBL::DBSQL::DataFileAdaptor->global_base_path('/base/path');
  Description : Getter/setter for the single file-wide base path used when
                building data file paths. Called without an argument, or
                with any false value, it only reports the current setting.
  Returntype  : String; the stored base path (undef if it was never set)
  Exceptions  : None

=cut

sub global_base_path {
  my ($class, $base_path) = @_;
  # Only a true value overwrites the stored path; anything else is a read.
  if ($base_path) {
    $GLOBAL_BASE_PATH = $base_path;
  }
  return $GLOBAL_BASE_PATH;
}
| 78 | |
=head2 get_base_path

  Arg[1]      : String; (optional) base path
  Example     : my $base = $dfa->get_base_path();
  Description : Resolves the base path to use. A defined argument wins and is
                handed straight back; otherwise the value held by
                global_base_path() is used and, failing that, the meta
                table entry keyed by B<data_file.base_path>
  Returntype  : String
  Exceptions  : Thrown if none of the three locations yields a defined value

=cut

sub get_base_path {
  my ($self, $path) = @_;

  # Fall through the three sources in priority order, stopping at the first
  # one that produces a defined value.
  if (! defined $path) {
    $path = $self->global_base_path();
  }
  if (! defined $path) {
    $path = $self->db()->get_MetaContainer()->single_value_by_key('data_file.base_path', 1);
  }
  if (! defined $path) {
    throw "No base path discovered. Either provide a path, set a global using global_base_path() or specify 'data_file.base_path' in meta";
  }

  return $path;
}
| 101 | |
=head2 DataFile_to_extension

  Deprecated
  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : my $ext = $dfa->DataFile_to_extension($bam_df);
  Description : Returns the first (default) extension reported by
                DataFile_to_extensions() for the given DataFile. Emits a
                deprecation notice on every call.
  Returntype  : Scalar of the expected file extension
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_extension {
  my ($self, $df) = @_;
  deprecate("Use DataFile_to_extensions() instead");
  # The first element of the list is the default extension.
  return $self->DataFile_to_extensions($df)->[0];
}
| 119 | |
=head2 DataFile_to_extensions

  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : my $exts = $dfa->DataFile_to_extensions($bam_df);
  Description : Looks up every extension expected for the DataFile's
                file_type(). The first element is the default extension.
  Returntype  : ArrayRef
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_extensions {
  my ($self, $df) = @_;
  my $type = $df->file_type();

  # BIGBED has no entry here (its mapping is disabled), so a BIGBED DataFile
  # ends up in the exception below.
  my %extensions_for = (
    BAM    => [qw/bam bam.bai/],
    BIGWIG => [qw/bw/],
    VCF    => [qw/vcf.gz vcf.gz.tbi/],
  );

  my $extensions = $extensions_for{$type};
  if (! $extensions) {
    throw sprintf(q{No extensions found for the type '%s'}, $type );
  }
  return $extensions;
}
| 143 | |
| 144 | |
=head2 DataFile_to_adaptor

  Arg[1]      : Bio::EnsEMBL::DataFile
  Arg[2]      : (optional) base path
  Example     : my $bam = $dfa->DataFile_to_adaptor($bam_df);
  Description : Builds the external data adaptor matching the DataFile's
                file_type(). The adaptor module is loaded on demand and is
                constructed with the value of $df->path($base).
  Returntype  : Scalar actual return depends upon the given file type
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_adaptor {
  my ($self, $df, $base) = @_;
  my $type = $df->file_type();

  # One builder per supported type. Each builder loads its module lazily so
  # only the adaptor that is actually requested has to be installed.
  my %builder_for = (
    BAM => sub {
      require Bio::EnsEMBL::ExternalData::BAM::BAMAdaptor;
      return Bio::EnsEMBL::ExternalData::BAM::BAMAdaptor->new($df->path($base));
    },
    BIGBED => sub {
      require Bio::EnsEMBL::ExternalData::BigFile::BigBedAdaptor;
      return Bio::EnsEMBL::ExternalData::BigFile::BigBedAdaptor->new($df->path($base));
    },
    BIGWIG => sub {
      require Bio::EnsEMBL::ExternalData::BigFile::BigWigAdaptor;
      return Bio::EnsEMBL::ExternalData::BigFile::BigWigAdaptor->new($df->path($base));
    },
    VCF => sub {
      require Bio::EnsEMBL::ExternalData::VCF::VCFAdaptor;
      return Bio::EnsEMBL::ExternalData::VCF::VCFAdaptor->new($df->path($base));
    },
  );

  my $builder = $builder_for{$type};
  if (! $builder) {
    throw sprintf(q{No handler found for the type '%s'}, $type );
  }
  return $builder->();
}
| 180 | |
=head2 fetch_all_by_logic_name

  Arg[1]      : String $logic_name for the linked analysis
  Example     : my $dfs = $dfa->fetch_all_by_logic_name('bam_alignments');
  Description : Resolves the logic name to an analysis through the
                AnalysisAdaptor and returns all DataFile entries linked
                to that analysis
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : Thrown if logic name does not exist

=cut

sub fetch_all_by_logic_name {
  my ($self, $logic_name) = @_;

  my $analysis_adaptor = $self->db()->get_AnalysisAdaptor();
  my $analysis = $analysis_adaptor->fetch_by_logic_name($logic_name);
  if (! $analysis) {
    throw "No analysis found for logic_name '${logic_name}'";
  }

  return $self->fetch_all_by_Analysis($analysis);
}
| 198 | |
=head2 fetch_all_by_Analysis

  Arg[1]      : Bio::EnsEMBL::Analysis $analysis to look up by
  Example     : my $dfs = $dfa->fetch_all_by_Analysis($analysis);
  Description : Returns all DataFile entries whose analysis_id matches the
                dbID of the given analysis
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : None raised here beyond the assert_ref() check on Arg[1]

=cut

sub fetch_all_by_Analysis {
  my ($self, $analysis) = @_;
  assert_ref($analysis, 'Bio::EnsEMBL::Analysis', 'analysis');

  # Register the bind value first; generic_fetch() picks it up and clears it.
  my $constraint = 'df.analysis_id =?';
  $self->bind_param_generic_fetch($analysis->dbID(), SQL_INTEGER);

  return $self->generic_fetch($constraint);
}
| 215 | |
=head2 fetch_all_by_CoordSystem

  Arg[1]      : Bio::EnsEMBL::CoordSystem $coord_system to look up by
  Example     : my $dfs = $dfa->fetch_all_by_CoordSystem($cs);
  Description : Returns all DataFile entries whose coord_system_id matches
                the dbID of the given coordinate system. Does B<not>
                support I<toplevel>
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : None raised here beyond the assert_ref() check on Arg[1]

=cut

sub fetch_all_by_CoordSystem {
  my ($self, $cs) = @_;
  assert_ref($cs, 'Bio::EnsEMBL::CoordSystem', 'coord_system');

  # Register the bind value first; generic_fetch() picks it up and clears it.
  my $constraint = 'df.coord_system_id =?';
  $self->bind_param_generic_fetch($cs->dbID(), SQL_INTEGER);

  return $self->generic_fetch($constraint);
}
| 233 | |
=head2 fetch_by_name_and_type

  Arg[1]      : String $name; matched against data_file.name
  Arg[2]      : String $type; matched against data_file.file_type
  Example     : my $df = $dfa->fetch_by_name_and_type('my_file', 'BAM');
  Description : Returns the first DataFile found with the given name and
                file type
  Returntype  : The first element returned by generic_fetch(), or nothing
                (empty list / undef in scalar context) when no row matches
  Exceptions  : None raised directly

=cut

sub fetch_by_name_and_type {
  my ($self, $name, $type) = @_;

  # Both placeholders are strings; bind them in the order they appear in the
  # constraint.
  foreach my $value ($name, $type) {
    $self->bind_param_generic_fetch($value, SQL_VARCHAR);
  }

  my $matches = $self->generic_fetch('df.name =? and df.file_type =?');
  if (! @{$matches}) {
    return;
  }
  return $matches->[0];
}
| 242 | |
=head2 generic_fetch

  Arg[1]      : String; (optional) SQL constraint. When true it is appended
                to the species restriction with AND
  Example     : my $dfs = $dfa->generic_fetch('df.file_type =?');
  Description : Runs the data_file query restricted to the species_id of
                the current database. Any bind parameters previously
                registered through bind_param_generic_fetch() are consumed
                (and cleared from the adaptor) and are sent after the
                species_id. Every row is converted into a
                Bio::EnsEMBL::DataFile via new_fast(), with its coord system
                and analysis looked up by dbID.
  Returntype  : Whatever the SQL helper's execute() returns for the row
                callback; callers in this module treat it as an ArrayRef
  Exceptions  : None raised directly

=cut

sub generic_fetch {
  my ($self, $constraint) = @_;

  my $sql = <<'SQL';
select df.data_file_id, df.coord_system_id, df.analysis_id, df.name, df.version_lock, df.absolute, df.url, df.file_type
from data_file df
join coord_system cs using (coord_system_id)
where cs.species_id =?
SQL
  if ($constraint) {
    $sql .= 'AND ' . $constraint;
  }

  # Take ownership of any pending bind parameters and reset the adaptor so
  # they cannot leak into a later query.
  my $params = $self->bind_param_generic_fetch();
  if (defined $params) {
    $self->{'_bind_param_generic_fetch'} = undef;
  }
  else {
    $params = [];
  }
  # The species_id placeholder comes first in the statement.
  unshift(@{$params}, $self->db()->species_id());

  my $csa = $self->db()->get_CoordSystemAdaptor();
  my $aa  = $self->db()->get_AnalysisAdaptor();

  # Turns one result row into a DataFile object.
  my $row_to_datafile = sub {
    my ($row) = @_;
    my ($id, $cs_id, $analysis_id, $name, $lock, $absolute, $url, $type) = @{$row};
    my %fields = (
      dbID         => $id,
      adaptor      => $self,
      coord_system => $csa->fetch_by_dbID($cs_id),
      analysis     => $aa->fetch_by_dbID($analysis_id),
      name         => $name,
      version_lock => $lock,
      absolute     => $absolute,
      file_type    => $type,
    );
    # A url is only recorded when the row carries a true value for it.
    if ($url) {
      $fields{url} = $url;
    }
    return Bio::EnsEMBL::DataFile->new_fast(\%fields);
  };

  return $self->dbc()->sql_helper()->execute(
    -SQL      => $sql,
    -PARAMS   => $params,
    -CALLBACK => $row_to_datafile,
  );
}
| 284 | |
=head2 store

  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : my $dbID = $dfa->store($df);
  Description : Inserts a data_file row for the given DataFile unless
                is_stored() reports it is already held in this database.
                After the insert the DataFile is given the dbID reported by
                last_insert_id() and this adaptor as its adaptor.
  Returntype  : The dbID of the DataFile
  Exceptions  : Thrown if the DataFile has no analysis or no coord system

=cut

sub store {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  # Nothing to do for an object this database already knows about.
  return $df->dbID() if $df->is_stored($self->db());

  if (! defined $df->analysis()) {
    throw 'Analysis is not defined for this data file';
  }
  if (! defined $df->coord_system()) {
    throw 'Coord system is not defined for this data file';
  }

  my $sql = <<'SQL';
INSERT INTO data_file (coord_system_id, analysis_id, name, version_lock, absolute, url, file_type)
VALUES (?,?,?,?,?,?,?)
SQL

  # One [value, SQL type] pair per placeholder, in column order.
  my @params = (
    [$df->coord_system()->dbID(), SQL_INTEGER],
    [$df->analysis()->dbID(),     SQL_INTEGER],
    [$df->name(),                 SQL_VARCHAR],
    [$df->version_lock(),         SQL_INTEGER],
    [$df->absolute(),             SQL_INTEGER],
    [$df->url(),                  SQL_VARCHAR],
    [$df->file_type(),            SQL_VARCHAR],
  );

  # Invoked by the helper for the insert; records the new primary key.
  my $record_new_id = sub {
    $df->dbID($self->last_insert_id());
    return;
  };

  $self->dbc()->sql_helper()->execute_update(
    -SQL      => $sql,
    -PARAMS   => \@params,
    -CALLBACK => $record_new_id,
  );
  $df->adaptor($self);

  return $df->dbID();
}
| 319 | |
=head2 update

  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : $dfa->update($df);
  Description : Writes the current state of the DataFile over its existing
                data_file row, located by dbID. A DataFile which is not yet
                stored in this database is handed to store() instead.
  Returntype  : None
  Exceptions  : Thrown if the DataFile has no analysis or no coord system

=cut

sub update {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  if (! $df->is_stored($self->db())) {
    $self->store($df);
    return;
  }

  # Same guards as store(): without them a missing analysis or coord system
  # surfaces as an opaque "Can't call method dbID on an undefined value".
  throw 'Analysis is not defined for this data file' if ! defined $df->analysis();
  throw 'Coord system is not defined for this data file' if ! defined $df->coord_system();

  my $sql = <<'SQL';
UPDATE data_file SET coord_system_id =?, analysis_id=?, name=?, version_lock=?, absolute=?, url=?, file_type=?
WHERE data_file_id =?
SQL
  # One [value, SQL type] pair per placeholder; the row's own dbID is last
  # because it feeds the WHERE clause.
  my $params = [
    [$df->coord_system()->dbID(), SQL_INTEGER],
    [$df->analysis()->dbID(), SQL_INTEGER],
    [$df->name(), SQL_VARCHAR],
    [$df->version_lock(), SQL_INTEGER],
    [$df->absolute(), SQL_INTEGER],
    [$df->url(), SQL_VARCHAR],
    [$df->file_type(), SQL_VARCHAR],
    [$df->dbID(), SQL_INTEGER],
  ];
  $self->dbc()->sql_helper()->execute_update(-SQL => $sql, -PARAMS => $params);
  return;
}
| 347 | |
=head2 delete

  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : $dfa->delete($df);
  Description : Removes the data_file row whose data_file_id matches the
                dbID of the given DataFile
  Returntype  : None
  Exceptions  : Thrown if the DataFile is not stored in this database

=cut

sub delete {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  my $already_stored = $df->is_stored($self->db());
  if (! $already_stored) {
    throw "Cannot delete the data file if it has not already been stored in this database";
  }

  my $helper = $self->dbc()->sql_helper();
  $helper->execute_update(
    -SQL    => 'DELETE from data_file where data_file_id =?',
    -PARAMS => [ [$df->dbID(), SQL_INTEGER] ],
  );

  return;
}
| 364 | |
# Returns the table/alias pairs this adaptor works with: a single pair naming
# the data_file table and its alias df.
sub _tables {
  my ($self) = @_;
  return ( ['data_file', 'df'] );
}
| 371 | |
| 372 1; |
