0
|
1 package Bio::EnsEMBL::DBSQL::DataFileAdaptor;
|
|
2
|
|
3 =pod
|
|
4
|
|
5 =head1 LICENSE
|
|
6
|
|
7 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
8 Genome Research Limited. All rights reserved.
|
|
9
|
|
10 This software is distributed under a modified Apache license.
|
|
11 For license details, please see
|
|
12
|
|
13 http://www.ensembl.org/info/about/code_licence.html
|
|
14
|
|
15 =head1 CONTACT
|
|
16
|
|
17 Please email comments or questions to the public Ensembl
|
|
18 developers list at <dev@ensembl.org>.
|
|
19
|
|
20 Questions may also be sent to the Ensembl help desk at
|
|
21 <helpdesk@ensembl.org>.
|
|
22
|
|
23 =head1 NAME
|
|
24
|
|
25 Bio::EnsEMBL::DBSQL::DataFileAdaptor
|
|
26
|
|
27 =head1 SYNOPSIS
|
|
28
|
|
29 my $dfa = $dba->get_DataFileAdaptor();
|
|
30 my $file = $dfa->fetch_by_dbID(1);
|
|
31 my $files = $dfa->fetch_all();
|
|
32
|
|
33 my $logic_name_files = $dfa->fetch_all_by_logic_name('bam_alignments');
|
|
34
|
|
35 =head1 DESCRIPTION
|
|
36
|
|
37 Provides a database wrapper to store the locations of files and to pull these
|
|
38 records back out. DataFile objects can only provide basic information but they
|
|
39 can return an intended external database adaptor which can be used to
|
|
40 parse the information. This system assumes nothing about the file, just that
|
|
41 your parser can access it.
|
|
42
|
|
43 Files are supported over any protocol your parser supports and locations can be
|
|
44 made absolute, built on the fly or versioned.
|
|
45
|
|
46 =head1 METHODS
|
|
47
|
|
48 =cut
|
|
49
|
|
50 use strict;
|
|
51 use warnings;
|
|
52
|
|
53 use base qw/Bio::EnsEMBL::DBSQL::BaseAdaptor/;
|
|
54
|
|
55 use Bio::EnsEMBL::DataFile;
|
|
56 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
|
|
57 use Bio::EnsEMBL::Utils::Exception qw/throw warning deprecate/;
|
|
58 use Bio::EnsEMBL::Utils::Scalar qw/:assert/;
|
|
59
|
|
# File-scoped store for the base path managed by global_base_path(); it stays
# undefined until a true value has been supplied.
my $GLOBAL_BASE_PATH;

=head2 global_base_path

  Arg[1]      : String; (optional) base path to remember
  Example     : Bio::EnsEMBL::DBSQL::DataFileAdaptor->global_base_path('/base/path');
  Description : Getter/setter for a single base path held at file scope and
                used when building data file paths. A false argument (undef,
                '' or 0) leaves the stored value untouched.
  Returntype  : String; the stored base path, or undef if none has been set
  Exceptions  : None

=cut

sub global_base_path {
  my ($class, $base_path) = @_;
  # Only a true value overwrites what is already held
  $GLOBAL_BASE_PATH = $base_path if $base_path;
  return $GLOBAL_BASE_PATH;
}
|
|
78
|
|
=head2 get_base_path

  Arg[1]      : String; (optional) base path
  Example     : my $base = $dfa->get_base_path();
  Description : Resolves the base path to use. The first defined value out of
                the following is returned: the given argument, the value from
                $self->global_base_path(), and the meta table entry keyed by
                B<data_file.base_path>.
  Returntype  : String
  Exceptions  : Thrown if none of the three locations yields a defined value

=cut

sub get_base_path {
  my ($self, $path) = @_;

  # An explicitly supplied path always wins
  if(defined $path) {
    return $path;
  }

  my $from_global = $self->global_base_path();
  if(defined $from_global) {
    return $from_global;
  }

  # Last resort: the value recorded in the meta table
  my $meta_container = $self->db()->get_MetaContainer();
  my $from_meta = $meta_container->single_value_by_key('data_file.base_path', 1);
  if(defined $from_meta) {
    return $from_meta;
  }

  throw("No base path discovered. Either provide a path, set a global using global_base_path() or specify 'data_file.base_path' in meta");
}
|
|
101
|
|
=head2 DataFile_to_extension

  Deprecated
  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : my $ext = $dfa->DataFile_to_extension($bam_df);
  Description : Returns the first (default) extension reported by
                DataFile_to_extensions() for the given DataFile. Issues a
                deprecation notice on every call.
  Returntype  : Scalar of the expected file extension
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_extension {
  my ($self, $df) = @_;
  deprecate("Use DataFile_to_extensions() instead");
  # The default extension is the first element of the list
  return $self->DataFile_to_extensions($df)->[0];
}
|
|
119
|
|
=head2 DataFile_to_extensions

  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : my $exts = $dfa->DataFile_to_extensions($bam_df);
  Description : Looks up the extensions expected for the file type of the
                given DataFile. The first element is the default extension.
                Known types are BAM, BIGWIG and VCF.
  Returntype  : ArrayRef
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_extensions {
  my ($self, $df) = @_;
  my $type = $df->file_type();

  # Rebuilt on each call, so the returned ArrayRef is never shared
  my %extensions_for = (
    BAM    => ['bam', 'bam.bai'],
#   BIGBED => 'bb',    # entry is disabled
    BIGWIG => ['bw'],
    VCF    => ['vcf.gz', 'vcf.gz.tbi'],
  );

  my $extensions = $extensions_for{$type};
  if(! $extensions) {
    throw(sprintf(q{No extensions found for the type '%s'}, $type));
  }
  return $extensions;
}
|
|
143
|
|
144
|
|
=head2 DataFile_to_adaptor

  Arg[1]      : Bio::EnsEMBL::DataFile
  Arg[2]      : (optional) base path; handed to $df->path()
  Example     : my $bam = $dfa->DataFile_to_adaptor($bam_df);
  Description : Returns an adaptor instance which will access the given
                DataFile. The adaptor module for the file type is loaded on
                demand and constructed with the path of the DataFile.
                Handled types are BAM, BIGBED, BIGWIG and VCF.
  Returntype  : Scalar; the class of the returned adaptor depends upon the
                given file type
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_adaptor {
  my ($self, $df, $base) = @_;
  my $type = $df->file_type();

  # One builder per file type; each loads its module only when it is invoked
  my %builder_for = (
    BAM    => sub {
      require Bio::EnsEMBL::ExternalData::BAM::BAMAdaptor;
      return Bio::EnsEMBL::ExternalData::BAM::BAMAdaptor->new($df->path($base));
    },
    BIGBED => sub {
      require Bio::EnsEMBL::ExternalData::BigFile::BigBedAdaptor;
      return Bio::EnsEMBL::ExternalData::BigFile::BigBedAdaptor->new($df->path($base));
    },
    BIGWIG => sub {
      require Bio::EnsEMBL::ExternalData::BigFile::BigWigAdaptor;
      return Bio::EnsEMBL::ExternalData::BigFile::BigWigAdaptor->new($df->path($base));
    },
    VCF    => sub {
      require Bio::EnsEMBL::ExternalData::VCF::VCFAdaptor;
      return Bio::EnsEMBL::ExternalData::VCF::VCFAdaptor->new($df->path($base));
    },
  );

  my $builder = $builder_for{$type};
  if(! $builder) {
    throw(sprintf(q{No handler found for the type '%s'}, $type));
  }
  return $builder->();
}
|
|
180
|
|
=head2 fetch_all_by_logic_name

  Arg[1]      : String $logic_name for the linked analysis
  Example     : my $dfs = $dfa->fetch_all_by_logic_name('bam_alignments');
  Description : Resolves the logic name to an analysis through the
                AnalysisAdaptor and returns all DataFile entries linked to it
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : Thrown if logic name does not exist

=cut

sub fetch_all_by_logic_name {
  my ($self, $logic_name) = @_;

  my $analysis_adaptor = $self->db()->get_AnalysisAdaptor();
  my $analysis = $analysis_adaptor->fetch_by_logic_name($logic_name);
  if(! $analysis) {
    throw("No analysis found for logic_name '${logic_name}'");
  }

  return $self->fetch_all_by_Analysis($analysis);
}
|
|
198
|
|
=head2 fetch_all_by_Analysis

  Arg[1]      : Bio::EnsEMBL::Analysis $analysis to look up by
  Example     : my $dfs = $dfa->fetch_all_by_Analysis($analysis);
  Description : Returns all DataFile entries whose analysis_id matches the
                dbID of the given analysis. The argument is checked with
                assert_ref().
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : None

=cut

sub fetch_all_by_Analysis {
  my ($self, $analysis) = @_;
  assert_ref($analysis, 'Bio::EnsEMBL::Analysis', 'analysis');

  # Queue the value for the single placeholder in the constraint
  $self->bind_param_generic_fetch($analysis->dbID(), SQL_INTEGER);
  my $constraint = 'df.analysis_id =?';
  return $self->generic_fetch($constraint);
}
|
|
215
|
|
=head2 fetch_all_by_CoordSystem

  Arg[1]      : Bio::EnsEMBL::CoordSystem $coord_system to look up by
  Example     : my $dfs = $dfa->fetch_all_by_CoordSystem($cs);
  Description : Returns all DataFile entries whose coord_system_id matches the
                dbID of the given coordinate system. Does B<not> support
                I<toplevel>. The argument is checked with assert_ref().
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : None

=cut

sub fetch_all_by_CoordSystem {
  my ($self, $cs) = @_;
  assert_ref($cs, 'Bio::EnsEMBL::CoordSystem', 'coord_system');

  # Queue the value for the single placeholder in the constraint
  $self->bind_param_generic_fetch($cs->dbID(), SQL_INTEGER);
  my $constraint = 'df.coord_system_id =?';
  return $self->generic_fetch($constraint);
}
|
|
233
|
|
=head2 fetch_by_name_and_type

  Arg[1]      : String $name of the data file
  Arg[2]      : String $type; matched against the file_type column
  Example     : my $df = $dfa->fetch_by_name_and_type('my_file', 'BAM');
  Description : Looks for DataFile entries with the given name and file type
                and returns the first one found
  Returntype  : Bio::EnsEMBL::DataFile, or nothing (undef in scalar context,
                an empty list in list context) when there is no match
  Exceptions  : None raised directly

=cut

sub fetch_by_name_and_type {
  my ($self, $name, $type) = @_;

  # Queue order must follow placeholder order: name first, then file type
  for my $value ($name, $type) {
    $self->bind_param_generic_fetch($value, SQL_VARCHAR);
  }

  my $matches = $self->generic_fetch('df.name =? and df.file_type =?');
  if(! @{$matches}) {
    return;
  }
  return $matches->[0];
}
|
|
242
|
|
=head2 generic_fetch

  Arg[1]      : String; (optional) SQL constraint, appended to the query
                with AND
  Example     : my $dfs = $dfa->generic_fetch('df.analysis_id =?');
  Description : Selects from data_file joined to coord_system, limited to the
                species_id of the current database adaptor, and builds one
                Bio::EnsEMBL::DataFile per row. Values queued with
                bind_param_generic_fetch() fill the placeholders of the
                constraint; the queue is cleared once read.
  Returntype  : Whatever the SQL helper's execute() returns; the fetch
                methods in this class use it as an ArrayRef of
                Bio::EnsEMBL::DataFile instances
  Exceptions  : None raised directly

=cut

sub generic_fetch {
  my ($self, $constraint) = @_;

  my $sql = <<'SQL';
select df.data_file_id, df.coord_system_id, df.analysis_id, df.name, df.version_lock, df.absolute, df.url, df.file_type
from data_file df
join coord_system cs using (coord_system_id)
where cs.species_id =?
SQL
  if($constraint) {
    $sql .= 'AND '.$constraint;
  }

  # Take over any queued bind parameters and clear the queue behind us
  my $queued = $self->bind_param_generic_fetch();
  my @params;
  if(defined $queued) {
    $self->{'_bind_param_generic_fetch'} = undef;
    @params = @{$queued};
  }
  # species_id belongs to the first placeholder, so it leads the list
  unshift(@params, $self->db()->species_id());

  my $csa = $self->db()->get_CoordSystemAdaptor();
  my $aa  = $self->db()->get_AnalysisAdaptor();

  # Converts one result row into a DataFile
  my $row_to_data_file = sub {
    my ($row) = @_;
    my ($data_file_id, $coord_system_id, $analysis_id, $name, $version_lock, $absolute, $url, $file_type) = @{$row};
    my %fields = (
      dbID          => $data_file_id,
      adaptor       => $self,
      coord_system  => $csa->fetch_by_dbID($coord_system_id),
      analysis      => $aa->fetch_by_dbID($analysis_id),
      name          => $name,
      version_lock  => $version_lock,
      absolute      => $absolute,
      file_type     => $file_type,
    );
    # url is only set when the column holds a true value
    $fields{url} = $url if $url;
    return Bio::EnsEMBL::DataFile->new_fast(\%fields);
  };

  return $self->dbc()->sql_helper()->execute(
    -SQL      => $sql,
    -PARAMS   => \@params,
    -CALLBACK => $row_to_data_file,
  );
}
|
|
284
|
|
=head2 store

  Arg[1]      : Bio::EnsEMBL::DataFile $df to insert
  Example     : my $dbID = $dfa->store($df);
  Description : Inserts the given DataFile into the data_file table, then
                sets its dbID from the last insert id and its adaptor to
                this adaptor. A DataFile already stored in this database is
                left alone. The argument is checked with assert_ref().
  Returntype  : Scalar; the dbID of the DataFile
  Exceptions  : Thrown if the DataFile has no analysis or no coordinate
                system

=cut

sub store {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  # Nothing to insert when the record is already in this database
  return $df->dbID() if $df->is_stored($self->db());

  if(! defined $df->analysis()) {
    throw('Analysis is not defined for this data file');
  }
  if(! defined $df->coord_system()) {
    throw('Coord system is not defined for this data file');
  }

  my $insert_sql = <<'SQL';
INSERT INTO data_file (coord_system_id, analysis_id, name, version_lock, absolute, url, file_type)
VALUES (?,?,?,?,?,?,?)
SQL

  # One [value, SQL type] pair per placeholder, in column order
  my @binds = (
    [$df->coord_system()->dbID(), SQL_INTEGER],
    [$df->analysis()->dbID(),     SQL_INTEGER],
    [$df->name(),                 SQL_VARCHAR],
    [$df->version_lock(),         SQL_INTEGER],
    [$df->absolute(),             SQL_INTEGER],
    [$df->url(),                  SQL_VARCHAR],
    [$df->file_type(),            SQL_VARCHAR],
  );

  # Handed to execute_update() so the DataFile picks up its new dbID
  my $record_new_id = sub {
    $df->dbID($self->last_insert_id());
    return;
  };

  $self->dbc()->sql_helper()->execute_update(
    -SQL      => $insert_sql,
    -PARAMS   => \@binds,
    -CALLBACK => $record_new_id,
  );
  $df->adaptor($self);

  return $df->dbID();
}
|
|
319
|
|
=head2 update

  Arg[1]      : Bio::EnsEMBL::DataFile $df to write back
  Example     : $dfa->update($df);
  Description : Writes the current state of the given DataFile to its
                data_file row, matched on dbID. A DataFile not yet stored in
                this database is handed to store() instead. The argument is
                checked with assert_ref().
  Returntype  : None
  Exceptions  : Thrown if a stored DataFile has no analysis or no coordinate
                system

=cut

sub update {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  if (! $df->is_stored($self->db())) {
    $self->store($df);
    return;
  }

  # Same preconditions as store(): without them the dbID() calls below die
  # with "Can't call method ... on an undefined value" instead of a clear error
  throw 'Analysis is not defined for this data file' if ! defined $df->analysis();
  throw 'Coord system is not defined for this data file' if ! defined $df->coord_system();

  my $sql = <<'SQL';
UPDATE data_file SET coord_system_id =?, analysis_id=?, name=?, version_lock=?, absolute=?, url=?, file_type=?
WHERE data_file_id =?
SQL

  # One [value, SQL type] pair per placeholder; the dbID for the WHERE
  # clause comes last
  my $params = [
    [$df->coord_system()->dbID(), SQL_INTEGER],
    [$df->analysis()->dbID(), SQL_INTEGER],
    [$df->name(), SQL_VARCHAR],
    [$df->version_lock(), SQL_INTEGER],
    [$df->absolute(), SQL_INTEGER],
    [$df->url(), SQL_VARCHAR],
    [$df->file_type(), SQL_VARCHAR],
    [$df->dbID(), SQL_INTEGER],
  ];
  $self->dbc()->sql_helper()->execute_update(-SQL => $sql, -PARAMS => $params);
  return;
}
|
|
347
|
|
=head2 delete

  Arg[1]      : Bio::EnsEMBL::DataFile $df to remove
  Example     : $dfa->delete($df);
  Description : Deletes the data_file row whose data_file_id equals the dbID
                of the given DataFile. The argument is checked with
                assert_ref().
  Returntype  : None
  Exceptions  : Thrown if the DataFile is not stored in this database

=cut

sub delete {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  unless($df->is_stored($self->db())) {
    throw("Cannot delete the data file if it has not already been stored in this database");
  }

  my $delete_sql = 'DELETE from data_file where data_file_id =?';
  my @binds = ([$df->dbID(), SQL_INTEGER]);
  $self->dbc()->sql_helper()->execute_update(-SQL => $delete_sql, -PARAMS => \@binds);

  return;
}
|
|
364
|
|
=head2 _tables

  Example     : my ($table) = $self->_tables();
  Description : Names the table this adaptor works on together with the
                alias used for it in SQL
  Returntype  : List holding a single ArrayRef of table name and alias
  Exceptions  : None

=cut

sub _tables {
  my ($self) = @_;
  # Kept as a parenthesised list so that scalar-context callers still
  # receive the ArrayRef itself
  return ( ['data_file', 'df'] );
}
|
|
371
|
|
372 1;
|