comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/DataFileAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 package Bio::EnsEMBL::DBSQL::DataFileAdaptor;
2
3 =pod
4
5 =head1 LICENSE
6
7 Copyright (c) 1999-2012 The European Bioinformatics Institute and
8 Genome Research Limited. All rights reserved.
9
10 This software is distributed under a modified Apache license.
11 For license details, please see
12
13 http://www.ensembl.org/info/about/code_licence.html
14
15 =head1 CONTACT
16
17 Please email comments or questions to the public Ensembl
18 developers list at <dev@ensembl.org>.
19
20 Questions may also be sent to the Ensembl help desk at
21 <helpdesk@ensembl.org>.
22
23 =head1 NAME
24
25 Bio::EnsEMBL::DBSQL::DataFileAdaptor
26
27 =head1 SYNOPSIS
28
29 my $dfa = $dba->get_DataFileAdaptor();
30 my $file = $dfa->fetch_by_dbID(1);
31 my $files = $dfa->fetch_all();
32
33 my $logic_name_files = $dfa->fetch_all_by_logic_name('bam_alignments');
34
35 =head1 DESCRIPTION
36
37 Provides a database wrapper to store the locations of files and to pull these
38 records back out. DataFile objects can only provide basic information but they
39 can return an intended external database adaptor which can be used to
40 parse the information. This system assumes nothing about the file, only that
41 your parser can access it.
42
43 Files are supported over any protocol your parser supports and locations can be
44 made absolute, built on the fly or versioned.
45
46 =head1 METHODS
47
48 =cut
49
50 use strict;
51 use warnings;
52
53 use base qw/Bio::EnsEMBL::DBSQL::BaseAdaptor/;
54
55 use Bio::EnsEMBL::DataFile;
56 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
57 use Bio::EnsEMBL::Utils::Exception qw/throw warning deprecate/;
58 use Bio::EnsEMBL::Utils::Scalar qw/:assert/;
59
# File-scoped holder for the process-wide default base path. It is only read
# and written through global_base_path() below.
my $GLOBAL_BASE_PATH;

=head2 global_base_path

  Arg[1]      : String; (optional) base path. A false value (undef, '' or 0)
                is ignored and the call then behaves as a plain getter
  Example     : Bio::EnsEMBL::DBSQL::DataFileAdaptor->global_base_path('/base/path');
  Description : Stores a global value to be used when building data file
                paths. Called without a true argument it leaves the stored
                value untouched and simply returns it
  Returntype  : String; the currently stored base path (undef if never set)
  Exceptions  : None

=cut

sub global_base_path {
  my ($class, $base_path) = @_;

  # Only a true value overwrites the stored path; anything else is a read.
  if ($base_path) {
    $GLOBAL_BASE_PATH = $base_path;
  }

  return $GLOBAL_BASE_PATH;
}
78
=head2 get_base_path

  Arg[1]      : String; (optional) base path
  Example     : $dfa->get_base_path();
  Description : Resolves the base path to use for data files. Three sources
                are consulted in order and the first defined value wins:
                  1. the path given as the argument
                  2. $self->global_base_path()
                  3. the meta table entry keyed by B<data_file.base_path>
  Returntype  : String
  Exceptions  : Thrown if nothing is found after consulting all three locations

=cut

sub get_base_path {
  my ($self, $path) = @_;

  # 1. An explicitly supplied path always takes precedence
  if (defined $path) {
    return $path;
  }

  # 2. Fall back to the class-wide setting
  my $from_global = $self->global_base_path();
  if (defined $from_global) {
    return $from_global;
  }

  # 3. Last resort: the meta table. The second argument is passed exactly as
  #    before; NOTE(review): presumably a "warn" flag - confirm against the
  #    MetaContainer implementation.
  my $meta_container = $self->db()->get_MetaContainer();
  my $from_meta = $meta_container->single_value_by_key('data_file.base_path', 1);
  if (defined $from_meta) {
    return $from_meta;
  }

  throw("No base path discovered. Either provide a path, set a global using global_base_path() or specify 'data_file.base_path' in meta");
}
101
=head2 DataFile_to_extension

  Deprecated  : Use DataFile_to_extensions() instead
  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : my $ext = $dfa->DataFile_to_extension($bam_df);
  Description : Returns the default (first) extension reported by
                DataFile_to_extensions() for the given DataFile
  Returntype  : Scalar of the expected file extension
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_extension {
  my ($self, $df) = @_;

  # Emit the deprecation notice before delegating to the replacement method
  deprecate("Use DataFile_to_extensions() instead");

  # The first entry of the list is the default extension
  return $self->DataFile_to_extensions($df)->[0];
}
119
=head2 DataFile_to_extensions

  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : my $exts = $dfa->DataFile_to_extensions($bam_df);
  Description : Returns all expected extensions for the given DataFile type.
                The first returned is the default extension. Every file type
                handled by DataFile_to_adaptor() (BAM, BIGBED, BIGWIG, VCF)
                has an entry here
  Returntype  : ArrayRef
  Exceptions  : Raised if the given file type is not understood (including
                an undefined file type)

=cut

sub DataFile_to_extensions {
  my ($self, $df) = @_;
  my $type = $df->file_type();

  # Default extension first, companion/index files afterwards.
  my %extensions_for = (
    BAM     => ['bam', 'bam.bai'],
    BIGBED  => ['bb'],
    BIGWIG  => ['bw'],
    VCF     => ['vcf.gz', 'vcf.gz.tbi'],
  );

  # Guard the lookup so an undefined type raises the usual exception instead
  # of also emitting "uninitialized value" warnings.
  my $extensions = defined $type ? $extensions_for{$type} : undef;
  if (! $extensions) {
    throw(sprintf(q{No extensions found for the type '%s'}, defined $type ? $type : q{}));
  }
  return $extensions;
}
143
144
=head2 DataFile_to_adaptor

  Arg[1]      : Bio::EnsEMBL::DataFile
  Arg[2]      : (optional) base path; handed to $df->path() unchanged
  Example     : my $bam = $dfa->DataFile_to_adaptor($bam_df);
  Description : Returns an adaptor instance which will access the given
                DataFile. The adaptor module is only loaded when a file of
                the matching type is requested
  Returntype  : Scalar actual return depends upon the given file type
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_adaptor {
  my ($self, $df, $base) = @_;
  my $type = $df->file_type();

  # Each loader pulls in its external-data module on demand and hands back
  # the class name to instantiate.
  my %loader_for = (
    BAM => sub {
      require Bio::EnsEMBL::ExternalData::BAM::BAMAdaptor;
      return 'Bio::EnsEMBL::ExternalData::BAM::BAMAdaptor';
    },
    BIGBED => sub {
      require Bio::EnsEMBL::ExternalData::BigFile::BigBedAdaptor;
      return 'Bio::EnsEMBL::ExternalData::BigFile::BigBedAdaptor';
    },
    BIGWIG => sub {
      require Bio::EnsEMBL::ExternalData::BigFile::BigWigAdaptor;
      return 'Bio::EnsEMBL::ExternalData::BigFile::BigWigAdaptor';
    },
    VCF => sub {
      require Bio::EnsEMBL::ExternalData::VCF::VCFAdaptor;
      return 'Bio::EnsEMBL::ExternalData::VCF::VCFAdaptor';
    },
  );

  my $loader = $loader_for{$type};
  if (! $loader) {
    throw(sprintf(q{No handler found for the type '%s'}, $type));
  }

  # Load first, then resolve the path, then construct - same order as before
  my $adaptor_class = $loader->();
  return $adaptor_class->new($df->path($base));
}
180
=head2 fetch_all_by_logic_name

  Arg[1]      : String $logic_name for the linked analysis
  Example     : my $dfs = $dfa->fetch_all_by_logic_name('bam_alignments');
  Description : Looks up the analysis with the given logic name and returns
                all DataFile entries linked to it via fetch_all_by_Analysis()
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : Thrown if logic name does not exist

=cut

sub fetch_all_by_logic_name {
  my ($self, $logic_name) = @_;

  my $analysis_adaptor = $self->db()->get_AnalysisAdaptor();
  my $analysis = $analysis_adaptor->fetch_by_logic_name($logic_name);

  # A missing analysis is an error rather than an empty result
  if (! $analysis) {
    throw("No analysis found for logic_name '${logic_name}'");
  }

  return $self->fetch_all_by_Analysis($analysis);
}
198
=head2 fetch_all_by_Analysis

  Arg[1]      : Bio::EnsEMBL::Analysis $analysis to look up by
  Example     : my $dfs = $dfa->fetch_all_by_Analysis($analysis);
  Description : Returns all DataFile entries linked to the given analysis.
                The analysis dbID is bound as an integer parameter and the
                query is delegated to generic_fetch()
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : None raised here directly; the argument is checked with
                assert_ref() (presumably throws on a wrong type - confirm
                against Bio::EnsEMBL::Utils::Scalar)

=cut

sub fetch_all_by_Analysis {
  my ($self, $analysis) = @_;
  assert_ref($analysis, 'Bio::EnsEMBL::Analysis', 'analysis');

  # Restrict the generic query to rows belonging to this analysis
  my $constraint = 'df.analysis_id =?';
  $self->bind_param_generic_fetch($analysis->dbID(), SQL_INTEGER);

  return $self->generic_fetch($constraint);
}
215
=head2 fetch_all_by_CoordSystem

  Arg[1]      : Bio::EnsEMBL::CoordSystem $coord_system to look up by
  Example     : my $dfs = $dfa->fetch_all_by_CoordSystem($cs);
  Description : Returns all DataFile entries linked to the given coordinate
                system. Does B<not> support I<toplevel>. The coordinate
                system dbID is bound as an integer parameter and the query
                is delegated to generic_fetch()
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : None raised here directly; the argument is checked with
                assert_ref() (presumably throws on a wrong type - confirm
                against Bio::EnsEMBL::Utils::Scalar)

=cut

sub fetch_all_by_CoordSystem {
  my ($self, $cs) = @_;
  assert_ref($cs, 'Bio::EnsEMBL::CoordSystem', 'coord_system');

  # Restrict the generic query to rows on this coordinate system
  my $constraint = 'df.coord_system_id =?';
  $self->bind_param_generic_fetch($cs->dbID(), SQL_INTEGER);

  return $self->generic_fetch($constraint);
}
233
=head2 fetch_by_name_and_type

  Arg[1]      : String $name; value matched against data_file.name
  Arg[2]      : String $type; value matched against data_file.file_type
  Example     : my $df = $dfa->fetch_by_name_and_type('my_file', 'BAM');
  Description : Returns the first DataFile returned by generic_fetch() for
                the given name and file type
  Returntype  : Bio::EnsEMBL::DataFile, or nothing (undef in scalar context,
                an empty list in list context) when no row matches
  Exceptions  : None

=cut

sub fetch_by_name_and_type {
  my ($self, $name, $type) = @_;

  # Bind in the order the placeholders appear in the constraint
  for my $bound_value ($name, $type) {
    $self->bind_param_generic_fetch($bound_value, SQL_VARCHAR);
  }

  my $matches = $self->generic_fetch('df.name =? and df.file_type =?');
  if (! @{$matches}) {
    return;
  }
  return $matches->[0];
}
242
=head2 generic_fetch

  Arg[1]      : String; (optional) SQL constraint. When given it is appended
                to the species restriction with AND
  Example     : my $dfs = $dfa->generic_fetch('df.analysis_id =?');
  Description : Selects rows from data_file joined to coord_system,
                restricted to the species_id of the attached database, and
                turns each row into a Bio::EnsEMBL::DataFile. Any parameters
                queued with bind_param_generic_fetch() are consumed (and
                cleared) and sent after the species_id
  Returntype  : Whatever the SQL helper's execute() returns; callers in this
                module treat it as an ArrayRef of Bio::EnsEMBL::DataFile
  Exceptions  : None raised here directly

=cut

sub generic_fetch {
  my ($self, $constraint) = @_;

  my $sql = <<'SQL';
select df.data_file_id, df.coord_system_id, df.analysis_id, df.name, df.version_lock, df.absolute, df.url, df.file_type
from data_file df
join coord_system cs using (coord_system_id)
where cs.species_id =?
SQL
  if ($constraint) {
    $sql .= 'AND ' . $constraint;
  }

  # Take ownership of any queued bind parameters and reset the queue so they
  # cannot leak into the next query.
  my $params = $self->bind_param_generic_fetch();
  if (defined $params) {
    $self->{'_bind_param_generic_fetch'} = undef;
  }
  else {
    $params = [];
  }

  # The species restriction is the first placeholder in the statement
  unshift(@{$params}, $self->db()->species_id());

  my $csa = $self->db()->get_CoordSystemAdaptor();
  my $aa  = $self->db()->get_AnalysisAdaptor();

  # Converts one result row into a DataFile object
  my $row_to_data_file = sub {
    my ($row) = @_;
    my ($data_file_id, $coord_system_id, $analysis_id, $name,
        $version_lock, $absolute, $url, $file_type) = @{$row};

    my $attributes = {
      dbID          => $data_file_id,
      adaptor       => $self,
      coord_system  => $csa->fetch_by_dbID($coord_system_id),
      analysis      => $aa->fetch_by_dbID($analysis_id),
      name          => $name,
      version_lock  => $version_lock,
      absolute      => $absolute,
      file_type     => $file_type,
    };

    # The url key is only present when the row carries a true value
    if ($url) {
      $attributes->{url} = $url;
    }

    return Bio::EnsEMBL::DataFile->new_fast($attributes);
  };

  my $helper = $self->dbc()->sql_helper();
  return $helper->execute(
    -SQL      => $sql,
    -PARAMS   => $params,
    -CALLBACK => $row_to_data_file,
  );
}
284
=head2 store

  Arg[1]      : Bio::EnsEMBL::DataFile $df to persist
  Example     : my $dbID = $dfa->store($df);
  Description : Inserts the given DataFile into the data_file table unless
                it is already stored in this database. After a successful
                insert the object's dbID and adaptor are set
  Returntype  : Scalar; the dbID of the DataFile
  Exceptions  : Thrown if the DataFile has no analysis or no coord system

=cut

sub store {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  # Nothing to do for an object which already lives in this database
  return $df->dbID() if $df->is_stored($self->db());

  if (! defined $df->analysis()) {
    throw('Analysis is not defined for this data file');
  }
  if (! defined $df->coord_system()) {
    throw('Coord system is not defined for this data file');
  }

  my $insert_sql = <<'SQL';
INSERT INTO data_file (coord_system_id, analysis_id, name, version_lock, absolute, url, file_type)
VALUES (?,?,?,?,?,?,?)
SQL

  # One [value, SQL type] pair per placeholder, in column order
  my $bind_values = [
    [$df->coord_system()->dbID(), SQL_INTEGER],
    [$df->analysis()->dbID(), SQL_INTEGER],
    [$df->name(), SQL_VARCHAR],
    [$df->version_lock(), SQL_INTEGER],
    [$df->absolute(), SQL_INTEGER],
    [$df->url(), SQL_VARCHAR],
    [$df->file_type(), SQL_VARCHAR],
  ];

  # The callback records the generated key on the object; the arguments the
  # helper passes to it are not needed.
  my $record_new_id = sub {
    $df->dbID($self->last_insert_id());
    return;
  };

  my $helper = $self->dbc()->sql_helper();
  $helper->execute_update(
    -SQL      => $insert_sql,
    -PARAMS   => $bind_values,
    -CALLBACK => $record_new_id,
  );
  $df->adaptor($self);

  return $df->dbID();
}
319
=head2 update

  Arg[1]      : Bio::EnsEMBL::DataFile $df to update
  Example     : $dfa->update($df);
  Description : Writes the current state of the given DataFile back to its
                data_file row. A DataFile which is not yet stored in this
                database is handed to store() instead
  Returntype  : None
  Exceptions  : Thrown if the DataFile has no analysis or no coord system
                (same messages as store())

=cut

sub update {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  # Unknown objects are inserted rather than updated
  if (! $df->is_stored($self->db())) {
    $self->store($df);
    return;
  }

  # Validate up front, as store() does, so a missing link produces a clear
  # exception instead of "Can't call method dbID on an undefined value".
  my $analysis = $df->analysis();
  throw('Analysis is not defined for this data file') if ! defined $analysis;
  my $coord_system = $df->coord_system();
  throw('Coord system is not defined for this data file') if ! defined $coord_system;

  my $sql = <<'SQL';
UPDATE data_file SET coord_system_id =?, analysis_id=?, name=?, version_lock=?, absolute=?, url=?, file_type=?
WHERE data_file_id =?
SQL

  # One [value, SQL type] pair per placeholder; the row key comes last
  my $params = [
    [$coord_system->dbID(), SQL_INTEGER],
    [$analysis->dbID(), SQL_INTEGER],
    [$df->name(), SQL_VARCHAR],
    [$df->version_lock(), SQL_INTEGER],
    [$df->absolute(), SQL_INTEGER],
    [$df->url(), SQL_VARCHAR],
    [$df->file_type(), SQL_VARCHAR],
    [$df->dbID(), SQL_INTEGER],
  ];
  $self->dbc()->sql_helper()->execute_update(-SQL => $sql, -PARAMS => $params);
  return;
}
347
=head2 delete

  Arg[1]      : Bio::EnsEMBL::DataFile $df to remove
  Example     : $dfa->delete($df);
  Description : Deletes the data_file row whose data_file_id equals the
                dbID of the given DataFile
  Returntype  : None
  Exceptions  : Thrown if the DataFile is not stored in this database

=cut

sub delete {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  # Only rows known to this database can be removed
  my $already_stored = $df->is_stored($self->db());
  if (! $already_stored) {
    throw("Cannot delete the data file if it has not already been stored in this database");
  }

  my $helper = $self->dbc()->sql_helper();
  $helper->execute_update(
    -SQL    => 'DELETE from data_file where data_file_id =?',
    -PARAMS => [ [ $df->dbID(), SQL_INTEGER ] ],
  );

  return;
}
364
# Returns a one-element list holding the [table name, alias] pair for the
# data_file table, aliased as "df" (the alias used by the constraints in this
# module). NOTE(review): presumably consumed by the BaseAdaptor machinery -
# confirm against Bio::EnsEMBL::DBSQL::BaseAdaptor.
sub _tables {
  my ($self) = @_;
  my @table_and_alias = ('data_file', 'df');
  return ( \@table_and_alias );
}
371
372 1;