Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/DataFileAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 package Bio::EnsEMBL::DBSQL::DataFileAdaptor; | |
2 | |
3 =pod | |
4 | |
5 =head1 LICENSE | |
6 | |
7 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
8 Genome Research Limited. All rights reserved. | |
9 | |
10 This software is distributed under a modified Apache license. | |
11 For license details, please see | |
12 | |
13 http://www.ensembl.org/info/about/code_licence.html | |
14 | |
15 =head1 CONTACT | |
16 | |
17 Please email comments or questions to the public Ensembl | |
18 developers list at <dev@ensembl.org>. | |
19 | |
20 Questions may also be sent to the Ensembl help desk at | |
21 <helpdesk@ensembl.org>. | |
22 | |
23 =head1 NAME | |
24 | |
25 Bio::EnsEMBL::DBSQL::DataFileAdaptor | |
26 | |
27 =head1 SYNOPSIS | |
28 | |
29 my $dfa = $dba->get_DataFileAdaptor(); | |
30 my $file = $dfa->fetch_by_dbID(1); | |
31 my $files = $dfa->fetch_all(); | |
32 | |
33 my $logic_name_files = $dfa->fetch_all_by_logic_name('bam_alignments'); | |
34 | |
35 =head1 DESCRIPTION | |
36 | |
37 Provides a database wrapper to store the locations of files and to pull these | |
38 records back out. DataFile objects can only provide basic information, but they | |
39 can return an intended external database adaptor which can be used to | |
40 parse the information. This system assumes nothing about the file, only that | |
41 your parser can access it. | |
42 | |
43 Files are supported over any protocol your parser supports and locations can be | |
44 made absolute, built on the fly or versioned. | |
45 | |
46 =head1 METHODS | |
47 | |
48 =cut | |
49 | |
50 use strict; | |
51 use warnings; | |
52 | |
53 use base qw/Bio::EnsEMBL::DBSQL::BaseAdaptor/; | |
54 | |
55 use Bio::EnsEMBL::DataFile; | |
56 use Bio::EnsEMBL::DBSQL::BaseAdaptor; | |
57 use Bio::EnsEMBL::Utils::Exception qw/throw warning deprecate/; | |
58 use Bio::EnsEMBL::Utils::Scalar qw/:assert/; | |
59 | |
60 my $GLOBAL_BASE_PATH; | |
61 | |
=head2 global_base_path

  Arg[1]      : String; (optional) base path to remember
  Example     : Bio::EnsEMBL::DBSQL::DataFileAdaptor->global_base_path('/base/path');
  Description : Getter/setter for the file-scoped base path used when building
                data file paths. A false argument (undef, '' or 0) leaves the
                stored value untouched, so the method then acts as a getter.
  Returntype  : String; the currently stored base path (undef if never set)
  Exceptions  : None

=cut

sub global_base_path {
  my ($class, $base_path) = @_;

  # Only a true value overwrites what is already stored.
  if ($base_path) {
    $GLOBAL_BASE_PATH = $base_path;
  }

  return $GLOBAL_BASE_PATH;
}
78 | |
=head2 get_base_path

  Arg[1]      : String; (optional) base path
  Example     : $dfa->get_base_path();
  Description : Resolves the base path to use. The lookup order is: the
                given argument, then $self->global_base_path(), then the
                meta table entry keyed by B<data_file.base_path>. The first
                defined value wins.
  Returntype  : String
  Exceptions  : Thrown if none of the three locations yields a defined value

=cut

sub get_base_path {
  my ($self, $path) = @_;

  # Each fallback is only consulted when everything before it was undefined.
  if (! defined $path) {
    $path = $self->global_base_path();
  }
  if (! defined $path) {
    $path = $self->db()->get_MetaContainer()->single_value_by_key('data_file.base_path', 1);
  }
  if (! defined $path) {
    throw("No base path discovered. Either provide a path, set a global using global_base_path() or specify 'data_file.base_path' in meta");
  }

  return $path;
}
101 | |
=head2 DataFile_to_extension

  Deprecated  : Use DataFile_to_extensions() instead
  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : my $ext = $dfa->DataFile_to_extension($bam_df);
  Description : Returns the first (default) extension reported by
                DataFile_to_extensions() for the given DataFile
  Returntype  : Scalar of the expected file extension
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_extension {
  my ($self, $df) = @_;
  deprecate("Use DataFile_to_extensions() instead");
  # The plural form lists the default extension first.
  return $self->DataFile_to_extensions($df)->[0];
}
119 | |
=head2 DataFile_to_extensions

  Arg[1]      : Bio::EnsEMBL::DataFile
  Example     : my $exts = $dfa->DataFile_to_extensions($bam_df);
  Description : Returns all expected extensions for the given DataFile type. The
                first returned is the default extension. Understood types are
                BAM, BIGBED, BIGWIG and VCF, i.e. the same set that
                DataFile_to_adaptor() can dispatch on.
  Returntype  : ArrayRef
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_extensions {
  my ($self, $df) = @_;
  my $type = $df->file_type();

  # Built per call so callers receive a fresh array they may modify freely.
  my %extensions_for = (
    BAM    => ['bam', 'bam.bai'],
    BIGBED => ['bb'],
    BIGWIG => ['bw'],
    VCF    => ['vcf.gz', 'vcf.gz.tbi'],
  );

  # Guard the lookup so an undefined type raises the documented exception
  # without first emitting uninitialized-value warnings.
  my $extensions = defined $type ? $extensions_for{$type} : undef;
  if (! $extensions) {
    throw(sprintf(q{No extensions found for the type '%s'}, defined $type ? $type : q{}));
  }
  return $extensions;
}
143 | |
144 | |
=head2 DataFile_to_adaptor

  Arg[1]      : Bio::EnsEMBL::DataFile
  Arg[2]      : (optional) base path
  Example     : my $bam = $dfa->DataFile_to_adaptor($bam_df);
  Description : Loads the external data adaptor class matching the DataFile's
                type on demand and constructs it with the DataFile's path
  Returntype  : Scalar; the actual class depends upon the given file type
  Exceptions  : Raised if the given file type is not understood

=cut

sub DataFile_to_adaptor {
  my ($self, $df, $base) = @_;

  # Each loader pulls in its module lazily and hands back the class name, so
  # only the adaptor that is actually needed gets compiled.
  my %loader_for = (
    BAM => sub {
      require Bio::EnsEMBL::ExternalData::BAM::BAMAdaptor;
      return 'Bio::EnsEMBL::ExternalData::BAM::BAMAdaptor';
    },
    BIGBED => sub {
      require Bio::EnsEMBL::ExternalData::BigFile::BigBedAdaptor;
      return 'Bio::EnsEMBL::ExternalData::BigFile::BigBedAdaptor';
    },
    BIGWIG => sub {
      require Bio::EnsEMBL::ExternalData::BigFile::BigWigAdaptor;
      return 'Bio::EnsEMBL::ExternalData::BigFile::BigWigAdaptor';
    },
    VCF => sub {
      require Bio::EnsEMBL::ExternalData::VCF::VCFAdaptor;
      return 'Bio::EnsEMBL::ExternalData::VCF::VCFAdaptor';
    },
  );

  my $type   = $df->file_type();
  my $loader = $loader_for{$type};
  if (! $loader) {
    throw(sprintf(q{No handler found for the type '%s'}, $type));
  }

  my $adaptor_class = $loader->();
  return $adaptor_class->new($df->path($base));
}
180 | |
=head2 fetch_all_by_logic_name

  Arg[1]      : String $logic_name for the linked analysis
  Example     : my $dfs = $dfa->fetch_all_by_logic_name('bam_alignments');
  Description : Resolves the logic name to an analysis and returns all
                DataFile entries linked to it
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : Thrown if logic name does not exist

=cut

sub fetch_all_by_logic_name {
  my ($self, $logic_name) = @_;

  my $analysis_adaptor = $self->db()->get_AnalysisAdaptor();
  my $analysis         = $analysis_adaptor->fetch_by_logic_name($logic_name);
  if (! $analysis) {
    throw("No analysis found for logic_name '${logic_name}'");
  }

  return $self->fetch_all_by_Analysis($analysis);
}
198 | |
=head2 fetch_all_by_Analysis

  Arg[1]      : Bio::EnsEMBL::Analysis $analysis to look up by
  Example     : my $dfs = $dfa->fetch_all_by_Analysis($analysis);
  Description : Returns all DataFile entries linked to the given analysis
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : Whatever assert_ref() raises when the argument is not a
                Bio::EnsEMBL::Analysis

=cut

sub fetch_all_by_Analysis {
  my ($self, $analysis) = @_;
  assert_ref($analysis, 'Bio::EnsEMBL::Analysis', 'analysis');

  # The bound value fills the placeholder in the constraint below.
  my $analysis_id = $analysis->dbID();
  $self->bind_param_generic_fetch($analysis_id, SQL_INTEGER);

  my $constraint = 'df.analysis_id =?';
  return $self->generic_fetch($constraint);
}
215 | |
=head2 fetch_all_by_CoordSystem

  Arg[1]      : Bio::EnsEMBL::CoordSystem $coord_system to look up by
  Example     : my $dfs = $dfa->fetch_all_by_CoordSystem($cs);
  Description : Returns all DataFile entries linked to the given coordinate
                system. Does B<not> support I<toplevel>
  Returntype  : ArrayRef contains Bio::EnsEMBL::DataFile instances
  Exceptions  : Whatever assert_ref() raises when the argument is not a
                Bio::EnsEMBL::CoordSystem

=cut

sub fetch_all_by_CoordSystem {
  my ($self, $cs) = @_;
  assert_ref($cs, 'Bio::EnsEMBL::CoordSystem', 'coord_system');

  # The bound value fills the placeholder in the constraint below.
  my $coord_system_id = $cs->dbID();
  $self->bind_param_generic_fetch($coord_system_id, SQL_INTEGER);

  my $constraint = 'df.coord_system_id =?';
  return $self->generic_fetch($constraint);
}
233 | |
=head2 fetch_by_name_and_type

  Arg[1]      : String $name of the data file
  Arg[2]      : String $type; the file type of the data file
  Example     : my $df = $dfa->fetch_by_name_and_type('alignments', 'BAM');
  Description : Returns the first DataFile whose name and file type both match
  Returntype  : Bio::EnsEMBL::DataFile, or nothing (undef in scalar context,
                empty list in list context) when there is no match
  Exceptions  : None raised directly

=cut

sub fetch_by_name_and_type {
  my ($self, $name, $type) = @_;

  # Bind order must follow the placeholder order in the constraint.
  $self->bind_param_generic_fetch($name, SQL_VARCHAR);
  $self->bind_param_generic_fetch($type, SQL_VARCHAR);

  my $matches = $self->generic_fetch('df.name =? and df.file_type =?');
  return unless @{$matches};
  return $matches->[0];
}
242 | |
=head2 generic_fetch

  Arg[1]      : String; (optional) SQL constraint appended to the query with AND
  Example     : my $dfs = $dfa->generic_fetch('df.analysis_id =?');
  Description : Runs the data_file query, restricted to the species_id of the
                current database, and converts every row into a DataFile.
                Parameters queued through bind_param_generic_fetch() are
                consumed and cleared by this call.
  Returntype  : The result of the SQL helper's execute(); callers in this
                module treat it as an ArrayRef of Bio::EnsEMBL::DataFile
  Exceptions  : None raised directly

=cut

sub generic_fetch {
  my ($self, $constraint) = @_;

  my $sql = <<'SQL';
select df.data_file_id, df.coord_system_id, df.analysis_id, df.name, df.version_lock, df.absolute, df.url, df.file_type
from data_file df
join coord_system cs using (coord_system_id)
where cs.species_id =?
SQL
  if ($constraint) {
    $sql .= 'AND '.$constraint;
  }

  # Take over any queued bind parameters and reset the queue so that they do
  # not leak into the next fetch.
  my $params = $self->bind_param_generic_fetch();
  if (! defined $params) {
    $params = [];
  }
  else {
    $self->{'_bind_param_generic_fetch'} = undef;
  }

  # species_id belongs to the first placeholder, ahead of constraint values.
  unshift @{$params}, $self->db()->species_id();

  my $coord_system_adaptor = $self->db()->get_CoordSystemAdaptor();
  my $analysis_adaptor     = $self->db()->get_AnalysisAdaptor();

  my $row_to_data_file = sub {
    my ($row) = @_;
    my ($data_file_id, $coord_system_id, $analysis_id, $name, $version_lock, $absolute, $url, $file_type) = @{$row};

    my %fields = (
      dbID         => $data_file_id,
      adaptor      => $self,
      coord_system => $coord_system_adaptor->fetch_by_dbID($coord_system_id),
      analysis     => $analysis_adaptor->fetch_by_dbID($analysis_id),
      name         => $name,
      version_lock => $version_lock,
      absolute     => $absolute,
      file_type    => $file_type,
    );
    # The url key is only present when the row carries a true value.
    if ($url) {
      $fields{url} = $url;
    }
    return Bio::EnsEMBL::DataFile->new_fast(\%fields);
  };

  return $self->dbc()->sql_helper()->execute(
    -SQL      => $sql,
    -PARAMS   => $params,
    -CALLBACK => $row_to_data_file,
  );
}
284 | |
=head2 store

  Arg[1]      : Bio::EnsEMBL::DataFile $df to store
  Example     : my $dbID = $dfa->store($df);
  Description : Inserts the DataFile into the data_file table unless it is
                already stored in this database. After an insert the
                DataFile's dbID and adaptor are set.
  Returntype  : The dbID of the DataFile
  Exceptions  : Thrown if the DataFile has no analysis or no coord system

=cut

sub store {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  # Nothing to do for an object this database already knows about.
  return $df->dbID() if $df->is_stored($self->db());

  if (! defined $df->analysis()) {
    throw('Analysis is not defined for this data file');
  }
  if (! defined $df->coord_system()) {
    throw('Coord system is not defined for this data file');
  }

  my $insert_sql = <<'SQL';
INSERT INTO data_file (coord_system_id, analysis_id, name, version_lock, absolute, url, file_type)
VALUES (?,?,?,?,?,?,?)
SQL

  # One [value, type] pair per placeholder, in column order.
  my @bind_values = (
    [$df->coord_system()->dbID(), SQL_INTEGER],
    [$df->analysis()->dbID(),     SQL_INTEGER],
    [$df->name(),                 SQL_VARCHAR],
    [$df->version_lock(),         SQL_INTEGER],
    [$df->absolute(),             SQL_INTEGER],
    [$df->url(),                  SQL_VARCHAR],
    [$df->file_type(),            SQL_VARCHAR],
  );

  # The callback records the generated identifier on the object.
  my $record_new_id = sub {
    $df->dbID($self->last_insert_id());
    return;
  };

  $self->dbc()->sql_helper()->execute_update(
    -SQL      => $insert_sql,
    -PARAMS   => \@bind_values,
    -CALLBACK => $record_new_id,
  );
  $df->adaptor($self);

  return $df->dbID();
}
319 | |
=head2 update

  Arg[1]      : Bio::EnsEMBL::DataFile $df to update
  Example     : $dfa->update($df);
  Description : Writes the current state of a stored DataFile back to its
                data_file row. A DataFile which is not yet stored in this
                database is handed to store() instead.
  Returntype  : None
  Exceptions  : Thrown if the DataFile has no analysis or no coord system

=cut

sub update {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  if (! $df->is_stored($self->db())) {
    $self->store($df);
    return;
  }

  # Validate before dereferencing, using the same messages as store(), so a
  # missing link produces a meaningful exception rather than Perl's
  # "Can't call method dbID on an undefined value".
  if (! defined $df->analysis()) {
    throw('Analysis is not defined for this data file');
  }
  if (! defined $df->coord_system()) {
    throw('Coord system is not defined for this data file');
  }

  my $sql = <<'SQL';
UPDATE data_file SET coord_system_id =?, analysis_id=?, name=?, version_lock=?, absolute=?, url=?, file_type=?
WHERE data_file_id =?
SQL

  # One [value, type] pair per placeholder; the row's own id comes last to
  # match the WHERE clause.
  my $params = [
    [$df->coord_system()->dbID(), SQL_INTEGER],
    [$df->analysis()->dbID(),     SQL_INTEGER],
    [$df->name(),                 SQL_VARCHAR],
    [$df->version_lock(),         SQL_INTEGER],
    [$df->absolute(),             SQL_INTEGER],
    [$df->url(),                  SQL_VARCHAR],
    [$df->file_type(),            SQL_VARCHAR],
    [$df->dbID(),                 SQL_INTEGER],
  ];
  $self->dbc()->sql_helper()->execute_update(-SQL => $sql, -PARAMS => $params);
  return;
}
347 | |
=head2 delete

  Arg[1]      : Bio::EnsEMBL::DataFile $df to remove
  Example     : $dfa->delete($df);
  Description : Deletes the data_file row belonging to the given DataFile. The
                object itself is left unmodified.
  Returntype  : None
  Exceptions  : Thrown if the DataFile is not stored in this database

=cut

sub delete {
  my ($self, $df) = @_;

  assert_ref($df, 'Bio::EnsEMBL::DataFile', 'datafile');

  unless ($df->is_stored($self->db())) {
    throw("Cannot delete the data file if it has not already been stored in this database");
  }

  my $delete_sql  = 'DELETE from data_file where data_file_id =?';
  my @bind_values = ([$df->dbID(), SQL_INTEGER]);
  $self->dbc()->sql_helper()->execute_update(
    -SQL    => $delete_sql,
    -PARAMS => \@bind_values,
  );

  return;
}
364 | |
# Returns the list of [table name, alias] pairs this adaptor works with; here
# only the data_file table under the alias "df".
sub _tables {
  my ($self) = @_;
  return ( ['data_file', 'df'] );
}
371 | |
372 1; |