Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/IdMapping/BaseObject.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::IdMapping::BaseObject - base object for IdMapping objects | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 # this object isn't instantiated directly but rather extended | |
28 use Bio::EnsEMBL::IdMapping::BaseObject; | |
29 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject); | |
30 | |
31 =head1 DESCRIPTION | |
32 | |
33 This is the base object for some of the objects used in the IdMapping | |
34 application. An object that extends BaseObject will have a ConfParser, | |
35 Logger and Cache object. BaseObject also implements some useful utility | |
36 functions related to file and db access. | |
37 | |
38 This isn't very clean OO design but it's efficient and easy to use... | |
39 | |
40 =head1 METHODS | |
41 | |
42 new | |
43 get_filehandle | |
44 file_exists | |
45 fetch_value_from_db | |
46 dump_table_to_file | |
47 upload_file_into_table | |
48 logger | |
49 conf | |
50 cache | |
51 | |
52 =cut | |
53 | |
54 | |
55 package Bio::EnsEMBL::IdMapping::BaseObject; | |
56 | |
57 use strict; | |
58 use warnings; | |
59 no warnings 'uninitialized'; | |
60 | |
61 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
62 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
63 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append); | |
64 | |
65 | |
66 =head2 new | |
67 | |
68 Arg [LOGGER]: Bio::EnsEMBL::Utils::Logger $logger - a logger object | |
69 Arg [CONF] : Bio::EnsEMBL::Utils::ConfParser $conf - a configuration object | |
70 Arg [CACHE] : Bio::EnsEMBL::IdMapping::Cache $cache - a cache object | |
71 Example : my $object = Bio::EnsEMBL::IdMapping::BaseObjectSubclass->new( | |
72 -LOGGER => $logger, | |
73 -CONF => $conf, | |
74 -CACHE => $cache | |
75 ); | |
76 Description : Constructor | |
77 Return type : implementing subclass type | |
78 Exceptions : thrown on wrong or missing arguments | |
79 Caller : general | |
80 Status : At Risk | |
81 : under development | |
82 | |
83 =cut | |
84 | |
# Constructor shared by all IdMapping objects that extend BaseObject.
#
# Named arguments (parsed by rearrange()):
#   -LOGGER  a Bio::EnsEMBL::Utils::Logger
#   -CONF    a Bio::EnsEMBL::Utils::ConfParser
#   -CACHE   a Bio::EnsEMBL::IdMapping::Cache
#
# Returns the new object blessed into the caller's class (works both as a
# class method and as an instance method).
# Throws if any of the three arguments is missing or of the wrong type.
sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;

  my ($logger, $conf, $cache) = rearrange(['LOGGER', 'CONF', 'CACHE'], @_);

  # ref() rejects plain strings (a bare class name would otherwise satisfy
  # ->isa); the eval makes an unblessed reference fail the check cleanly
  # instead of dying inside ->isa with a generic Perl error.
  unless ($logger and ref($logger) and
          eval { $logger->isa('Bio::EnsEMBL::Utils::Logger') }) {
    throw("You must provide a Bio::EnsEMBL::Utils::Logger for logging.");
  }

  unless ($conf and ref($conf) and
          eval { $conf->isa('Bio::EnsEMBL::Utils::ConfParser') }) {
    throw("You must provide configuration as a Bio::EnsEMBL::Utils::ConfParser object.");
  }

  unless ($cache and ref($cache) and
          eval { $cache->isa('Bio::EnsEMBL::IdMapping::Cache') }) {
    throw("You must provide a cache as a Bio::EnsEMBL::IdMapping::Cache object.");
  }

  my $self = bless({}, $class);

  # initialise via the accessors so subclasses overriding them are honoured
  $self->logger($logger);
  $self->conf($conf);
  $self->cache($cache);

  return $self;
}
116 | |
117 | |
118 =head2 get_filehandle | |
119 | |
120 Arg[1] : String $filename - filename for filehandle | |
121 Arg[2] : String $path_append - append subdirectory name to basedir | |
122 Arg[3] : String $mode - filehandle mode (<|>|>>), defaults to '>' | |
123 Example : my $fh = $object->get_filehandle('mapping_stats.txt', 'stats', | |
124 '>'); | |
125 print $fh "Stats:\n"; | |
126 Description : Returns a filehandle to a file for reading or writing. The file | |
127 is qualified with the basedir defined in the configuration and | |
128 an optional subdirectory name. | |
129 Return type : filehandle | |
130 Exceptions : thrown on missing filename or if the file cannot be opened | |
131 Caller : general | |
132 Status : At Risk | |
133 : under development | |
134 | |
135 =cut | |
136 | |
# Open a file below the configured 'basedir' and hand back the filehandle.
#
#   $filename     name of the file (required)
#   $path_append  optional subdirectory, joined to basedir via path_append()
#   $mode         open() mode; any false value falls back to '>'
#
# Throws when no filename is given or when open() fails.
sub get_filehandle {
  my ($self, $filename, $path_append, $mode) = @_;

  unless (defined($filename)) {
    throw("Need a filename for this filehandle.");
  }

  my $path = $self->conf->param('basedir');
  if (defined($path_append)) {
    $path = path_append($path, $path_append);
  }

  $mode = '>' unless ($mode);

  my $fh;
  unless (open($fh, $mode, "$path/$filename")) {
    throw("Unable to open $path/$filename: $!");
  }

  return $fh;
}
155 | |
156 | |
157 =head2 file_exists | |
158 | |
159 Arg[1] : String $filename - filename to test | |
160 Arg[2] : String $path_append - optional subdirectory name appended to basedir | |
161 Example : unless ($object->file_exists('gene_mappings.ser', 1)) { | |
162 $object->do_gene_mapping; | |
163 } | |
164 Description : Tests if a file exists and has non-zero size. | |
165 Return type : Boolean | |
166 Exceptions : none | |
167 Caller : general | |
168 Status : At Risk | |
169 : under development | |
170 | |
171 =cut | |
172 | |
# Report whether a file below the configured 'basedir' exists with
# non-zero size.
#
#   $filename     name of the file to test
#   $path_append  optional subdirectory, joined to basedir via path_append()
#
# Returns the result of the -s file test on the resulting path (the size
# in bytes when the file is non-empty, a false value otherwise).
sub file_exists {
  my ($self, $filename, $path_append) = @_;

  my $dir = $self->conf->param('basedir');
  if (defined($path_append)) {
    $dir = path_append($dir, $path_append);
  }

  my $file = "$dir/$filename";

  return (-s $file);
}
183 | |
184 | |
185 =head2 fetch_value_from_db | |
186 | |
187 Arg[1] : DBI::db $dbh - a DBI database handle | |
188 Arg[2] : String $sql - SQL statement to execute | |
189 Example : my $num_genes = $object->fetch_value_from_db($dbh, | |
190 'SELECT count(*) FROM gene'); | |
191 Description : Executes an SQL statement on a db handle and returns the first | |
192 column of the first row returned. Useful for queries returning a | |
193 single value, like table counts. | |
194 Return type : Return type of SQL statement | |
195 Exceptions : thrown on wrong or missing arguments | |
196 Caller : general | |
197 Status : At Risk | |
198 : under development | |
199 | |
200 =cut | |
201 | |
# Run an SQL statement and return the first column of the first row.
#
#   $dbh  a DBI database handle (must satisfy ->isa('DBI::db'))
#   $sql  the SQL statement to execute
#
# Returns the fetched value, or undef when the query yields no rows.
# Throws when the db handle or the SQL statement is missing.
sub fetch_value_from_db {
  my ($self, $dbh, $sql) = @_;

  throw("Need a db handle.") unless ($dbh and $dbh->isa('DBI::db'));
  throw("Need an SQL query to execute.") unless ($sql);

  my $sth = $dbh->prepare($sql);
  $sth->execute;
  my ($retval) = $sth->fetchrow_array;

  # Only the first row is consumed, so tell the driver explicitly that we
  # are done; otherwise the handle stays active with unfetched rows.
  $sth->finish;

  return $retval;
}
216 | |
217 | |
218 =head2 dump_table_to_file | |
219 | |
220 Arg[1] : String $dbtype - db type (source|target) | |
221 Arg[2] : String $table - name of table to dump | |
222 Arg[3] : String $filename - name of dump file | |
223 Arg[4] : Boolean $check_existing - turn on test for existing dump | |
224 Example : my $rows_dumped = $object->dump_table_to_file('source', | |
225 'stable_id_event', 'stable_id_event_existing.txt'); | |
226 Description : Dumps the contents of a db table to a tab-delimited file. The | |
227 dump file will be written to a subdirectory called 'tables' | |
228 under the basedir from your configuration. | |
229 Return type : Int - the number of rows dumped | |
230 Exceptions : thrown on wrong or missing arguments | |
231 Caller : general | |
232 Status : At Risk | |
233 : under development | |
234 | |
235 =cut | |
236 | |
# Dump every row of a db table to a tab-delimited file.
#
#   $dbtype          'source' or 'target'; selects the DBAdaptor from the cache
#   $table           name of the table to dump
#   $filename        name of the dump file, written via
#                    get_filehandle($filename, 'tables')
#   $check_existing  if true and file_exists($filename, 'tables') is true,
#                    nothing is dumped
#
# Returns the number of rows dumped (0 when an existing dump was kept).
# Throws on a missing/unknown db type, a missing table name or filename,
# and when the dump file cannot be closed cleanly.
sub dump_table_to_file {
  my ($self, $dbtype, $table, $filename, $check_existing) = @_;

  # argument check
  unless (($dbtype eq 'source') or ($dbtype eq 'target')) {
    throw("Missing or unknown db type: $dbtype.");
  }
  throw("Need a table name.") unless ($table);
  throw("Need a filename.") unless ($filename);

  # conditionally check if table was already dumped
  if ($check_existing and $self->file_exists($filename, 'tables')) {
    $self->logger->info("$filename exists, won't dump again.\n");
    return 0;
  }

  my $fh = $self->get_filehandle($filename, 'tables');

  my $dba = $self->cache->get_DBAdaptor($dbtype);
  my $dbh = $dba->dbc->db_handle;
  my $sth = $dbh->prepare("SELECT * FROM $table");
  $sth->execute;

  my $i = 0;

  while (my @row = $sth->fetchrow_array) {
    $i++;

    # use '\N' for NULL values
    print $fh join("\t", map { defined($_) ? $_ : '\N' } @row), "\n";
  }

  $sth->finish;

  # Output is buffered, so write errors (e.g. disk full) may only surface
  # at close(); check it so a truncated dump is not reported as a success.
  close($fh) or throw("Unable to close dump file $filename: $!");

  return $i;
}
282 | |
283 | |
284 =head2 upload_file_into_table | |
285 | |
286 Arg[1] : String $dbtype - db type (source|target) | |
287 Arg[2] : String $table - name of table to upload the data to | |
288 Arg[3] : String $filename - name of dump file | |
289 Arg[4] : Boolean $no_check_empty - don't check if table is empty | |
290 Example : my $rows_uploaded = $object->upload_file_into_table('target', | |
291 'stable_id_event', 'stable_id_event_new.txt'); | |
292 Description : Uploads a tab-delimited data file into a db table. The data file | |
293 will be taken from a subdirectory 'tables' under your configured | |
294 basedir. If the db table isn't empty and $no_check_empty isn't | |
295 set, no data is uploaded (and a warning is issued). | |
296 Return type : Int - the number of rows uploaded | |
297 Exceptions : thrown on wrong or missing arguments | |
298 Caller : general | |
299 Status : At Risk | |
300 : under development | |
301 | |
302 =cut | |
303 | |
# Upload a tab-delimited data file into a db table.
#
#   $dbtype          'source' or 'target'; selects the DBAdaptor from the cache
#   $table           name of the target table; a name matching
#                    /^([^_]+)_stable_id/ is treated as stable ID data that
#                    is merged into the table named by the captured prefix
#   $filename        data file name, looked up as <basedir>/tables/<filename>
#   $no_check_empty  if true, skip the check that the table is empty
#
# Returns the row count reported by LOAD DATA for the uploaded file, 0 when
# nothing was uploaded (empty/missing file or non-empty table), or
# undef/empty list in dry_run mode.
# Throws on a missing/unknown db type, a missing table name or filename.
sub upload_file_into_table {
  my ($self, $dbtype, $table, $filename, $no_check_empty) = @_;

  # argument check
  unless (($dbtype eq 'source') or ($dbtype eq 'target')) {
    throw("Missing or unknown db type: $dbtype.");
  }
  throw("Need a table name.") unless ($table);
  throw("Need a filename.") unless ($filename);

  # sanity check for dry run
  if ($self->conf->param('dry_run')) {
    $self->logger->warning(
      "dry_run - skipping db upload for $filename.\n");
    return;
  }

  my $file =
    join('/', $self->conf->param('basedir'), 'tables', $filename);
  my $r = 0;

  unless (-s $file) {
    $self->logger->warning("No data found in file $filename.\n", 1);
    return $r;
  }

  $self->logger->debug("$file -> $table\n", 1);

  my $dba = $self->cache->get_DBAdaptor($dbtype);
  my $dbh = $dba->dbc->db_handle;

  my $idtable = 0;
  if ($table =~ /^([^_]+)_stable_id/) {
    # This is a stable_id table we're working with; from here on $table
    # refers to the main object table the stable IDs are merged into.
    $idtable = 1;
    $table   = $1;
  }

  # check table is empty
  unless ($no_check_empty) {
    my $sql =
      $idtable
      ? qq(SELECT count(*) FROM $table WHERE stable_id IS NOT NULL)
      : qq(SELECT count(*) FROM $table);

    my $sth = $dbh->prepare($sql);
    $sth->execute;
    my ($c) = $sth->fetchrow_array;
    $sth->finish;

    if ($c > 0) {
      if ($idtable) {
        $self->logger->warning(
          "Table $table contains $c stable IDs.\n", 1);
      }
      else {
        $self->logger->warning(
          "Table $table not empty: found $c entries.\n", 1);
      }
      $self->logger->info("Data not uploaded!\n", 1);
      return $r;
    }
  }

  # now upload the data
  my $loaded;
  if ($idtable) {
    # Create a temporary table (named after our process ID), upload the
    # data into it, and then update the main table.
    my $tmp_table = "stable_id_$$";

    $dbh->do(
      qq( CREATE TABLE $tmp_table ( object_id INTEGER UNSIGNED,
                                    stable_id VARCHAR(255),
                                    version SMALLINT UNSIGNED,
                                    created_date DATETIME,
                                    modified_date DATETIME,
                                    PRIMARY KEY(object_id) ) )
    );

    $loaded = $dbh->do(
      qq(LOAD DATA LOCAL INFILE '$file' INTO TABLE $tmp_table));

    $dbh->do(
      qq(
        UPDATE $table, $tmp_table
        SET $table.stable_id=$tmp_table.stable_id,
            $table.version=$tmp_table.version,
            $table.created_date=$tmp_table.created_date,
            $table.modified_date=$tmp_table.modified_date
        WHERE $table.${table}_id = $tmp_table.object_id )
    );

    $dbh->do(qq(DROP TABLE $tmp_table));
  }
  else {
    $loaded =
      $dbh->do(qq(LOAD DATA LOCAL INFILE '$file' INTO TABLE $table));
  }

  # do() reports the affected row count; its "0E0" (zero rows but true)
  # and undef (failure without RaiseError) results both become 0 here.
  $r = $loaded ? $loaded + 0 : 0;

  $dbh->do(qq(OPTIMIZE TABLE $table));

  return $r;
}
414 | |
415 | |
416 =head2 logger | |
417 | |
418 Arg[1] : (optional) Bio::EnsEMBL::Utils::Logger - the logger to set | |
419 Example : $object->logger->info("Starting ID mapping.\n"); | |
420 Description : Getter/setter for logger object | |
421 Return type : Bio::EnsEMBL::Utils::Logger | |
422 Exceptions : none | |
423 Caller : constructor | |
424 Status : At Risk | |
425 : under development | |
426 | |
427 =cut | |
428 | |
# Getter/setter for the logger object, stored under the '_logger' key.
# Called with an argument (even undef) it stores that value first; it
# always returns the currently stored value.
sub logger {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_logger'} = $new_value[0];
  }

  return $self->{'_logger'};
}
434 | |
435 | |
436 =head2 conf | |
437 | |
438 Arg[1] : (optional) Bio::EnsEMBL::Utils::ConfParser - the configuration | |
439 to set | |
440 Example : my $basedir = $object->conf->param('basedir'); | |
441 Description : Getter/setter for configuration object | |
442 Return type : Bio::EnsEMBL::Utils::ConfParser | |
443 Exceptions : none | |
444 Caller : constructor | |
445 Status : At Risk | |
446 : under development | |
447 | |
448 =cut | |
449 | |
# Getter/setter for the configuration object, stored under the '_conf' key.
# Called with an argument (even undef) it stores that value first; it
# always returns the currently stored value.
sub conf {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_conf'} = $new_value[0];
  }

  return $self->{'_conf'};
}
455 | |
456 | |
457 =head2 cache | |
458 | |
459 Arg[1] : (optional) Bio::EnsEMBL::IdMapping::Cache - the cache to set | |
460 Example : $object->cache->read_from_file('source'); | |
461 Description : Getter/setter for cache object | |
462 Return type : Bio::EnsEMBL::IdMapping::Cache | |
463 Exceptions : none | |
464 Caller : constructor | |
465 Status : At Risk | |
466 : under development | |
467 | |
468 =cut | |
469 | |
# Getter/setter for the cache object, stored under the '_cache' key.
# Called with an argument (even undef) it stores that value first; it
# always returns the currently stored value.
sub cache {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_cache'} = $new_value[0];
  }

  return $self->{'_cache'};
}
475 | |
476 | |
477 1; | |
478 |