comparison variant_effect_predictor/Bio/EnsEMBL/IdMapping/BaseObject.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::IdMapping::BaseObject - base object for IdMapping objects
24
25 =head1 SYNOPSIS
26
27 # this object isn't instantiated directly but rather extended
28 use Bio::EnsEMBL::IdMapping::BaseObject;
29 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
30
31 =head1 DESCRIPTION
32
33 This is the base object for some of the objects used in the IdMapping
34 application. An object that extends BaseObject will have a ConfParser,
35 Logger and Cache object. BaseObject also implements some useful utility
36 functions related to file and db access.
37
38 This isn't very clean OO design but it's efficient and easy to use...
39
40 =head1 METHODS
41
42 new
43 get_filehandle
44 file_exists
45 fetch_value_from_db
46 dump_table_to_file
47 upload_file_into_table
48 logger
49 conf
50 cache
51
52 =cut
53
54
55 package Bio::EnsEMBL::IdMapping::BaseObject;
56
57 use strict;
58 use warnings;
59 no warnings 'uninitialized';
60
61 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
62 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
63 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
64
65
=head2 new

  Arg [LOGGER]: Bio::EnsEMBL::Utils::Logger $logger - a logger object
  Arg [CONF]  : Bio::EnsEMBL::Utils::ConfParser $conf - a configuration object
  Arg [CACHE] : Bio::EnsEMBL::IdMapping::Cache $cache - a cache object
  Example     : my $object = Bio::EnsEMBL::IdMapping::BaseObjectSubclass->new(
                  -LOGGER => $logger,
                  -CONF   => $conf,
                  -CACHE  => $cache
                );
  Description : Constructor. Checks that all three collaborators are objects
                of the expected classes and stores them through the logger(),
                conf() and cache() accessors. Can be invoked on a class name
                or on an existing instance (the instance's class is reused).
  Return type : implementing subclass type
  Exceptions  : thrown on wrong or missing arguments (including unblessed
                references passed in place of an object)
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;

  my ($logger, $conf, $cache) = rearrange(['LOGGER', 'CONF', 'CACHE'], @_);

  # blessed() guards the isa() calls below: invoking a method on an unblessed
  # reference (e.g. a plain hashref) dies with a low-level Perl error rather
  # than the informative exception we want callers to see.
  require Scalar::Util;

  unless (Scalar::Util::blessed($logger) and
          $logger->isa('Bio::EnsEMBL::Utils::Logger')) {
    throw("You must provide a Bio::EnsEMBL::Utils::Logger for logging.");
  }

  unless (Scalar::Util::blessed($conf) and
          $conf->isa('Bio::EnsEMBL::Utils::ConfParser')) {
    throw("You must provide configuration as a Bio::EnsEMBL::Utils::ConfParser object.");
  }

  unless (Scalar::Util::blessed($cache) and
          $cache->isa('Bio::EnsEMBL::IdMapping::Cache')) {
    throw("You must provide a cache as a Bio::EnsEMBL::IdMapping::Cache object.");
  }

  my $self = {};
  bless ($self, $class);

  # initialise through the accessors so subclasses overriding them are honoured
  $self->logger($logger);
  $self->conf($conf);
  $self->cache($cache);

  return $self;
}
116
117
=head2 get_filehandle

  Arg[1]      : String $filename - name of the file to open
  Arg[2]      : (optional) String $path_append - subdirectory name to append
                to the configured basedir
  Arg[3]      : (optional) String $mode - mode handed to open() (<|>|>>);
                '>' is used when no (or a false) mode is given
  Example     : my $fh = $object->get_filehandle('mapping_stats.txt', 'stats',
                  '>');
                print $fh "Stats:\n";
  Description : Opens a file located under the 'basedir' configuration
                parameter (optionally extended with a subdirectory via
                path_append()) and returns the resulting filehandle.
  Return type : filehandle
  Exceptions  : thrown on missing filename or if the file cannot be opened
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub get_filehandle {
  my ($self, $filename, $path_append, $mode) = @_;

  unless (defined $filename) {
    throw("Need a filename for this filehandle.");
  }

  # start from the configured base directory, optionally descend into a subdir
  my $dir = $self->conf->param('basedir');
  if (defined $path_append) {
    $dir = path_append($dir, $path_append);
  }

  # writing (truncating) is the default when the caller gives no mode
  my $open_mode = $mode ? $mode : '>';

  my $fh;
  unless (open($fh, $open_mode, "$dir/$filename")) {
    throw("Unable to open $dir/$filename: $!");
  }

  return $fh;
}
155
156
=head2 file_exists

  Arg[1]      : String $filename - filename to test
  Arg[2]      : (optional) String $path_append - subdirectory name to append
                to the configured basedir before looking for the file
  Example     : unless ($object->file_exists('gene_mappings.ser', 'mapping')) {
                  $object->do_gene_mapping;
                }
  Description : Tests if a file exists under the configured basedir (or the
                given subdirectory of it) and has non-zero size.
  Return type : Boolean - the result of the -s file test, i.e. the file size
                when the file exists and is non-empty, a false value otherwise
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub file_exists {
  my ($self, $filename, $path_append) = @_;

  my $dir = $self->conf->param('basedir');
  if (defined $path_append) {
    $dir = path_append($dir, $path_append);
  }

  # -s is true only for existing files with a size greater than zero
  my $size = -s "$dir/$filename";

  return $size;
}
183
184
=head2 fetch_value_from_db

  Arg[1]      : DBI::db $dbh - a DBI database handle
  Arg[2]      : String $sql - SQL statement to execute
  Example     : my $num_genes = $object->fetch_value_from_db($dbh,
                  'SELECT count(*) FROM gene');
  Description : Executes an SQL statement on a db handle and returns the first
                column of the first row returned. Useful for queries returning
                a single value, like table counts. Any remaining rows are
                discarded and the statement handle is finished.
  Return type : Return type of SQL statement (undef if no row was returned)
  Exceptions  : thrown on wrong or missing arguments
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub fetch_value_from_db {
  my $self = shift;
  my $dbh = shift;
  my $sql = shift;

  # blessed() keeps the isa() check from dying with a raw Perl error when an
  # unblessed reference is passed instead of a handle
  require Scalar::Util;
  throw("Need a db handle.")
    unless (Scalar::Util::blessed($dbh) and $dbh->isa('DBI::db'));
  throw("Need an SQL query to execute.") unless ($sql);

  my $sth = $dbh->prepare($sql);
  $sth->execute;
  my ($retval) = $sth->fetchrow_array;

  # Only the first row is read, so the handle may still have pending rows;
  # finish() releases it instead of leaving an active statement behind.
  $sth->finish;

  return $retval;
}
216
217
=head2 dump_table_to_file

  Arg[1]      : String $dbtype - db type (source|target)
  Arg[2]      : String $table - name of table to dump
  Arg[3]      : String $filename - name of dump file
  Arg[4]      : Boolean $check_existing - turn on test for existing dump
  Example     : my $rows_dumped = $object->dump_table_to_file('source',
                  'stable_id_event', 'stable_id_event_existing.txt');
  Description : Dumps the contents of a db table to a tab-delimited file, one
                row per line, with NULL values written as '\N'. The dump file
                will be written to a subdirectory called 'tables' under the
                basedir from your configuration. If $check_existing is set and
                a non-empty dump file is already present, nothing is dumped.
  Return type : Int - the number of rows dumped (0 if an existing dump was
                kept)
  Exceptions  : thrown on wrong or missing arguments, or if the dump file
                cannot be closed (e.g. buffered data could not be written)
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub dump_table_to_file {
  my $self = shift;
  my $dbtype = shift;
  my $table = shift;
  my $filename = shift;
  my $check_existing = shift;

  # argument check
  unless (($dbtype eq 'source') or ($dbtype eq 'target')) {
    throw("Missing or unknown db type: $dbtype.");
  }
  throw("Need a table name.") unless ($table);
  throw("Need a filename.") unless ($filename);

  # conditionally check if table was already dumped
  if ($check_existing and $self->file_exists($filename, 'tables')) {
    $self->logger->info("$filename exists, won't dump again.\n");
    return 0;
  }

  my $fh = $self->get_filehandle($filename, 'tables');

  my $dba = $self->cache->get_DBAdaptor($dbtype);
  my $dbh = $dba->dbc->db_handle;
  my $sth = $dbh->prepare("SELECT * FROM $table");
  $sth->execute;

  my $i = 0;

  while (my @row = $sth->fetchrow_array) {
    $i++;

    # use '\N' for NULL values
    print $fh join("\t", map { defined($_) ? $_ : '\N' } @row), "\n";
  }

  $sth->finish;

  # Output is buffered, so write failures only become visible when the handle
  # is closed; check it rather than reporting a truncated dump as success.
  close($fh) or throw("Unable to close dump file $filename: $!");

  return $i;
}
282
283
=head2 upload_file_into_table

  Arg[1]      : String $dbtype - db type (source|target)
  Arg[2]      : String $table - name of table to upload the data to
  Arg[3]      : String $filename - name of dump file
  Arg[4]      : Boolean $no_check_empty - don't check if table is empty
  Example     : my $rows_uploaded = $object->upload_file_into_table('target',
                  'stable_id_event', 'stable_id_event_new.txt');
  Description : Uploads a tab-delimited data file into a db table. The data file
                will be taken from a subdirectory 'tables' under your configured
                basedir. If the db table isn't empty and $no_check_empty isn't
                set, no data is uploaded (and a warning is issued).

                A table name of the form '<object>_stable_id...' is treated
                specially: the file is loaded into a scratch table and the
                stable_id, version, created_date and modified_date columns of
                table '<object>' are updated from it, matching on
                '<object>_id'.

                Nothing is uploaded when the 'dry_run' configuration parameter
                is set.
  Return type : Int - the number of rows loaded from the file as reported by
                the LOAD DATA statement; 0 if nothing was uploaded. Returns
                nothing (undef in scalar context) on a dry run.
  Exceptions  : thrown on wrong or missing arguments
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub upload_file_into_table {
  my $self           = shift;
  my $dbtype         = shift;
  my $table          = shift;
  my $filename       = shift;
  my $no_check_empty = shift;

  # argument check
  unless ( ( $dbtype eq 'source' ) or ( $dbtype eq 'target' ) ) {
    throw("Missing or unknown db type: $dbtype.");
  }
  throw("Need a table name.") unless ($table);
  throw("Need a filename.")   unless ($filename);

  # sanity check for dry run
  if ( $self->conf->param('dry_run') ) {
    $self->logger->warning(
                       "dry_run - skipping db upload for $filename.\n");
    return;
  }

  my $file =
    join( '/', $self->conf->param('basedir'), 'tables', $filename );
  my $r = 0;

  if ( -s $file ) {

    $self->logger->debug( "$file -> $table\n", 1 );

    my $dba = $self->cache->get_DBAdaptor($dbtype);
    my $dbh = $dba->dbc->db_handle;

    my $idtable = 0;
    if ( $table =~ /^([^_]+)_stable_id/ ) {
      # This is a stable_id table we're working with; from here on $table
      # names the object table that holds the stable ID columns.
      $idtable = 1;
      $table   = $1;
    }

    # check table is empty
    my ( $sql, $sth );
    unless ($no_check_empty) {
      if ($idtable) {
        $sql =
          qq(SELECT count(*) FROM $table WHERE stable_id IS NOT NULL);
      }
      else {
        $sql = qq(SELECT count(*) FROM $table);
      }
      $sth = $dbh->prepare($sql);
      $sth->execute;
      my ($c) = $sth->fetchrow_array;
      $sth->finish;

      if ( $c > 0 ) {
        if ($idtable) {
          $self->logger->warning(
                            "Table $table contains $c stable IDs.\n",
                            1 );
        }
        else {
          $self->logger->warning(
                       "Table $table not empty: found $c entries.\n",
                       1 );
        }
        $self->logger->info( "Data not uploaded!\n", 1 );
        return $r;
      }
    } ## end unless ($no_check_empty)

    # now upload the data, remembering what LOAD DATA reports as loaded
    my $rows_loaded;
    if ($idtable) {
      # Create a scratch table (named after our process ID), upload the data
      # into it, and then update the main table.
      my $tmp_table = 'stable_id_' . $$;

      $dbh->do(
        qq( CREATE TABLE $tmp_table ( object_id INTEGER UNSIGNED,
                                      stable_id VARCHAR(255),
                                      version SMALLINT UNSIGNED,
                                      created_date DATETIME,
                                      modified_date DATETIME,
                                      PRIMARY KEY(object_id) ) )
      );

      $rows_loaded = $dbh->do(
        qq(LOAD DATA LOCAL INFILE '$file' INTO TABLE $tmp_table));

      $dbh->do(
        qq(
          UPDATE $table, $tmp_table
          SET $table.stable_id=$tmp_table.stable_id,
              $table.version=$tmp_table.version,
              $table.created_date=$tmp_table.created_date,
              $table.modified_date=$tmp_table.modified_date
          WHERE $table.${table}_id = $tmp_table.object_id )
      );

      $dbh->do(qq(DROP TABLE $tmp_table));
    } ## end if ($idtable)
    else {
      $rows_loaded =
        $dbh->do(qq(LOAD DATA LOCAL INFILE '$file' INTO TABLE $table));
    }
    $dbh->do(qq(OPTIMIZE TABLE $table));

    # DBI's do() reports zero rows as the true-but-zero string '0E0', an
    # unknown count as -1 and failure as undef; normalise all of these to a
    # plain non-negative integer.
    if ( defined($rows_loaded) and $rows_loaded > 0 ) {
      $r = $rows_loaded + 0;
    }

  } ## end if ( -s $file )
  else {
    $self->logger->warning( "No data found in file $filename.\n", 1 );
  }

  return $r;
} ## end sub upload_file_into_table
414
415
=head2 logger

  Arg[1]      : (optional) Bio::EnsEMBL::Utils::Logger - the logger to set
  Example     : $object->logger->info("Starting ID mapping.\n");
  Description : Getter/setter for the logger object. When called with an
                argument, that value replaces the stored logger; the
                currently stored logger is returned in either case.
  Return type : Bio::EnsEMBL::Utils::Logger
  Exceptions  : none
  Caller      : constructor
  Status      : At Risk
              : under development

=cut

sub logger {
  my ($self, @new_value) = @_;

  # any argument at all (even undef) counts as "set"
  if (@new_value) {
    $self->{'_logger'} = $new_value[0];
  }

  return $self->{'_logger'};
}
434
435
=head2 conf

  Arg[1]      : (optional) Bio::EnsEMBL::Utils::ConfParser - the configuration
                to set
  Example     : my $basedir = $object->conf->param('basedir');
  Description : Getter/setter for the configuration object. When called with
                an argument, that value replaces the stored configuration; the
                currently stored configuration is returned in either case.
  Return type : Bio::EnsEMBL::Utils::ConfParser
  Exceptions  : none
  Caller      : constructor
  Status      : At Risk
              : under development

=cut

sub conf {
  my ($self, @new_value) = @_;

  # any argument at all (even undef) counts as "set"
  if (@new_value) {
    $self->{'_conf'} = $new_value[0];
  }

  return $self->{'_conf'};
}
455
456
=head2 cache

  Arg[1]      : (optional) Bio::EnsEMBL::IdMapping::Cache - the cache to set
  Example     : $object->cache->read_from_file('source');
  Description : Getter/setter for the cache object. When called with an
                argument, that value replaces the stored cache; the currently
                stored cache is returned in either case.
  Return type : Bio::EnsEMBL::IdMapping::Cache
  Exceptions  : none
  Caller      : constructor
  Status      : At Risk
              : under development

=cut

sub cache {
  my ($self, @new_value) = @_;

  # any argument at all (even undef) counts as "set"
  if (@new_value) {
    $self->{'_cache'} = $new_value[0];
  }

  return $self->{'_cache'};
}
475
476
477 1;
478