0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::IdMapping::BaseObject - base object for IdMapping objects
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 # this object isn't instantiated directly but rather extended
|
|
28 use Bio::EnsEMBL::IdMapping::BaseObject;
|
|
29 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
|
|
30
|
|
31 =head1 DESCRIPTION
|
|
32
|
|
33 This is the base object for some of the objects used in the IdMapping
|
|
34 application. An object that extends BaseObject will have a ConfParser,
|
|
35 Logger and Cache object. BaseObject also implements some useful utility
|
|
36 functions related to file and db access.
|
|
37
|
|
38 This isn't very clean OO design but it's efficient and easy to use...
|
|
39
|
|
40 =head1 METHODS
|
|
41
|
|
42 new
|
|
43 get_filehandle
|
|
44 file_exists
|
|
45 fetch_value_from_db
|
|
46 dump_table_to_file
|
|
47 upload_file_into_table
|
|
48 logger
|
|
49 conf
|
|
50 cache
|
|
51
|
|
52 =cut
|
|
53
|
|
54
|
|
55 package Bio::EnsEMBL::IdMapping::BaseObject;
|
|
56
|
|
57 use strict;
|
|
58 use warnings;
|
|
59 no warnings 'uninitialized';
|
|
60
|
|
61 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
|
|
62 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
63 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
|
|
64
|
|
65
|
|
66 =head2 new
|
|
67
|
|
68 Arg [LOGGER]: Bio::EnsEMBL::Utils::Logger $logger - a logger object
|
|
69 Arg [CONF] : Bio::EnsEMBL::Utils::ConfParser $conf - a configuration object
|
|
70 Arg [CACHE] : Bio::EnsEMBL::IdMapping::Cache $cache - a cache object
|
|
71 Example : my $object = Bio::EnsEMBL::IdMapping::BaseObjectSubclass->new(
|
|
72 -LOGGER => $logger,
|
|
73 -CONF => $conf,
|
|
74 -CACHE => $cache
|
|
75 );
|
|
76 Description : Constructor
|
|
77 Return type : implementing subclass type
|
|
78 Exceptions : thrown on wrong or missing arguments
|
|
79 Caller : general
|
|
80 Status : At Risk
|
|
81 : under development
|
|
82
|
|
83 =cut
|
|
84
|
|
# Constructor shared by all IdMapping objects.
#
# Named arguments (parsed with rearrange()):
#   -LOGGER  a Bio::EnsEMBL::Utils::Logger
#   -CONF    a Bio::EnsEMBL::Utils::ConfParser
#   -CACHE   a Bio::EnsEMBL::IdMapping::Cache
#
# Throws if any of the three is missing or is not an object of the
# expected class. Returns a new hash-based object blessed into the
# caller's class, with logger, conf and cache already set.
sub new {
  my $caller = shift;

  # Works both as a class method and as an instance method.
  my $class = ref($caller) || $caller;

  my ($logger, $conf, $cache) = rearrange(['LOGGER', 'CONF', 'CACHE'], @_);

  # The isa() calls are wrapped in eval so that an unblessed reference
  # (e.g. a plain hashref) triggers the descriptive throw() below rather
  # than Perl's "Can't call method on unblessed reference" error.
  unless ($logger and ref($logger) and
          eval { $logger->isa('Bio::EnsEMBL::Utils::Logger') }) {
    throw("You must provide a Bio::EnsEMBL::Utils::Logger for logging.");
  }

  unless ($conf and ref($conf) and
          eval { $conf->isa('Bio::EnsEMBL::Utils::ConfParser') }) {
    throw("You must provide configuration as a Bio::EnsEMBL::Utils::ConfParser object.");
  }

  unless ($cache and ref($cache) and
          eval { $cache->isa('Bio::EnsEMBL::IdMapping::Cache') }) {
    throw("You must provide a cache as a Bio::EnsEMBL::IdMapping::Cache object.");
  }

  my $self = {};
  bless($self, $class);

  # initialise
  $self->logger($logger);
  $self->conf($conf);
  $self->cache($cache);

  return $self;
}
|
|
116
|
|
117
|
|
118 =head2 get_filehandle
|
|
119
|
|
120 Arg[1] : String $filename - filename for filehandle
|
|
121 Arg[2] : String $path_append - append subdirectory name to basedir
|
|
122 Arg[3] : String $mode - filehandle mode (<|>|>>)
|
|
123 Example : my $fh = $object->get_filehandle('mapping_stats.txt', 'stats',
|
|
124 '>');
|
|
125 print $fh "Stats:\n";
|
|
126 Description : Returns a filehandle to a file for reading or writing. The file
|
|
127 is qualified with the basedir defined in the configuration and
|
|
128 an optional subdirectory name.
|
|
129 Return type : filehandle
|
|
130 Exceptions : thrown on missing filename
|
|
131 Caller : general
|
|
132 Status : At Risk
|
|
133 : under development
|
|
134
|
|
135 =cut
|
|
136
|
|
# Open a file below the configured 'basedir' and return the filehandle.
#
#   $filename     name of the file (required; throws if undefined)
#   $path_append  optional subdirectory, joined to basedir via path_append()
#   $mode         open mode; falls back to '>' when not given
#
# Throws if the file cannot be opened.
sub get_filehandle {
  my ($self, $filename, $path_append, $mode) = @_;

  unless (defined($filename)) {
    throw("Need a filename for this filehandle.");
  }

  my $dir = $self->conf->param('basedir');
  if (defined($path_append)) {
    $dir = path_append($dir, $path_append);
  }

  # no (or false) mode means "open for writing"
  $mode = '>' unless ($mode);

  my $file = "$dir/$filename";
  open(my $fh, $mode, $file) or throw("Unable to open $file: $!");

  return $fh;
}
|
|
155
|
|
156
|
|
157 =head2 file_exists
|
|
158
|
|
159 Arg[1] : String $filename - filename to test
|
|
160 Arg[2] : String $path_append - append subdirectory name to basedir
|
|
161 Example : unless ($object->file_exists('gene_mappings.ser', 'mapping')) {
|
|
162 $object->do_gene_mapping;
|
|
163 }
|
|
164 Description : Tests if a file exists and has non-zero size.
|
|
165 Return type : Boolean
|
|
166 Exceptions : none
|
|
167 Caller : general
|
|
168 Status : At Risk
|
|
169 : under development
|
|
170
|
|
171 =cut
|
|
172
|
|
# Test whether a file below the configured 'basedir' exists and is
# non-empty.
#
#   $filename     name of the file to test
#   $path_append  optional subdirectory, joined to basedir via path_append()
#
# Returns the result of the -s file test: the size in bytes for a
# non-empty file, a false value otherwise.
sub file_exists {
  my ($self, $filename, $path_append) = @_;

  my $dir = $self->conf->param('basedir');
  if (defined($path_append)) {
    $dir = path_append($dir, $path_append);
  }

  my $size = -s "$dir/$filename";
  return $size;
}
|
|
183
|
|
184
|
|
185 =head2 fetch_value_from_db
|
|
186
|
|
187 Arg[1] : DBI::db $dbh - a DBI database handle
|
|
188 Arg[2] : String $sql - SQL statement to execute
|
|
189 Example : my $num_genes = $object->fetch_value_from_db($dbh,
|
|
190 'SELECT count(*) FROM gene');
|
|
191 Description : Executes an SQL statement on a db handle and returns the first
|
|
192 column of the first row returned. Useful for queries returning a
|
|
193 single value, like table counts.
|
|
194 Return type : Return type of SQL statement
|
|
195 Exceptions : thrown on wrong or missing arguments
|
|
196 Caller : general
|
|
197 Status : At Risk
|
|
198 : under development
|
|
199
|
|
200 =cut
|
|
201
|
|
# Run an SQL statement and return the first column of the first row.
#
#   $dbh  a DBI::db database handle (throws if missing or of another type)
#   $sql  the SQL statement to execute (throws if empty)
#
# Intended for queries that yield a single value, such as table counts.
sub fetch_value_from_db {
  my ($self, $dbh, $sql) = @_;

  # eval guards against isa() being called on an unblessed reference,
  # so that any wrong argument ends up in throw().
  throw("Need a db handle.")
    unless ($dbh and eval { $dbh->isa('DBI::db') });
  throw("Need an SQL query to execute.") unless ($sql);

  my $sth = $dbh->prepare($sql);
  $sth->execute;
  my ($retval) = $sth->fetchrow_array;

  # Only the first row is read; finish the handle explicitly, as the
  # other database methods in this class do, so that any remaining rows
  # are discarded.
  $sth->finish;

  return $retval;
}
|
|
216
|
|
217
|
|
218 =head2 dump_table_to_file
|
|
219
|
|
220 Arg[1] : String $dbtype - db type (source|target)
|
|
221 Arg[2] : String $table - name of table to dump
|
|
222 Arg[3] : String $filename - name of dump file
|
|
223 Arg[4] : Boolean $check_existing - turn on test for existing dump
|
|
224 Example : my $rows_dumped = $object->dump_table_to_file('source',
|
|
225 'stable_id_event', 'stable_id_event_existing.txt');
|
|
226 Description : Dumps the contents of a db table to a tab-delimited file. The
|
|
227 dump file will be written to a subdirectory called 'tables'
|
|
228 under the basedir from your configuration.
|
|
229 Return type : Int - the number of rows dumped
|
|
230 Exceptions : thrown on wrong or missing arguments
|
|
231 Caller : general
|
|
232 Status : At Risk
|
|
233 : under development
|
|
234
|
|
235 =cut
|
|
236
|
|
# Dump all rows of a database table into a tab-delimited file.
#
#   $dbtype          'source' or 'target' (throws otherwise)
#   $table           name of the table to dump (throws if empty)
#   $filename        name of the dump file (throws if empty); it is
#                    opened through get_filehandle() in the 'tables'
#                    subdirectory
#   $check_existing  if true and the dump file already exists with
#                    non-zero size, nothing is dumped and 0 is returned
#
# NULL column values are written as '\N'. Returns the number of rows
# written. Throws if the dump file cannot be closed cleanly.
sub dump_table_to_file {
  my ($self, $dbtype, $table, $filename, $check_existing) = @_;

  # argument check
  unless (($dbtype eq 'source') or ($dbtype eq 'target')) {
    throw("Missing or unknown db type: $dbtype.");
  }
  throw("Need a table name.") unless ($table);
  throw("Need a filename.") unless ($filename);

  # conditionally check if table was already dumped
  if ($check_existing and $self->file_exists($filename, 'tables')) {
    $self->logger->info("$filename exists, won't dump again.\n");
    return 0;
  }

  my $fh = $self->get_filehandle($filename, 'tables');

  my $dba = $self->cache->get_DBAdaptor($dbtype);
  my $dbh = $dba->dbc->db_handle;
  my $sth = $dbh->prepare("SELECT * FROM $table");
  $sth->execute;

  my $i = 0;

  while (my @row = $sth->fetchrow_array) {
    $i++;

    # use '\N' for NULL values
    print {$fh} join("\t", map { defined($_) ? $_ : '\N' } @row), "\n";
  }

  $sth->finish;

  # Buffered write errors only surface when the handle is closed, so
  # close explicitly and report failure instead of relying on the
  # implicit close at scope exit.
  close($fh) or throw("Unable to close dump file $filename: $!");

  return $i;
}
|
|
282
|
|
283
|
|
284 =head2 upload_file_into_table
|
|
285
|
|
286 Arg[1] : String $dbtype - db type (source|target)
|
|
287 Arg[2] : String $table - name of table to upload the data to
|
|
288 Arg[3] : String $filename - name of dump file
|
|
289 Arg[4] : Boolean $no_check_empty - don't check if table is empty
|
|
290 Example : my $rows_uploaded = $object->upload_file_into_table('target',
|
|
291 'stable_id_event', 'stable_id_event_new.txt');
|
|
292 Description : Uploads a tab-delimited data file into a db table. The data file
|
|
293 will be taken from a subdirectory 'tables' under your configured
|
|
294 basedir. If the db table isn't empty and $no_check_empty isn't
|
|
295 set, no data is uploaded (and a warning is issued).
|
|
296 Return type : Int - the number of rows uploaded
|
|
297 Exceptions : thrown on wrong or missing arguments
|
|
298 Caller : general
|
|
299 Status : At Risk
|
|
300 : under development
|
|
301
|
|
302 =cut
|
|
303
|
|
# Upload a tab-delimited file from <basedir>/tables into a db table.
#
#   $dbtype          'source' or 'target' (throws otherwise)
#   $table           name of the table to load (throws if empty)
#   $filename        name of the data file (throws if empty)
#   $no_check_empty  if true, skip the "table must be empty" check
#
# When the table name matches <prefix>_stable_id, the data is loaded
# into a scratch table named stable_id_<pid> and then copied into the
# stable_id, version, created_date and modified_date columns of table
# <prefix> (joined on <prefix>_id); the scratch table is dropped
# afterwards. Any other table is loaded directly.
#
# Nothing is uploaded, and a message is logged, when
#   - the 'dry_run' configuration parameter is set (returns nothing),
#   - the data file is missing or empty (returns 0), or
#   - the table already holds data and $no_check_empty is false
#     (returns 0).
#
# Returns the number of rows loaded from the file as reported by the
# LOAD DATA statement, or 0 if the driver reports no usable count.
sub upload_file_into_table {
  my ($self, $dbtype, $table, $filename, $no_check_empty) = @_;

  # argument check
  unless (($dbtype eq 'source') or ($dbtype eq 'target')) {
    throw("Missing or unknown db type: $dbtype.");
  }
  throw("Need a table name.") unless ($table);
  throw("Need a filename.") unless ($filename);

  # sanity check for dry run
  if ($self->conf->param('dry_run')) {
    $self->logger->warning(
      "dry_run - skipping db upload for $filename.\n");
    return;
  }

  my $file =
    join('/', $self->conf->param('basedir'), 'tables', $filename);
  my $r = 0;

  unless (-s $file) {
    $self->logger->warning("No data found in file $filename.\n", 1);
    return $r;
  }

  $self->logger->debug("$file -> $table\n", 1);

  my $dba = $self->cache->get_DBAdaptor($dbtype);
  my $dbh = $dba->dbc->db_handle;

  my $idtable = 0;
  if ($table =~ /^([^_]+)_stable_id/) {
    # This is a stable_id table we're working with; from here on
    # $table refers to the main object table.
    $idtable = 1;
    $table   = $1;
  }

  # check table is empty
  unless ($no_check_empty) {
    my $sql =
      $idtable
      ? qq(SELECT count(*) FROM $table WHERE stable_id IS NOT NULL)
      : qq(SELECT count(*) FROM $table);

    my $sth = $dbh->prepare($sql);
    $sth->execute;
    my ($c) = $sth->fetchrow_array;
    $sth->finish;

    if ($c > 0) {
      if ($idtable) {
        $self->logger->warning(
          "Table $table contains $c stable IDs.\n", 1);
      }
      else {
        $self->logger->warning(
          "Table $table not empty: found $c entries.\n", 1);
      }
      $self->logger->info("Data not uploaded!\n", 1);
      return $r;
    }
  }

  # now upload the data
  my $loaded;
  if ($idtable) {
    # Create a scratch table (named after our process id), upload the
    # data into it, and then update the main table.
    my $tmp = 'stable_id_' . $$;

    $dbh->do(
      qq( CREATE TABLE $tmp ( object_id INTEGER UNSIGNED,
                              stable_id VARCHAR(255),
                              version SMALLINT UNSIGNED,
                              created_date DATETIME,
                              modified_date DATETIME,
                              PRIMARY KEY(object_id) ) )
    );

    $loaded =
      $dbh->do(qq(LOAD DATA LOCAL INFILE '$file' INTO TABLE $tmp));

    $dbh->do(
      qq(
        UPDATE $table, $tmp
        SET $table.stable_id=$tmp.stable_id,
            $table.version=$tmp.version,
            $table.created_date=$tmp.created_date,
            $table.modified_date=$tmp.modified_date
        WHERE $table.${table}_id = $tmp.object_id )
    );

    $dbh->do(qq(DROP TABLE $tmp));
  }
  else {
    $loaded =
      $dbh->do(qq(LOAD DATA LOCAL INFILE '$file' INTO TABLE $table));
  }
  $dbh->do(qq(OPTIMIZE TABLE $table));

  # DBI's do() yields "0E0" for zero rows, -1 when the count is unknown
  # and undef on error; only a positive count is reported to the caller.
  $r = $loaded + 0 if ($loaded and $loaded > 0);

  return $r;
}
|
|
414
|
|
415
|
|
416 =head2 logger
|
|
417
|
|
418 Arg[1] : (optional) Bio::EnsEMBL::Utils::Logger - the logger to set
|
|
419 Example : $object->logger->info("Starting ID mapping.\n");
|
|
420 Description : Getter/setter for logger object
|
|
421 Return type : Bio::EnsEMBL::Utils::Logger
|
|
422 Exceptions : none
|
|
423 Caller : constructor
|
|
424 Status : At Risk
|
|
425 : under development
|
|
426
|
|
427 =cut
|
|
428
|
|
# Getter/setter for the logger object, stored under the '_logger' key.
# When called with an argument, that value is stored first (even if it
# is undef); the stored value is returned in either case.
sub logger {
  my ($self, @args) = @_;

  if (@args) {
    $self->{'_logger'} = $args[0];
  }

  return $self->{'_logger'};
}
|
|
434
|
|
435
|
|
436 =head2 conf
|
|
437
|
|
438 Arg[1] : (optional) Bio::EnsEMBL::Utils::ConfParser - the configuration
|
|
439 to set
|
|
440 Example : my $basedir = $object->conf->param('basedir');
|
|
441 Description : Getter/setter for configuration object
|
|
442 Return type : Bio::EnsEMBL::Utils::ConfParser
|
|
443 Exceptions : none
|
|
444 Caller : constructor
|
|
445 Status : At Risk
|
|
446 : under development
|
|
447
|
|
448 =cut
|
|
449
|
|
# Getter/setter for the configuration object, stored under the '_conf'
# key. When called with an argument, that value is stored first (even
# if it is undef); the stored value is returned in either case.
sub conf {
  my ($self, @args) = @_;

  if (@args) {
    $self->{'_conf'} = $args[0];
  }

  return $self->{'_conf'};
}
|
|
455
|
|
456
|
|
457 =head2 cache
|
|
458
|
|
459 Arg[1] : (optional) Bio::EnsEMBL::IdMapping::Cache - the cache to set
|
|
460 Example : $object->cache->read_from_file('source');
|
|
461 Description : Getter/setter for cache object
|
|
462 Return type : Bio::EnsEMBL::IdMapping::Cache
|
|
463 Exceptions : none
|
|
464 Caller : constructor
|
|
465 Status : At Risk
|
|
466 : under development
|
|
467
|
|
468 =cut
|
|
469
|
|
# Getter/setter for the cache object, stored under the '_cache' key.
# When called with an argument, that value is stored first (even if it
# is undef); the stored value is returned in either case.
sub cache {
  my ($self, @args) = @_;

  if (@args) {
    $self->{'_cache'} = $args[0];
  }

  return $self->{'_cache'};
}
|
|
475
|
|
476
|
|
477 1;
|
|
478
|