0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Utils::SqlHelper
|
|
24
|
|
25 =head1 VERSION
|
|
26
|
|
27 $Revision: 1.25 $
|
|
28
|
|
29 =head1 SYNOPSIS
|
|
30
|
|
31 use Bio::EnsEMBL::Utils::SqlHelper;
|
|
32
|
|
33 my $helper =
|
|
34 Bio::EnsEMBL::Utils::SqlHelper->new( -DB_CONNECTION => $dbc );
|
|
35
|
|
36 my $arr_ref = $helper->execute(
|
|
37 -SQL => 'select name, age from tab where col =?',
|
|
38 -CALLBACK => sub {
|
|
39 my @row = @{ shift @_ };
|
|
40 return { name => $row[0], age => $row[1] };
|
|
41 },
|
|
42 -PARAMS => ['A'] );
|
|
43
|
|
44 use Data::Dumper;
|
|
45 print Dumper($arr_ref), "\n";
|
|
46 # Prints out [name=>'name', age=>1] maybe ....
|
|
47
|
|
48
|
|
49 # For transactional work; only works if your MySQL table
|
|
50 # engine/database supports transactional work (such as InnoDB)
|
|
51
|
|
52 $helper->transaction(
|
|
53 -CALLBACK => sub {
|
|
54 if ( $helper->execute_single_result(
|
|
55 -SQL => 'select count(*) from tab'
|
|
56 ) )
|
|
57 {
|
|
58 return $helper->execute_update('delete from tab');
|
|
59 } else {
|
|
60 return
|
|
61 $helper->batch( -SQL => 'insert into tab (?,?)',
|
|
62 -DATA => [ [ 1, 2 ], [ 1, 3 ], [ 1, 4 ] ] );
|
|
63 }
|
|
64 } );
|
|
65
|
|
66 =head1 DESCRIPTION
|
|
67
|
|
68 Easier database interaction
|
|
69
|
|
70 =head1 METHODS
|
|
71
|
|
72 See subrotuines.
|
|
73
|
|
74 =cut
|
|
75
|
|
76 package Bio::EnsEMBL::Utils::SqlHelper;
|
|
77
|
|
78 use warnings;
|
|
79 use strict;
|
|
80
|
|
81 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
82 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
|
|
83 use Bio::EnsEMBL::Utils::Exception qw(throw);
|
|
84 use Bio::EnsEMBL::Utils::Iterator;
|
|
85 use English qw( -no_match_vars ); #Used for $PROCESS_ID
|
|
86 use Scalar::Util qw(weaken); #Used to not hold a strong ref to DBConnection
|
|
87
|
|
88 =pod
|
|
89
|
|
90 =head2 new()
|
|
91
|
|
92 Arg [DB_CONNECTION] : Bio::EnsEMBL::DBSQL::DBConnection $db_connection
|
|
93 Returntype : Instance of helper
|
|
94 Exceptions : If the object given as a DBConnection is not one or it
|
|
95 was undefined
|
|
96 Status : Stable
|
|
97
|
|
98 Creates a new instance of this object.
|
|
99
|
|
100 my $dba = get_dba('mydb'); # New DBAdaptor from somewhere
|
|
101 my $helper = Bio::EnsEMBL::Utils::SqlHelper->new(
|
|
102 -DB_CONNECTION => $dba->dbc() );
|
|
103
|
|
104 $helper->execute_update( -SQL => 'update tab set flag=?',
|
|
105 -PARAMS => [1] );
|
|
106
|
|
107 =cut
|
|
108
|
|
109 sub new {
|
|
110 my ( $class, @args ) = @_;
|
|
111
|
|
112 my ($db_connection) = rearrange([qw(db_connection)], @args);
|
|
113
|
|
114 my $self = bless( {}, ref($class) || $class );
|
|
115 throw('-DB_CONNECTION construction parameter was undefined.')
|
|
116 unless defined $db_connection;
|
|
117 $self->db_connection($db_connection);
|
|
118
|
|
119 return $self;
|
|
120 }
|
|
121
|
|
122 =pod
|
|
123
|
|
124 =head2 db_connection()
|
|
125
|
|
126 Arg [1] : Bio::EnsEMBL::DBSQL::DBConnection $db_connection
|
|
127 Description : Sets and retrieves the DBConnection
|
|
128 Returntype : Bio::EnsEMBL::DBSQL::DBConnection
|
|
129 Exceptions : If the object given as a DBConnection is not one or if an
|
|
130 attempt is made to set the value more than once
|
|
131 Status : Stable
|
|
132
|
|
133 =cut
|
|
134
|
|
135 sub db_connection {
|
|
136 my ($self, $db_connection) = @_;
|
|
137 if(defined $db_connection) {
|
|
138 if(exists $self->{db_connection}) {
|
|
139 throw('Cannot reset the DBConnection object; already defined ');
|
|
140 }
|
|
141 assert_ref($db_connection, 'Bio::EnsEMBL::DBSQL::DBConnection', 'db_connection');
|
|
142 $self->{db_connection} = $db_connection;
|
|
143 weaken $self->{db_connection};
|
|
144 }
|
|
145 return $self->{db_connection};
|
|
146 }
|
|
147
|
|
148 # --------- SQL Methods
|
|
149
|
|
150 =pod
|
|
151
|
|
152 =head2 execute() - Execute a SQL statement with a custom row handler
|
|
153
|
|
154 Arg [SQL] : string SQL to execute
|
|
155 Arg [CALLBACK] : CodeRef; The callback to use for mapping a row to a data
|
|
156 point; leave blank for a default mapping to a 2D array
|
|
157 Arg [USE_HASHREFS] : boolean If set to true will cause HashRefs to be returned
|
|
158 to the callback & not ArrayRefs
|
|
159 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement
|
|
160 Arg [PREPARE_PARAMS] : boolean Parameters to be passed onto the Statement Handle
|
|
161 prepare call
|
|
162 Arg [ITERATOR] : boolean Request a L<Bio::EnsEMBL::Utils::Iterator>
|
|
163 rather than a 2D array
|
|
164 Returntype : ArrayRef/L<Bio::EnsEMBL::Utils::Iterator>
|
|
165 Exceptions : If errors occur in the execution of the SQL
|
|
166 Status : Stable
|
|
167
|
|
168 my $arr_ref = $helper->execute(
|
|
169 -SQL => 'select a,b,c from tab where col =?',
|
|
170 -CALLBACK => sub {
|
|
171 my @row = @{ shift @_ };
|
|
172 return { A => $row[0], B => $row[1], C => $row[2] };
|
|
173 },
|
|
174 -PARAMS => ['A'] );
|
|
175
|
|
176 #Or with hashrefs
|
|
177 my $arr_ref = $helper->execute(
|
|
178 -SQL => 'select a,b,c from tab where col =?',
|
|
179 -USE_HASHREFS => 1,
|
|
180 -CALLBACK => sub {
|
|
181 my $row = shift @_;
|
|
182 return { A => $row->{a}, B => $row->{b}, C => $row->{c} };
|
|
183 },
|
|
184 -PARAMS => ['A'] );
|
|
185
|
|
186 Uses a callback defined by the C<sub> decalaration. Here we specify how
|
|
187 the calling code will deal with each row of a database's result set. The
|
|
188 sub can return any type of Object/hash/data structure you require.
|
|
189
|
|
190 Should you not specify a callback then a basic one will be assigned to
|
|
191 you which will return a 2D array structure e.g.
|
|
192
|
|
193 my $arr_ref = $helper->execute(
|
|
194 -SQL => 'select a,b,c from tab where col =?',
|
|
195 -PARAMS => ['A'] );
|
|
196
|
|
197 This is equivalent to DBI's c<selectall_arrayref()> subroutine.
|
|
198
|
|
199 As an extension to this method you can write a closure subroutine which
|
|
200 takes in two parameters. The first is the array/hash reference & the
|
|
201 second is the statement handle used to execute. 99% of the time you will
|
|
202 not need it but there are occasions where you do need it. An example of
|
|
203 usage would be:
|
|
204
|
|
205 my $conn = get_conn(); #From somwewhere
|
|
206 my $arr_ref = $conn->execute(
|
|
207 -SQL => 'select a,b,c from tab where col =?',
|
|
208 -USE_HASHREFS => 1,
|
|
209 -CALLBACK => sub {
|
|
210 my ( $row, $sth ) = @_;
|
|
211 #Then do something with sth
|
|
212 return { A => $row->[0], B => $row->[1], C => $row->[2] };
|
|
213 },
|
|
214 -PARAMS => ['A'] );
|
|
215
|
|
216 Any arguments to bind to the incoming statement. This can be a set of scalars
|
|
217 or a 2D array if you need to specify any kind of types of sql objects i.e.
|
|
218
|
|
219 use DBI qw(:sql_types);
|
|
220
|
|
221 my $conn = get_conn();
|
|
222 my $arr_ref = $conn->execute(
|
|
223 -SQL =>
|
|
224 'select a,b,c from tab where col =? and num_col=? and other=?',
|
|
225 -USE_HASHREFS => 1,
|
|
226 -CALLBACK => sub {
|
|
227 my @row = @{ shift @_ };
|
|
228 return { A => $row[0], B => $row[1], C => $row[2] };
|
|
229 },
|
|
230 -PARAMS => [ '1', SQL_VARCHAR ],
|
|
231 [ 2, SQL_INTEGER ],
|
|
232 'hello' );
|
|
233
|
|
234 Here we import DBI's sql types into our package and then pass in
|
|
235 multiple anonymous array references as parameters. Each param is
|
|
236 tested in the input and if it is detected to be an ARRAY reference we
|
|
237 dereference the array and run DBI's bind_param method. In fact you can
|
|
238 see each part of the incoming paramaters array as the contents to call
|
|
239 C<bind_param> with. The only difference is the package tracks the bind
|
|
240 position for you.
|
|
241
|
|
242 We can get back a L<Bio::EnsEMBL::Utils::Iterator> object which can be used
|
|
243 to iterate over the results set without first materializing the data into
|
|
244 memory. An example would be:
|
|
245
|
|
246 my $iterator = $helper->execute(
|
|
247 -SQL => 'select a,b,c from tab where col =?',
|
|
248 -PARAMS => ['A']
|
|
249 -ITERATOR => 1);
|
|
250 while($iterator->has_next()) {
|
|
251 my $row = $iterator->next();
|
|
252 #Do something
|
|
253 }
|
|
254
|
|
255 This is very useful for very large datasets.
|
|
256
|
|
257 =cut
|
|
258
|
|
259 sub execute {
|
|
260 my ( $self, @args ) = @_;
|
|
261 my ($sql, $callback, $use_hashrefs, $params, $prepare_params, $iterator) =
|
|
262 rearrange([qw(sql callback use_hashrefs params prepare_params iterator)], @args);
|
|
263 my $has_return = 1;
|
|
264
|
|
265 #If no callback then we execute using a default one which returns a 2D array
|
|
266 if(!defined $callback) {
|
|
267 throw('Cannot use fetchrow_hashref() with default mappers. Turn off this option') if $use_hashrefs;
|
|
268 $callback = $self->_mappers()->{array_ref};
|
|
269 }
|
|
270
|
|
271 return $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params, $prepare_params, $iterator );
|
|
272 }
|
|
273
|
|
274 =pod
|
|
275
|
|
276 =head2 execute_simple()
|
|
277
|
|
278 Arg [SQL] : string $sql
|
|
279 Arg [PARAMS] : ArrayRef $params
|
|
280 Arg [CALLBACK] : CodeRef $callback
|
|
281 Returntype : ArrayRef of 1D elements
|
|
282 Exceptions : If errors occur in the execution of the SQL
|
|
283 Status : Stable
|
|
284
|
|
285 my $classification =
|
|
286 $helper->execute_simple(
|
|
287 -SQL =>
|
|
288 'select meta_val from meta where meta_key =? order by meta_id',
|
|
289 -PARAMS => ['species.classification'] );
|
|
290
|
|
291 Identical to C<execute> except you do not specify a sub-routine reference.
|
|
292 Using this code assumes you want an array of single scalar values as returned
|
|
293 by the given SQL statement.
|
|
294
|
|
295 =cut
|
|
296
|
|
297 sub execute_simple {
|
|
298 my ( $self, @args ) = @_;
|
|
299 my ($sql, $params, $callback) = rearrange([qw(sql params callback)], @args);
|
|
300 my $has_return = 1;
|
|
301 my $use_hashrefs = 0;
|
|
302 $callback ||= $self->_mappers()->{first_element};
|
|
303 return $self->_execute($sql, $callback, $has_return, $use_hashrefs, $params);
|
|
304 }
|
|
305
|
|
306 =pod
|
|
307
|
|
308 =head2 execute_no_return()
|
|
309
|
|
310 Arg [SQL] : string sql
|
|
311 Arg [CALLBACK] : CodeRef The callback to use for mapping a row to a data point;
|
|
312 we assume you are assigning into a data structure which
|
|
313 has requirements other than simple translation into an
|
|
314 array
|
|
315 Arg [USE_HASHREFS] : boolean If set to true will cause HashRefs to be returned
|
|
316 to the callback & not ArrayRefs
|
|
317 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement
|
|
318 Returntype : None
|
|
319 Exceptions : If errors occur in the execution of the SQL
|
|
320 Status : Stable
|
|
321
|
|
322 Whilst all other execute methods will return something; this assumes that the
|
|
323 given mapper subroutine will be performing the business of placing values
|
|
324 somewhere or doing something with them.
|
|
325
|
|
326 There is a huge temptation to nest queries using this method; do not! Execute
|
|
327 the values into an array using one of the other methods then run your subqueries
|
|
328 on them; or make a better first query. SQL is flexible; so use it.
|
|
329
|
|
330 =cut
|
|
331
|
|
332 sub execute_no_return {
|
|
333 my ( $self, @args ) = @_;
|
|
334 my ($sql, $callback, $use_hashrefs, $params) = rearrange([qw(sql callback use_hashrefs params)], @args);
|
|
335 throw('No callback defined but this is a required parameter for execute_no_return()') if ! $callback;
|
|
336 my $has_return = 0;
|
|
337 my $prepare_params = [];
|
|
338 $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params);
|
|
339 return;
|
|
340 }
|
|
341
|
|
342 =pod
|
|
343
|
|
344 =head2 execute_into_hash()
|
|
345
|
|
346 Arg [SQL] : string $sql
|
|
347 Arg [CALLBACK] : CodeRef The callback to use for mapping to a value in a hash
|
|
348 keyed by the first element in your result set;
|
|
349 leave blank for a default mapping to a scalar value
|
|
350 of the second element
|
|
351 Arg [PARAMS] : The binding parameters to the SQL statement
|
|
352 Returntype : HashRef keyed by column 1 & value is the return of callback
|
|
353 Exceptions : If errors occur in the execution of the SQL
|
|
354 Status : Stable
|
|
355
|
|
356 A variant of the execute methods but rather than returning a list of
|
|
357 mapped results this will assume the first column of a returning map &
|
|
358 the calling subroutine will map the remainder of your return as the
|
|
359 hash's key.
|
|
360
|
|
361 B<This code can handle simple queries to hashes, complex value mappings
|
|
362 and repeated mappings for the same key>.
|
|
363
|
|
364 For example:
|
|
365
|
|
366 my $sql = 'select key, one, two from table where something =?';
|
|
367 my $mapper = sub {
|
|
368 my ( $row, $value ) = @_;
|
|
369 #Ignore field 0 as that is being used for the key
|
|
370 my $obj = Some::Obj->new( one => $row->[1], two => $row->[2] );
|
|
371 return $obj;
|
|
372 };
|
|
373
|
|
374 my $hash =
|
|
375 $helper->execute_into_hash( -SQL => $sql,
|
|
376 -CALLBACK => $mapper,
|
|
377 -PARAMS => ['val'] );
|
|
378
|
|
379 #Or for a more simple usage
|
|
380 my $sql = 'select biotype, count(gene_id) from gene group by biotype';
|
|
381 my $biotype_hash = $conn->execute_into_hash( -SQL => $sql );
|
|
382 print $biotype_hash->{protein_coding} || 0, "\n";
|
|
383
|
|
384 The basic pattern assumes a scenario where you are mapping in a one
|
|
385 key to one value. For more advanced mapping techniques you can use the
|
|
386 second value passed to the subroutine paramater set. This is shown as
|
|
387 C<$value> in the above examples. This value is what is found in the HASH
|
|
388 being populated in the background. So on the first time you encounter it
|
|
389 for the given key it will be undefined. For future invocations it will
|
|
390 be set to the value you gave it. This allows us to setup code like the
|
|
391 following
|
|
392
|
|
393 my %args = ( -SQL => 'select meta_key, meta_value from meta '
|
|
394 . 'where meta_key =? order by meta_id',
|
|
395 -PARAMS => ['species.classification'] );
|
|
396
|
|
397 my $hash = $helper->execute_into_hash(
|
|
398 %args,
|
|
399 -CALLBACK => sub {
|
|
400 my ( $row, $value ) = @_;
|
|
401 $value = [] if !defined $value;
|
|
402 push( @{$value}, $row->[1] );
|
|
403 return $value;
|
|
404 } );
|
|
405
|
|
406 #OR
|
|
407
|
|
408 $hash = $helper->execute_into_hash(
|
|
409 %args,
|
|
410 -CALLBACK => sub {
|
|
411 my ( $row, $value ) = @_;
|
|
412 if ( defined $value ) {
|
|
413 push( @{$value}, $row->[1] );
|
|
414 return;
|
|
415 }
|
|
416 my $new_value = [ $row->[1] ];
|
|
417 return $new_value;
|
|
418 } );
|
|
419
|
|
420 The code understands that returning a defined value means to push this
|
|
421 value into the background hash. In example one we keep on re-inserting
|
|
422 the Array of classifications into the hash. Example two shows an early
|
|
423 return from the callback which indicates to the code we do not have any
|
|
424 value to re-insert into the hash. Of the two methods example one is
|
|
425 clearer but is possibliy slower.
|
|
426
|
|
427 B<Remember that the row you are given is the full row & not a view of
|
|
428 the reminaing fields.> Therefore indexing for the data you are concerned
|
|
429 with begins at position 1.
|
|
430
|
|
431 =cut
|
|
432
|
|
433 sub execute_into_hash {
|
|
434 my ( $self, @args ) = @_;
|
|
435 my ($sql, $callback, $params) = rearrange([qw(sql callback params)], @args);
|
|
436 my $hash = {};
|
|
437
|
|
438 #If no callback then we execute using a default one which sets value to 2nd element
|
|
439 if(!defined $callback) {
|
|
440 $callback = $self->_mappers()->{second_element};
|
|
441 }
|
|
442
|
|
443 #Default mapper uses the 1st key + something else from the mapper
|
|
444 my $mapper = sub {
|
|
445 my $row = shift @_;
|
|
446 my $key = $row->[0];
|
|
447 my $value = $hash->{$key};
|
|
448 my $new_value = $callback->($row, $value);
|
|
449 if(defined $new_value) {
|
|
450 $hash->{ $key } = $new_value;
|
|
451 }
|
|
452 return;
|
|
453 };
|
|
454
|
|
455 $self->execute_no_return(
|
|
456 -SQL => $sql,
|
|
457 -CALLBACK => $mapper,
|
|
458 -PARAMS => $params
|
|
459 );
|
|
460
|
|
461 return $hash;
|
|
462 }
|
|
463
|
|
464 =pod
|
|
465
|
|
466 =head2 execute_single_result()
|
|
467
|
|
468 Arg [SQL] : string $sql
|
|
469 Arg [CALLBACK] : CodeRef The callback to use for mapping a row to a data point;
|
|
470 leave blank for a default scalar mapping
|
|
471 Arg [USE_HASHREFS] : boolean If set to true will cause HashRefs to be returned
|
|
472 to the callback & not ArrayRefs
|
|
473 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement
|
|
474 Returntype : Scalar
|
|
475 Exceptions : If errors occur in the execution of the SQL, if the query
|
|
476 returned more than 1 row and if we found no rows.
|
|
477 Status : Stable
|
|
478
|
|
479 my $meta_count =
|
|
480 $helper->execute_single_result(
|
|
481 -SQL => 'select count(*) from meta where species_id =?',
|
|
482 -PARAMS => [1] );
|
|
483
|
|
484 Very similar to C<execute()> except it will raise an exception if we have more
|
|
485 or less than one row returned
|
|
486
|
|
487 =cut
|
|
488
|
|
489 sub execute_single_result {
|
|
490 my ( $self, @args ) = @_;
|
|
491 my ($sql, $callback, $use_hashrefs, $params) = rearrange(
|
|
492 [qw(sql callback use_hashrefs params)], @args);
|
|
493
|
|
494 my $results = $self->execute_simple(
|
|
495 -SQL => $sql,
|
|
496 -CALLBACK => $callback,
|
|
497 -USE_HASHREFS => $use_hashrefs,
|
|
498 -PARAMS => $params
|
|
499 );
|
|
500
|
|
501 my $result_count = scalar(@{$results});
|
|
502 if($result_count != 1) {
|
|
503 $params = [] if ! $params;
|
|
504 my $type = ($result_count == 0) ? 'No' : 'Too many';
|
|
505 my $msg = "${type} results returned. Expected 1 but got $result_count for query '${sql}' with params [";
|
|
506 $msg .= join( ',', map {(defined $_) ? $_ : '-undef-';} @{$params} );
|
|
507 $msg .= ']';
|
|
508 throw($msg);
|
|
509 }
|
|
510 return $results->[0];
|
|
511 }
|
|
512
|
|
513 =pod
|
|
514
|
|
515 =head2 transaction()
|
|
516
|
|
517 Arg [CALLBACK] : CodeRef The callback used for transaction isolation; once
|
|
518 the subroutine exists the code will decide on rollback
|
|
519 or commit. Required
|
|
520 Arg [RETRY] : integer the number of retries to attempt with this
|
|
521 transactional block. Defaults to 0.
|
|
522 Arg [PAUSE] : integer the time in seconds to pause in-between retries.
|
|
523 Defaults to 1.
|
|
524 Arg [CONDITION] : CodeRef allows you to inspect the exception raised
|
|
525 and should your callback return true then the
|
|
526 retry will be attempted. If not given then all
|
|
527 exceptions mean attempt a retry (if specified)
|
|
528 Returntype : Return of the callback
|
|
529 Exceptions : If errors occur in the execution of the SQL
|
|
530 Status : Stable
|
|
531
|
|
532 my $val = $helper->transaction(
|
|
533 -CALLBACK => sub {
|
|
534 my ($dbc) = @_;
|
|
535 #Do something
|
|
536 return 1;
|
|
537 } );
|
|
538
|
|
539 #Or because of the arguments method we use
|
|
540 my $val = $helper->transaction(
|
|
541 sub {
|
|
542 my ($dbc) = @_;
|
|
543 #Do something
|
|
544 return 1;
|
|
545 } );
|
|
546
|
|
547 Creates a transactional block which will ensure that the connection is
|
|
548 committed when your submmited subroutine has finished or will rollback
|
|
549 in the event of an error occuring in your block.
|
|
550
|
|
551 The code will always force AutoCommit off but will restore it to its
|
|
552 previous setting. If your DBI/DBD driver does not support manual
|
|
553 commits then this code will break. The code will turn off the
|
|
554 C<disconnect_when_idle()> method to allow transactions to work as
|
|
555 expected.
|
|
556
|
|
557 An effect of using REPEATABLE READ transaction isolation (InnoDB's
|
|
558 default) is that your data is as fresh as when you started your current
|
|
559 transaction. To ensure the freshest data use C<SELECT ... from ... LOCK
|
|
560 IN SHARE MODE> or C<SELECT ... from ... LOCK FOR UPDATE> if you are
|
|
561 going to issue updates.
|
|
562
|
|
563 Creating a transaction within a transaction results in the commit
|
|
564 rollback statements occuring in the top level transaction. That way any
|
|
565 block of code which is meant to to be transaction can be wrapped in
|
|
566 this block ( assuming the same instance of SQLHelper is passed around &
|
|
567 used).
|
|
568
|
|
569 You can also request the retry of a transactional block of code which is
|
|
570 causing problems. This is not a perfect solution as it indicates your
|
|
571 programming model is broken. This mode can be specified as such:
|
|
572
|
|
573 my $val = $helper->transaction(
|
|
574 -RETRY => 3, -PAUSE => 2,
|
|
575 -CALLBACK => sub {
|
|
576 my ($dbc) = @_;
|
|
577 #Do something
|
|
578 return 1;
|
|
579 } );
|
|
580
|
|
581 The C<-RETRY> argument indicates the number of times we attempt the transaction
|
|
582 and C<-PAUSE> indicates the time in-between attempts. These retries will
|
|
583 only occur in the root transaction block i.e. you cannot influence the
|
|
584 retry system in a sub transaction. You can influence if the retry is done with
|
|
585 the C<-CONDITION> argument which accepts a Code reference (same as the
|
|
586 C<-CALLBACK> parameter). This allows you to inspect the error thrown to
|
|
587 retry only in some situations e.g.
|
|
588
|
|
589 my $val = $helper->transaction(
|
|
590 -RETRY => 3, -PAUSE => 2,
|
|
591 -CALLBACK => sub {
|
|
592 my ($dbc) = @_;
|
|
593 #Do something
|
|
594 return 1;
|
|
595 },
|
|
596 -CONDITION => sub {
|
|
597 my ($error) = @_;
|
|
598 return ( $error =~ /deadlock/ ) ? 1 : 0;
|
|
599 }
|
|
600 );
|
|
601
|
|
602 Here we attempt a transaction and will B<only> retry when we have an error
|
|
603 with the phrase deadlock.
|
|
604
|
|
605 =cut
|
|
606
|
|
607 sub transaction {
|
|
608 my ($self, @args) = @_;
|
|
609
|
|
610 my ($callback, $retry, $pause, $condition) = rearrange([qw(callback retry pause condition)], @args);
|
|
611
|
|
612 throw('-CALLBACK was not a CodeRef. Got a reference of type ['.ref($callback).']. Check your parameters')
|
|
613 unless check_ref($callback, 'CODE');
|
|
614
|
|
615 #Setup defaults
|
|
616 $retry = 0 unless defined $retry;
|
|
617 $pause = 1 unless defined $pause;
|
|
618 if(! defined $condition) {
|
|
619 $condition = sub {
|
|
620 return 1;
|
|
621 };
|
|
622 }
|
|
623
|
|
624 assert_ref($condition, 'CODE', '-CONDITION');
|
|
625
|
|
626 my $dbc = $self->db_connection();
|
|
627 my $original_dwi;
|
|
628 my $ac;
|
|
629
|
|
630 my $error;
|
|
631 my $result;
|
|
632
|
|
633 #If we were already in a transaction then we do not do any management of the
|
|
634 #session & wait for the parent transaction(s) to finish
|
|
635 my $perform_transaction = $self->_perform_transaction_code();
|
|
636 if($perform_transaction) {
|
|
637 ($original_dwi, $ac) = $self->_enable_transaction();
|
|
638 }
|
|
639 else {
|
|
640 #If we were in a transaction then ignore any attempts at retry here
|
|
641 $retry = 0;
|
|
642 }
|
|
643
|
|
644 for(my $iteration = 0; $iteration <= $retry; $iteration++) {
|
|
645 eval {
|
|
646 $result = $callback->($dbc);
|
|
647 $dbc->db_handle()->commit() if $perform_transaction;
|
|
648 };
|
|
649 $error = $@;
|
|
650 #If we were allowed to deal with the error then we apply rollbacks & then
|
|
651 #retry or leave to the remainder of the code to throw
|
|
652 if($perform_transaction && $error) {
|
|
653 eval { $dbc->db_handle()->rollback(); };
|
|
654 #If we were not on our last iteration then warn & allow the retry
|
|
655 if($iteration != $retry) {
|
|
656 if($condition->($error)) {
|
|
657 warn("Encountered error on attempt ${iteration} of ${retry} and have issued a rollback. Will retry after sleeping for $pause second(s): $error");
|
|
658 sleep $pause;
|
|
659 }
|
|
660 else {
|
|
661 last; #break early if condition of error was not matched
|
|
662 }
|
|
663 }
|
|
664 }
|
|
665
|
|
666 #Always break the loop if we had a successful attempt
|
|
667 last if ! $error;
|
|
668 }
|
|
669
|
|
670 if($perform_transaction) {
|
|
671 $self->_disable_transaction($original_dwi, $ac);
|
|
672 }
|
|
673
|
|
674 throw("ABORT: Transaction aborted because of error: ${error}") if $error;
|
|
675
|
|
676 return $result;
|
|
677 }
|
|
678
|
|
679 =pod
|
|
680
|
|
681 =head2 execute_update()
|
|
682
|
|
683 Arg [SQL] : string $sql
|
|
684 Arg [CALLBACK] : CodeRef The callback to use for calling methods on the
|
|
685 DBI statement handle or DBConnection object after an
|
|
686 update command
|
|
687 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement
|
|
688 Arg [PREPARE_PARAMS] : ArrayRef Parameters to bind to the prepare() StatementHandle call
|
|
689 Returntype : Number of rows affected
|
|
690 Exceptions : If errors occur in the execution of the SQL
|
|
691 Status : Stable
|
|
692
|
|
693 Used for performing updates but conforms to the normal execute statement
|
|
694 subroutines.
|
|
695
|
|
696 use DBI qw(:sql_types);
|
|
697 $helper->execute_update(-SQL => 'update tab set name = ? where id =?',
|
|
698 -PARAMS => [ 'andy', [ 1, SQL_INTEGER ] ] );
|
|
699
|
|
700 If you need to do something a bit more advanced with your DML then you can
|
|
701 give the method a closure and this will be called after the execute has been
|
|
702 issued i.e.
|
|
703
|
|
704 my $obj;
|
|
705 $helper->execute_update(
|
|
706 -SQL => 'insert into tab (name) values(?)',
|
|
707 -CALLBACK => sub {
|
|
708 my ( $sth, $dbh ) = @_;
|
|
709 $obj->{id} = $dbh->{mysql_insertid};
|
|
710 },
|
|
711 -PARAMS => [ $obj->name() ] );
|
|
712
|
|
713 This lets us access the statement handle & database handle to access other
|
|
714 properties such as the last identifier inserted.
|
|
715
|
|
716 =cut
|
|
717
|
|
718 sub execute_update {
|
|
719 my ($self, @args) = @_;
|
|
720 my ($sql, $callback, $params, $prepare_params) = rearrange([qw(sql callback params prepare_params)], @args);
|
|
721 my $rv = 0;
|
|
722 my $sth;
|
|
723 eval {
|
|
724 my @prepare_params;
|
|
725 @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY');
|
|
726 $sth = $self->db_connection()->prepare($sql, @prepare_params);
|
|
727 $self->_bind_params($sth, $params);
|
|
728 $rv = $sth->execute();
|
|
729 $callback->($sth, $self->db_connection()->db_handle()) if $callback;
|
|
730 };
|
|
731 my $error = $@;
|
|
732 $self->_finish_sth($sth);
|
|
733 if($error) {
|
|
734 my $params = join ' ', map { (defined $_) ? $_ : q{undef} } @{$params};
|
|
735 throw("Cannot apply sql '${sql}' with params '${params}': ${error}");
|
|
736 }
|
|
737 return $rv;
|
|
738 }
|
|
739
|
|
740 =head2 execute_with_sth()
|
|
741
|
|
742 Arg [SQL] : string $sql
|
|
743 Arg [CALLBACK] : CodeRef The callback to use for working with the statement
|
|
744 handle once returned. This is B<not> a mapper.
|
|
745 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement
|
|
746 Arg [PREPARE_PARAMS] : ArrayRef Used to pass parameters to the statement handle
|
|
747 prepare method
|
|
748 Description : A subrotuine which abstracts resource handling and statement
|
|
749 preparing leaving the developer to define how to handle
|
|
750 and process the statement.
|
|
751 Returntype : Anything you wish to return from the callback
|
|
752 Exceptions : If errors occur in the execution of the SQL
|
|
753 Status : Stable
|
|
754
|
|
755 my $meta_count = $helper->execute_with_sth(
|
|
756 -SQL => 'select count(*) from meta where species_id =?',
|
|
757 -PARAMS => [1],
|
|
758 -CALLBACK => sub {
|
|
759 my ($sth) = @_;
|
|
760 my $count;
|
|
761 $sth->bind_columns( \$count );
|
|
762 while ( $sth->fetch ) {
|
|
763 print $count, "\n";
|
|
764 }
|
|
765 return $count;
|
|
766 } );
|
|
767
|
|
768 Very similar to C<execute()> except this gives you full control over the
|
|
769 lifecycle of the statement handle & how you wish to proceed with working
|
|
770 with a statement handle. This is for situations where you believe going through
|
|
771 the mappers causes too much of a slow-down (since we have to execute a
|
|
772 subroutine for every row in order to map it correctly).
|
|
773
|
|
774 However please benchmark before adopting this method as it increases the
|
|
775 complexity of your code and the mapper slow down only becomes apparent when
|
|
776 working with very large numbers of rows.
|
|
777
|
|
778 =cut
|
|
779
|
|
780 sub execute_with_sth {
|
|
781 my ($self, @args) = @_;
|
|
782 my ($sql, $callback, $params, $prepare_params) = rearrange([qw(sql callback params prepare_params)], @args);
|
|
783 my $sth = $self->_base_execute( $sql, $params, $prepare_params, $callback );
|
|
784 my $result = eval {$callback->($sth)};
|
|
785 my $error = $@;
|
|
786 $self->_finish_sth($sth);
|
|
787 die $error if $error;
|
|
788 return $result;
|
|
789 }
|
|
790
|
|
791 =pod
|
|
792
|
|
793 =head2 batch()
|
|
794
|
|
795 Arg [SQL] : string $sql
|
|
796 Arg [CALLBACK] : CodeRef The callback to use for working with the statement
|
|
797 handle once returned; specify this or -DATA
|
|
798 Arg [DATA] : ArrayRef The data to insert; specify this or -CALLBACK
|
|
799 Arg [COMMIT_EVERY] : Integer defines the rate at which to issue commits to
|
|
800 the DB handle. This is important when working with
|
|
801 InnoDB databases since it affects the speed of rollback
|
|
802 (larger gaps inbetween commits means more to rollback).
|
|
803
|
|
804 Ignored if using the callback version.
|
|
805 Arg [PREPARE_PARAMS] : ArrayRef Used to pass parameters to the statement handle
|
|
806 prepare method
|
|
807 Returntype : integer rows updated
|
|
808 Exceptions : If errors occur in the execution of the SQL
|
|
809 Status : Stable
|
|
810
|
|
811 my $alotofdata = getitfromsomewhere();
|
|
812 $helper->batch(
|
|
813 -SQL => 'insert into table (one,two) values(?,?)',
|
|
814 -CALLBACk => sub {
|
|
815 my ( $sth, $dbc ) = @_;
|
|
816 foreach my $data (@alotofdata) {
|
|
817 $sth->execute( @{$data} );
|
|
818 }
|
|
819 } );
|
|
820
|
|
821 #Or for a 2D array data driven approach
|
|
822 $helper->batch( -SQL => 'insert into table (one,two) values(?,?)',
|
|
823 -DATA => $alotofdata );
|
|
824
|
|
825 Takes in a sql statement & a code reference. Your SQL is converted into a
|
|
826 prepared statement & then given as the first parameter to the closure. The
|
|
827 second parameter is the DBH which created the statement. This is intended
|
|
828 to let you do mass insertion into a database without the need to
|
|
829 re-preparing the same statement.
|
|
830
|
|
831 This can be combined with the transaction() code to provide a construct
|
|
832 which does batch insertion & is transactionally aware.
|
|
833
|
|
834 We can also use data based batch insertions i.e.
|
|
835
|
|
836 #Needs to be like:
|
|
837 # [ [1,2], [3,4] ]
|
|
838 #Or if using the DBI types:
|
|
839 # [ [ [ 1, SQL_INTEGER ], [ 2, SQL_INTEGER ] ],
|
|
840 # [ [ 3, SQL_INTEGER ], [ 4, SQL_INTEGER ] ] ];
|
|
841
|
|
842 my $alotofdata = getitfromsomewhere();
|
|
843 $helper->batch( -SQL => 'insert into table (one,two) values(?,?)',
|
|
844 -DATA => $alotofdata );
|
|
845
|
|
846 This does exactly what the previous example.
|
|
847
|
|
848 All batch statements will return the value the callback computes. If you are
|
|
849 using the previous example with a data array then the code will return the
|
|
850 number affected rows by the query.
|
|
851
|
|
852 =cut
|
|
853
|
|
854 sub batch {
|
|
855 my ($self, @args) = @_;
|
|
856 my ($sql, $callback, $data, $commit_every, $prepare_params) =
|
|
857 rearrange([qw(sql callback data commit_every prepare_params)], @args);
|
|
858
|
|
859 if(! defined $callback && ! defined $data) {
|
|
860 throw('You need to define a callback for insertion work or the 2D data array');
|
|
861 }
|
|
862
|
|
863 my $result;
|
|
864 if(defined $callback) {
|
|
865 $result = $self->_callback_batch($sql, $callback, $prepare_params);
|
|
866 }
|
|
867 else {
|
|
868 $result = $self->_data_batch($sql, $data, $commit_every, $prepare_params);
|
|
869 }
|
|
870 return $result if defined $result;
|
|
871 return;
|
|
872 }
|
|
873
|
|
874 #------- Internal methods
|
|
875
|
|
876 my $default_mappers = {
|
|
877 first_element => sub {
|
|
878 my ($row) = @_;
|
|
879 return $row->[0];
|
|
880 },
|
|
881 second_element => sub {
|
|
882 my ($row) = @_;
|
|
883 return $row->[1];
|
|
884 },
|
|
885 array_ref => sub {
|
|
886 my $row = shift @_;
|
|
887 return [@{$row}];
|
|
888 }
|
|
889 };
|
|
890
|
|
891 sub _mappers {
|
|
892 my ($self) = @_;
|
|
893 return $default_mappers;
|
|
894 }
|
|
895
|
|
896 sub _perform_transaction_code {
|
|
897 my ($self) = @_;
|
|
898 return $self->{_transaction_active}->{$PROCESS_ID} ? 0 : 1;
|
|
899 }
|
|
900
|
|
901 sub _enable_transaction {
|
|
902 my ($self) = @_;
|
|
903 my $dbc = $self->db_connection();
|
|
904 my $original_dwi = $dbc->disconnect_when_inactive();
|
|
905 $dbc->disconnect_when_inactive(0);
|
|
906 my $ac = $dbc->db_handle()->{'AutoCommit'};
|
|
907 $dbc->db_handle()->{'AutoCommit'} = 0;
|
|
908 $self->{_transaction_active}->{$PROCESS_ID} = 1;
|
|
909 return ($original_dwi, $ac);
|
|
910 }
|
|
911
|
|
912 sub _disable_transaction {
|
|
913 my ($self, $original_dwi, $ac) = @_;
|
|
914 my $dbc = $self->db_connection();
|
|
915 $dbc->db_handle()->{'AutoCommit'} = $ac;
|
|
916 $dbc->disconnect_when_inactive($original_dwi);
|
|
917 delete $self->{_transaction_active}->{$PROCESS_ID};
|
|
918 return;
|
|
919 }
|
|
920
|
|
921 sub _bind_params {
|
|
922 my ( $self, $sth, $params ) = @_;
|
|
923
|
|
924 return if ! defined $params; #Return quickly if we had no data
|
|
925
|
|
926 if(! check_ref($params, 'ARRAY')) {
|
|
927 throw(qq{The given parameters reference '${params}' is not an ARRAY; wrap in an ArrayRef});
|
|
928 }
|
|
929
|
|
930 my $count = 1;
|
|
931 foreach my $param (@{$params}) {
|
|
932 if ( check_ref($param, 'ARRAY') ) {
|
|
933 $sth->bind_param( $count, @{$param} );
|
|
934 }
|
|
935 else {
|
|
936 $sth->bind_param( $count, $param );
|
|
937 }
|
|
938 $count++;
|
|
939 }
|
|
940 return;
|
|
941 }
|
|
942
|
|
943 sub _execute {
|
|
944 my ( $self, $sql, $callback, $has_return, $use_hashrefs, $params, $prepare_params, $iterator ) = @_;
|
|
945
|
|
946 throw('Not given a mapper. _execute() must always been given a CodeRef') unless check_ref($callback, 'CODE');
|
|
947
|
|
948 my $sth = $self->_base_execute($sql, $params, $prepare_params);
|
|
949
|
|
950 my $sth_processor;
|
|
951 if($use_hashrefs) {
|
|
952 $sth_processor = sub {
|
|
953 while( my $row = $sth->fetchrow_hashref() ) {
|
|
954 my $v = $callback->($row, $sth);
|
|
955 return $v if $has_return;
|
|
956 }
|
|
957 $self->_finish_sth($sth);
|
|
958 return undef;
|
|
959 };
|
|
960 }
|
|
961 else {
|
|
962 $sth_processor = sub {
|
|
963 while( my $row = $sth->fetchrow_arrayref() ) {
|
|
964 my $v = $callback->($row, $sth);
|
|
965 return $v if $has_return;
|
|
966 }
|
|
967 $self->_finish_sth($sth);
|
|
968 return undef;
|
|
969 };
|
|
970 }
|
|
971
|
|
972 my $iter = Bio::EnsEMBL::Utils::Iterator->new($sth_processor);
|
|
973 if($has_return) {
|
|
974 return $iter if $iterator;
|
|
975 return $iter->to_arrayref();
|
|
976 }
|
|
977 else {
|
|
978 #Force iteration if we had no return since the caller is expecting this
|
|
979 $iter->each(sub {});
|
|
980 }
|
|
981 return;
|
|
982 }
|
|
983
|
|
984 sub _base_execute {
|
|
985 my ( $self, $sql, $params, $prepare_params) = @_;
|
|
986
|
|
987 $params = [] unless $params;
|
|
988
|
|
989 my $conn = $self->db_connection;
|
|
990
|
|
991 my $sth;
|
|
992 eval {
|
|
993 my @prepare_params;
|
|
994 @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY');
|
|
995 $sth = $conn->prepare($sql, @prepare_params);
|
|
996 throw("Cannot continue as prepare() did not return a handle with prepare params '@prepare_params'")
|
|
997 unless $sth;
|
|
998 $self->_bind_params( $sth, $params );
|
|
999 $sth->execute();
|
|
1000 };
|
|
1001
|
|
1002 my $error = $@;
|
|
1003 if($error) {
|
|
1004 throw("Cannot run '${sql}' with params '@{$params}' due to error: $error") if $error;
|
|
1005 }
|
|
1006
|
|
1007 return $sth;
|
|
1008 }
|
|
1009
|
|
1010 sub _finish_sth {
|
|
1011 my ($self, $sth) = @_;
|
|
1012 eval { $sth->finish() if defined $sth; };
|
|
1013 warn('Cannot finish() the statement handle: $@') if $@;
|
|
1014 return;
|
|
1015 }
|
|
1016
|
|
1017 sub _callback_batch {
|
|
1018 my ($self, $sql, $callback, $prepare_params) = @_;
|
|
1019 my $error;
|
|
1020 my $sth;
|
|
1021 my $closure_return;
|
|
1022 eval {
|
|
1023 my @prepare_params;
|
|
1024 @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY');
|
|
1025 $sth = $self->db_connection()->prepare($sql, @prepare_params);
|
|
1026 $closure_return = $callback->($sth, $self->db_connection());
|
|
1027 };
|
|
1028 $error = $@;
|
|
1029 $self->_finish_sth($sth);
|
|
1030 throw("Problem detected during batch work: $error") if $error;
|
|
1031
|
|
1032 return $closure_return if defined $closure_return;
|
|
1033 return;
|
|
1034 }
|
|
1035
|
|
1036 sub _data_batch {
|
|
1037 my ($self, $sql, $data, $commit_every, $prepare_params) = @_;
|
|
1038
|
|
1039 #Input checks
|
|
1040 assert_ref($data, 'ARRAY', '-DATA');
|
|
1041 my $data_length = scalar(@{$data});
|
|
1042 return 0 unless $data_length > 0;
|
|
1043 my $first_row = $data->[0];
|
|
1044 throw('I expect to work with a 2D ArrayRef but this is not one') unless check_ref($first_row, 'ARRAY');
|
|
1045
|
|
1046 my $callback = sub {
|
|
1047 my ($sth, $dbc) = @_;
|
|
1048 my $total_affected = 0;
|
|
1049 #Iterate over each data point
|
|
1050 for(my $data_index = 0; $data_index < $data_length; $data_index++) {
|
|
1051 my $row = $data->[$data_index];
|
|
1052 $self->_bind_params($sth, $row);
|
|
1053 my $affected = eval {$sth->execute()};
|
|
1054 if($@) {
|
|
1055 throw("Problem working with $sql with params @{$row}: $@");
|
|
1056 }
|
|
1057 my $num_affected = ($affected) ? $affected : 0; #Get around DBI's 0E0
|
|
1058 $total_affected += $num_affected;
|
|
1059
|
|
1060 #Lets us do a commit once every x rows apart from 0. We also finish
|
|
1061 #off with a commit if the code told us we were doing it
|
|
1062 if($commit_every) {
|
|
1063 if( ($data_index % $commit_every == 0) && $data_index != 0) {
|
|
1064 $dbc->db_handle()->commit();
|
|
1065 }
|
|
1066 }
|
|
1067 }
|
|
1068
|
|
1069 #finish off with a commit if the code told us we were doing it
|
|
1070 if($commit_every) {
|
|
1071 $dbc->db_handle()->commit();
|
|
1072 }
|
|
1073
|
|
1074 return $total_affected || 0;
|
|
1075 };
|
|
1076
|
|
1077 return $self->_callback_batch($sql, $callback, $prepare_params)
|
|
1078 }
|
|
1079
|
|
1080 1;
|