comparison variant_effect_predictor/Bio/EnsEMBL/Utils/SqlHelper.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Utils::SqlHelper
24
25 =head1 VERSION
26
27 $Revision: 1.25 $
28
29 =head1 SYNOPSIS
30
31 use Bio::EnsEMBL::Utils::SqlHelper;
32
33 my $helper =
34 Bio::EnsEMBL::Utils::SqlHelper->new( -DB_CONNECTION => $dbc );
35
36 my $arr_ref = $helper->execute(
37 -SQL => 'select name, age from tab where col =?',
38 -CALLBACK => sub {
39 my @row = @{ shift @_ };
40 return { name => $row[0], age => $row[1] };
41 },
42 -PARAMS => ['A'] );
43
44 use Data::Dumper;
45 print Dumper($arr_ref), "\n";
46 # Prints out [name=>'name', age=>1] maybe ....
47
48
49 # For transactional work; only works if your MySQL table
50 # engine/database supports transactional work (such as InnoDB)
51
52 $helper->transaction(
53 -CALLBACK => sub {
54 if ( $helper->execute_single_result(
55 -SQL => 'select count(*) from tab'
56 ) )
57 {
58 return $helper->execute_update('delete from tab');
59 } else {
60 return
61 $helper->batch( -SQL => 'insert into tab (?,?)',
62 -DATA => [ [ 1, 2 ], [ 1, 3 ], [ 1, 4 ] ] );
63 }
64 } );
65
66 =head1 DESCRIPTION
67
68 Easier database interaction
69
70 =head1 METHODS
71
72 See subrotuines.
73
74 =cut
75
76 package Bio::EnsEMBL::Utils::SqlHelper;
77
78 use warnings;
79 use strict;
80
81 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
82 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
83 use Bio::EnsEMBL::Utils::Exception qw(throw);
84 use Bio::EnsEMBL::Utils::Iterator;
85 use English qw( -no_match_vars ); #Used for $PROCESS_ID
86 use Scalar::Util qw(weaken); #Used to not hold a strong ref to DBConnection
87
88 =pod
89
90 =head2 new()
91
92 Arg [DB_CONNECTION] : Bio::EnsEMBL::DBSQL::DBConnection $db_connection
93 Returntype : Instance of helper
94 Exceptions : If the object given as a DBConnection is not one or it
95 was undefined
96 Status : Stable
97
98 Creates a new instance of this object.
99
100 my $dba = get_dba('mydb'); # New DBAdaptor from somewhere
101 my $helper = Bio::EnsEMBL::Utils::SqlHelper->new(
102 -DB_CONNECTION => $dba->dbc() );
103
104 $helper->execute_update( -SQL => 'update tab set flag=?',
105 -PARAMS => [1] );
106
107 =cut
108
109 sub new {
110 my ( $class, @args ) = @_;
111
112 my ($db_connection) = rearrange([qw(db_connection)], @args);
113
114 my $self = bless( {}, ref($class) || $class );
115 throw('-DB_CONNECTION construction parameter was undefined.')
116 unless defined $db_connection;
117 $self->db_connection($db_connection);
118
119 return $self;
120 }
121
122 =pod
123
124 =head2 db_connection()
125
126 Arg [1] : Bio::EnsEMBL::DBSQL::DBConnection $db_connection
127 Description : Sets and retrieves the DBConnection
128 Returntype : Bio::EnsEMBL::DBSQL::DBConnection
129 Exceptions : If the object given as a DBConnection is not one or if an
130 attempt is made to set the value more than once
131 Status : Stable
132
133 =cut
134
135 sub db_connection {
136 my ($self, $db_connection) = @_;
137 if(defined $db_connection) {
138 if(exists $self->{db_connection}) {
139 throw('Cannot reset the DBConnection object; already defined ');
140 }
141 assert_ref($db_connection, 'Bio::EnsEMBL::DBSQL::DBConnection', 'db_connection');
142 $self->{db_connection} = $db_connection;
143 weaken $self->{db_connection};
144 }
145 return $self->{db_connection};
146 }
147
148 # --------- SQL Methods
149
150 =pod
151
152 =head2 execute() - Execute a SQL statement with a custom row handler
153
154 Arg [SQL] : string SQL to execute
155 Arg [CALLBACK] : CodeRef; The callback to use for mapping a row to a data
156 point; leave blank for a default mapping to a 2D array
157 Arg [USE_HASHREFS] : boolean If set to true will cause HashRefs to be returned
158 to the callback & not ArrayRefs
159 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement
160 Arg [PREPARE_PARAMS] : boolean Parameters to be passed onto the Statement Handle
161 prepare call
162 Arg [ITERATOR] : boolean Request a L<Bio::EnsEMBL::Utils::Iterator>
163 rather than a 2D array
164 Returntype : ArrayRef/L<Bio::EnsEMBL::Utils::Iterator>
165 Exceptions : If errors occur in the execution of the SQL
166 Status : Stable
167
168 my $arr_ref = $helper->execute(
169 -SQL => 'select a,b,c from tab where col =?',
170 -CALLBACK => sub {
171 my @row = @{ shift @_ };
172 return { A => $row[0], B => $row[1], C => $row[2] };
173 },
174 -PARAMS => ['A'] );
175
176 #Or with hashrefs
177 my $arr_ref = $helper->execute(
178 -SQL => 'select a,b,c from tab where col =?',
179 -USE_HASHREFS => 1,
180 -CALLBACK => sub {
181 my $row = shift @_;
182 return { A => $row->{a}, B => $row->{b}, C => $row->{c} };
183 },
184 -PARAMS => ['A'] );
185
186 Uses a callback defined by the C<sub> decalaration. Here we specify how
187 the calling code will deal with each row of a database's result set. The
188 sub can return any type of Object/hash/data structure you require.
189
190 Should you not specify a callback then a basic one will be assigned to
191 you which will return a 2D array structure e.g.
192
193 my $arr_ref = $helper->execute(
194 -SQL => 'select a,b,c from tab where col =?',
195 -PARAMS => ['A'] );
196
197 This is equivalent to DBI's c<selectall_arrayref()> subroutine.
198
199 As an extension to this method you can write a closure subroutine which
200 takes in two parameters. The first is the array/hash reference & the
201 second is the statement handle used to execute. 99% of the time you will
202 not need it but there are occasions where you do need it. An example of
203 usage would be:
204
205 my $conn = get_conn(); #From somwewhere
206 my $arr_ref = $conn->execute(
207 -SQL => 'select a,b,c from tab where col =?',
208 -USE_HASHREFS => 1,
209 -CALLBACK => sub {
210 my ( $row, $sth ) = @_;
211 #Then do something with sth
212 return { A => $row->[0], B => $row->[1], C => $row->[2] };
213 },
214 -PARAMS => ['A'] );
215
216 Any arguments to bind to the incoming statement. This can be a set of scalars
217 or a 2D array if you need to specify any kind of types of sql objects i.e.
218
219 use DBI qw(:sql_types);
220
221 my $conn = get_conn();
222 my $arr_ref = $conn->execute(
223 -SQL =>
224 'select a,b,c from tab where col =? and num_col=? and other=?',
225 -USE_HASHREFS => 1,
226 -CALLBACK => sub {
227 my @row = @{ shift @_ };
228 return { A => $row[0], B => $row[1], C => $row[2] };
229 },
230 -PARAMS => [ '1', SQL_VARCHAR ],
231 [ 2, SQL_INTEGER ],
232 'hello' );
233
234 Here we import DBI's sql types into our package and then pass in
235 multiple anonymous array references as parameters. Each param is
236 tested in the input and if it is detected to be an ARRAY reference we
237 dereference the array and run DBI's bind_param method. In fact you can
238 see each part of the incoming paramaters array as the contents to call
239 C<bind_param> with. The only difference is the package tracks the bind
240 position for you.
241
242 We can get back a L<Bio::EnsEMBL::Utils::Iterator> object which can be used
243 to iterate over the results set without first materializing the data into
244 memory. An example would be:
245
246 my $iterator = $helper->execute(
247 -SQL => 'select a,b,c from tab where col =?',
248 -PARAMS => ['A']
249 -ITERATOR => 1);
250 while($iterator->has_next()) {
251 my $row = $iterator->next();
252 #Do something
253 }
254
255 This is very useful for very large datasets.
256
257 =cut
258
259 sub execute {
260 my ( $self, @args ) = @_;
261 my ($sql, $callback, $use_hashrefs, $params, $prepare_params, $iterator) =
262 rearrange([qw(sql callback use_hashrefs params prepare_params iterator)], @args);
263 my $has_return = 1;
264
265 #If no callback then we execute using a default one which returns a 2D array
266 if(!defined $callback) {
267 throw('Cannot use fetchrow_hashref() with default mappers. Turn off this option') if $use_hashrefs;
268 $callback = $self->_mappers()->{array_ref};
269 }
270
271 return $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params, $prepare_params, $iterator );
272 }
273
274 =pod
275
276 =head2 execute_simple()
277
278 Arg [SQL] : string $sql
279 Arg [PARAMS] : ArrayRef $params
280 Arg [CALLBACK] : CodeRef $callback
281 Returntype : ArrayRef of 1D elements
282 Exceptions : If errors occur in the execution of the SQL
283 Status : Stable
284
285 my $classification =
286 $helper->execute_simple(
287 -SQL =>
288 'select meta_val from meta where meta_key =? order by meta_id',
289 -PARAMS => ['species.classification'] );
290
291 Identical to C<execute> except you do not specify a sub-routine reference.
292 Using this code assumes you want an array of single scalar values as returned
293 by the given SQL statement.
294
295 =cut
296
297 sub execute_simple {
298 my ( $self, @args ) = @_;
299 my ($sql, $params, $callback) = rearrange([qw(sql params callback)], @args);
300 my $has_return = 1;
301 my $use_hashrefs = 0;
302 $callback ||= $self->_mappers()->{first_element};
303 return $self->_execute($sql, $callback, $has_return, $use_hashrefs, $params);
304 }
305
306 =pod
307
308 =head2 execute_no_return()
309
310 Arg [SQL] : string sql
311 Arg [CALLBACK] : CodeRef The callback to use for mapping a row to a data point;
312 we assume you are assigning into a data structure which
313 has requirements other than simple translation into an
314 array
315 Arg [USE_HASHREFS] : boolean If set to true will cause HashRefs to be returned
316 to the callback & not ArrayRefs
317 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement
318 Returntype : None
319 Exceptions : If errors occur in the execution of the SQL
320 Status : Stable
321
322 Whilst all other execute methods will return something; this assumes that the
323 given mapper subroutine will be performing the business of placing values
324 somewhere or doing something with them.
325
326 There is a huge temptation to nest queries using this method; do not! Execute
327 the values into an array using one of the other methods then run your subqueries
328 on them; or make a better first query. SQL is flexible; so use it.
329
330 =cut
331
332 sub execute_no_return {
333 my ( $self, @args ) = @_;
334 my ($sql, $callback, $use_hashrefs, $params) = rearrange([qw(sql callback use_hashrefs params)], @args);
335 throw('No callback defined but this is a required parameter for execute_no_return()') if ! $callback;
336 my $has_return = 0;
337 my $prepare_params = [];
338 $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params);
339 return;
340 }
341
342 =pod
343
344 =head2 execute_into_hash()
345
346 Arg [SQL] : string $sql
347 Arg [CALLBACK] : CodeRef The callback to use for mapping to a value in a hash
348 keyed by the first element in your result set;
349 leave blank for a default mapping to a scalar value
350 of the second element
351 Arg [PARAMS] : The binding parameters to the SQL statement
352 Returntype : HashRef keyed by column 1 & value is the return of callback
353 Exceptions : If errors occur in the execution of the SQL
354 Status : Stable
355
356 A variant of the execute methods but rather than returning a list of
357 mapped results this will assume the first column of a returning map &
358 the calling subroutine will map the remainder of your return as the
359 hash's key.
360
361 B<This code can handle simple queries to hashes, complex value mappings
362 and repeated mappings for the same key>.
363
364 For example:
365
366 my $sql = 'select key, one, two from table where something =?';
367 my $mapper = sub {
368 my ( $row, $value ) = @_;
369 #Ignore field 0 as that is being used for the key
370 my $obj = Some::Obj->new( one => $row->[1], two => $row->[2] );
371 return $obj;
372 };
373
374 my $hash =
375 $helper->execute_into_hash( -SQL => $sql,
376 -CALLBACK => $mapper,
377 -PARAMS => ['val'] );
378
379 #Or for a more simple usage
380 my $sql = 'select biotype, count(gene_id) from gene group by biotype';
381 my $biotype_hash = $conn->execute_into_hash( -SQL => $sql );
382 print $biotype_hash->{protein_coding} || 0, "\n";
383
384 The basic pattern assumes a scenario where you are mapping in a one
385 key to one value. For more advanced mapping techniques you can use the
386 second value passed to the subroutine paramater set. This is shown as
387 C<$value> in the above examples. This value is what is found in the HASH
388 being populated in the background. So on the first time you encounter it
389 for the given key it will be undefined. For future invocations it will
390 be set to the value you gave it. This allows us to setup code like the
391 following
392
393 my %args = ( -SQL => 'select meta_key, meta_value from meta '
394 . 'where meta_key =? order by meta_id',
395 -PARAMS => ['species.classification'] );
396
397 my $hash = $helper->execute_into_hash(
398 %args,
399 -CALLBACK => sub {
400 my ( $row, $value ) = @_;
401 $value = [] if !defined $value;
402 push( @{$value}, $row->[1] );
403 return $value;
404 } );
405
406 #OR
407
408 $hash = $helper->execute_into_hash(
409 %args,
410 -CALLBACK => sub {
411 my ( $row, $value ) = @_;
412 if ( defined $value ) {
413 push( @{$value}, $row->[1] );
414 return;
415 }
416 my $new_value = [ $row->[1] ];
417 return $new_value;
418 } );
419
420 The code understands that returning a defined value means to push this
421 value into the background hash. In example one we keep on re-inserting
422 the Array of classifications into the hash. Example two shows an early
423 return from the callback which indicates to the code we do not have any
424 value to re-insert into the hash. Of the two methods example one is
425 clearer but is possibliy slower.
426
427 B<Remember that the row you are given is the full row & not a view of
428 the reminaing fields.> Therefore indexing for the data you are concerned
429 with begins at position 1.
430
431 =cut
432
433 sub execute_into_hash {
434 my ( $self, @args ) = @_;
435 my ($sql, $callback, $params) = rearrange([qw(sql callback params)], @args);
436 my $hash = {};
437
438 #If no callback then we execute using a default one which sets value to 2nd element
439 if(!defined $callback) {
440 $callback = $self->_mappers()->{second_element};
441 }
442
443 #Default mapper uses the 1st key + something else from the mapper
444 my $mapper = sub {
445 my $row = shift @_;
446 my $key = $row->[0];
447 my $value = $hash->{$key};
448 my $new_value = $callback->($row, $value);
449 if(defined $new_value) {
450 $hash->{ $key } = $new_value;
451 }
452 return;
453 };
454
455 $self->execute_no_return(
456 -SQL => $sql,
457 -CALLBACK => $mapper,
458 -PARAMS => $params
459 );
460
461 return $hash;
462 }
463
464 =pod
465
466 =head2 execute_single_result()
467
468 Arg [SQL] : string $sql
469 Arg [CALLBACK] : CodeRef The callback to use for mapping a row to a data point;
470 leave blank for a default scalar mapping
471 Arg [USE_HASHREFS] : boolean If set to true will cause HashRefs to be returned
472 to the callback & not ArrayRefs
473 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement
474 Returntype : Scalar
475 Exceptions : If errors occur in the execution of the SQL, if the query
476 returned more than 1 row and if we found no rows.
477 Status : Stable
478
479 my $meta_count =
480 $helper->execute_single_result(
481 -SQL => 'select count(*) from meta where species_id =?',
482 -PARAMS => [1] );
483
484 Very similar to C<execute()> except it will raise an exception if we have more
485 or less than one row returned
486
487 =cut
488
489 sub execute_single_result {
490 my ( $self, @args ) = @_;
491 my ($sql, $callback, $use_hashrefs, $params) = rearrange(
492 [qw(sql callback use_hashrefs params)], @args);
493
494 my $results = $self->execute_simple(
495 -SQL => $sql,
496 -CALLBACK => $callback,
497 -USE_HASHREFS => $use_hashrefs,
498 -PARAMS => $params
499 );
500
501 my $result_count = scalar(@{$results});
502 if($result_count != 1) {
503 $params = [] if ! $params;
504 my $type = ($result_count == 0) ? 'No' : 'Too many';
505 my $msg = "${type} results returned. Expected 1 but got $result_count for query '${sql}' with params [";
506 $msg .= join( ',', map {(defined $_) ? $_ : '-undef-';} @{$params} );
507 $msg .= ']';
508 throw($msg);
509 }
510 return $results->[0];
511 }
512
513 =pod
514
515 =head2 transaction()
516
517 Arg [CALLBACK] : CodeRef The callback used for transaction isolation; once
518 the subroutine exists the code will decide on rollback
519 or commit. Required
520 Arg [RETRY] : integer the number of retries to attempt with this
521 transactional block. Defaults to 0.
522 Arg [PAUSE] : integer the time in seconds to pause in-between retries.
523 Defaults to 1.
524 Arg [CONDITION] : CodeRef allows you to inspect the exception raised
525 and should your callback return true then the
526 retry will be attempted. If not given then all
527 exceptions mean attempt a retry (if specified)
528 Returntype : Return of the callback
529 Exceptions : If errors occur in the execution of the SQL
530 Status : Stable
531
532 my $val = $helper->transaction(
533 -CALLBACK => sub {
534 my ($dbc) = @_;
535 #Do something
536 return 1;
537 } );
538
539 #Or because of the arguments method we use
540 my $val = $helper->transaction(
541 sub {
542 my ($dbc) = @_;
543 #Do something
544 return 1;
545 } );
546
547 Creates a transactional block which will ensure that the connection is
548 committed when your submmited subroutine has finished or will rollback
549 in the event of an error occuring in your block.
550
551 The code will always force AutoCommit off but will restore it to its
552 previous setting. If your DBI/DBD driver does not support manual
553 commits then this code will break. The code will turn off the
554 C<disconnect_when_idle()> method to allow transactions to work as
555 expected.
556
557 An effect of using REPEATABLE READ transaction isolation (InnoDB's
558 default) is that your data is as fresh as when you started your current
559 transaction. To ensure the freshest data use C<SELECT ... from ... LOCK
560 IN SHARE MODE> or C<SELECT ... from ... LOCK FOR UPDATE> if you are
561 going to issue updates.
562
563 Creating a transaction within a transaction results in the commit
564 rollback statements occuring in the top level transaction. That way any
565 block of code which is meant to to be transaction can be wrapped in
566 this block ( assuming the same instance of SQLHelper is passed around &
567 used).
568
569 You can also request the retry of a transactional block of code which is
570 causing problems. This is not a perfect solution as it indicates your
571 programming model is broken. This mode can be specified as such:
572
573 my $val = $helper->transaction(
574 -RETRY => 3, -PAUSE => 2,
575 -CALLBACK => sub {
576 my ($dbc) = @_;
577 #Do something
578 return 1;
579 } );
580
581 The C<-RETRY> argument indicates the number of times we attempt the transaction
582 and C<-PAUSE> indicates the time in-between attempts. These retries will
583 only occur in the root transaction block i.e. you cannot influence the
584 retry system in a sub transaction. You can influence if the retry is done with
585 the C<-CONDITION> argument which accepts a Code reference (same as the
586 C<-CALLBACK> parameter). This allows you to inspect the error thrown to
587 retry only in some situations e.g.
588
589 my $val = $helper->transaction(
590 -RETRY => 3, -PAUSE => 2,
591 -CALLBACK => sub {
592 my ($dbc) = @_;
593 #Do something
594 return 1;
595 },
596 -CONDITION => sub {
597 my ($error) = @_;
598 return ( $error =~ /deadlock/ ) ? 1 : 0;
599 }
600 );
601
602 Here we attempt a transaction and will B<only> retry when we have an error
603 with the phrase deadlock.
604
605 =cut
606
607 sub transaction {
608 my ($self, @args) = @_;
609
610 my ($callback, $retry, $pause, $condition) = rearrange([qw(callback retry pause condition)], @args);
611
612 throw('-CALLBACK was not a CodeRef. Got a reference of type ['.ref($callback).']. Check your parameters')
613 unless check_ref($callback, 'CODE');
614
615 #Setup defaults
616 $retry = 0 unless defined $retry;
617 $pause = 1 unless defined $pause;
618 if(! defined $condition) {
619 $condition = sub {
620 return 1;
621 };
622 }
623
624 assert_ref($condition, 'CODE', '-CONDITION');
625
626 my $dbc = $self->db_connection();
627 my $original_dwi;
628 my $ac;
629
630 my $error;
631 my $result;
632
633 #If we were already in a transaction then we do not do any management of the
634 #session & wait for the parent transaction(s) to finish
635 my $perform_transaction = $self->_perform_transaction_code();
636 if($perform_transaction) {
637 ($original_dwi, $ac) = $self->_enable_transaction();
638 }
639 else {
640 #If we were in a transaction then ignore any attempts at retry here
641 $retry = 0;
642 }
643
644 for(my $iteration = 0; $iteration <= $retry; $iteration++) {
645 eval {
646 $result = $callback->($dbc);
647 $dbc->db_handle()->commit() if $perform_transaction;
648 };
649 $error = $@;
650 #If we were allowed to deal with the error then we apply rollbacks & then
651 #retry or leave to the remainder of the code to throw
652 if($perform_transaction && $error) {
653 eval { $dbc->db_handle()->rollback(); };
654 #If we were not on our last iteration then warn & allow the retry
655 if($iteration != $retry) {
656 if($condition->($error)) {
657 warn("Encountered error on attempt ${iteration} of ${retry} and have issued a rollback. Will retry after sleeping for $pause second(s): $error");
658 sleep $pause;
659 }
660 else {
661 last; #break early if condition of error was not matched
662 }
663 }
664 }
665
666 #Always break the loop if we had a successful attempt
667 last if ! $error;
668 }
669
670 if($perform_transaction) {
671 $self->_disable_transaction($original_dwi, $ac);
672 }
673
674 throw("ABORT: Transaction aborted because of error: ${error}") if $error;
675
676 return $result;
677 }
678
679 =pod
680
681 =head2 execute_update()
682
683 Arg [SQL] : string $sql
684 Arg [CALLBACK] : CodeRef The callback to use for calling methods on the
685 DBI statement handle or DBConnection object after an
686 update command
687 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement
688 Arg [PREPARE_PARAMS] : ArrayRef Parameters to bind to the prepare() StatementHandle call
689 Returntype : Number of rows affected
690 Exceptions : If errors occur in the execution of the SQL
691 Status : Stable
692
693 Used for performing updates but conforms to the normal execute statement
694 subroutines.
695
696 use DBI qw(:sql_types);
697 $helper->execute_update(-SQL => 'update tab set name = ? where id =?',
698 -PARAMS => [ 'andy', [ 1, SQL_INTEGER ] ] );
699
700 If you need to do something a bit more advanced with your DML then you can
701 give the method a closure and this will be called after the execute has been
702 issued i.e.
703
704 my $obj;
705 $helper->execute_update(
706 -SQL => 'insert into tab (name) values(?)',
707 -CALLBACK => sub {
708 my ( $sth, $dbh ) = @_;
709 $obj->{id} = $dbh->{mysql_insertid};
710 },
711 -PARAMS => [ $obj->name() ] );
712
713 This lets us access the statement handle & database handle to access other
714 properties such as the last identifier inserted.
715
716 =cut
717
718 sub execute_update {
719 my ($self, @args) = @_;
720 my ($sql, $callback, $params, $prepare_params) = rearrange([qw(sql callback params prepare_params)], @args);
721 my $rv = 0;
722 my $sth;
723 eval {
724 my @prepare_params;
725 @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY');
726 $sth = $self->db_connection()->prepare($sql, @prepare_params);
727 $self->_bind_params($sth, $params);
728 $rv = $sth->execute();
729 $callback->($sth, $self->db_connection()->db_handle()) if $callback;
730 };
731 my $error = $@;
732 $self->_finish_sth($sth);
733 if($error) {
734 my $params = join ' ', map { (defined $_) ? $_ : q{undef} } @{$params};
735 throw("Cannot apply sql '${sql}' with params '${params}': ${error}");
736 }
737 return $rv;
738 }
739
740 =head2 execute_with_sth()
741
742 Arg [SQL] : string $sql
743 Arg [CALLBACK] : CodeRef The callback to use for working with the statement
744 handle once returned. This is B<not> a mapper.
745 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement
746 Arg [PREPARE_PARAMS] : ArrayRef Used to pass parameters to the statement handle
747 prepare method
748 Description : A subrotuine which abstracts resource handling and statement
749 preparing leaving the developer to define how to handle
750 and process the statement.
751 Returntype : Anything you wish to return from the callback
752 Exceptions : If errors occur in the execution of the SQL
753 Status : Stable
754
755 my $meta_count = $helper->execute_with_sth(
756 -SQL => 'select count(*) from meta where species_id =?',
757 -PARAMS => [1],
758 -CALLBACK => sub {
759 my ($sth) = @_;
760 my $count;
761 $sth->bind_columns( \$count );
762 while ( $sth->fetch ) {
763 print $count, "\n";
764 }
765 return $count;
766 } );
767
768 Very similar to C<execute()> except this gives you full control over the
769 lifecycle of the statement handle & how you wish to proceed with working
770 with a statement handle. This is for situations where you believe going through
771 the mappers causes too much of a slow-down (since we have to execute a
772 subroutine for every row in order to map it correctly).
773
774 However please benchmark before adopting this method as it increases the
775 complexity of your code and the mapper slow down only becomes apparent when
776 working with very large numbers of rows.
777
778 =cut
779
780 sub execute_with_sth {
781 my ($self, @args) = @_;
782 my ($sql, $callback, $params, $prepare_params) = rearrange([qw(sql callback params prepare_params)], @args);
783 my $sth = $self->_base_execute( $sql, $params, $prepare_params, $callback );
784 my $result = eval {$callback->($sth)};
785 my $error = $@;
786 $self->_finish_sth($sth);
787 die $error if $error;
788 return $result;
789 }
790
791 =pod
792
793 =head2 batch()
794
795 Arg [SQL] : string $sql
796 Arg [CALLBACK] : CodeRef The callback to use for working with the statement
797 handle once returned; specify this or -DATA
798 Arg [DATA] : ArrayRef The data to insert; specify this or -CALLBACK
799 Arg [COMMIT_EVERY] : Integer defines the rate at which to issue commits to
800 the DB handle. This is important when working with
801 InnoDB databases since it affects the speed of rollback
802 (larger gaps inbetween commits means more to rollback).
803
804 Ignored if using the callback version.
805 Arg [PREPARE_PARAMS] : ArrayRef Used to pass parameters to the statement handle
806 prepare method
807 Returntype : integer rows updated
808 Exceptions : If errors occur in the execution of the SQL
809 Status : Stable
810
811 my $alotofdata = getitfromsomewhere();
812 $helper->batch(
813 -SQL => 'insert into table (one,two) values(?,?)',
814 -CALLBACk => sub {
815 my ( $sth, $dbc ) = @_;
816 foreach my $data (@alotofdata) {
817 $sth->execute( @{$data} );
818 }
819 } );
820
821 #Or for a 2D array data driven approach
822 $helper->batch( -SQL => 'insert into table (one,two) values(?,?)',
823 -DATA => $alotofdata );
824
825 Takes in a sql statement & a code reference. Your SQL is converted into a
826 prepared statement & then given as the first parameter to the closure. The
827 second parameter is the DBH which created the statement. This is intended
828 to let you do mass insertion into a database without the need to
829 re-preparing the same statement.
830
831 This can be combined with the transaction() code to provide a construct
832 which does batch insertion & is transactionally aware.
833
834 We can also use data based batch insertions i.e.
835
836 #Needs to be like:
837 # [ [1,2], [3,4] ]
838 #Or if using the DBI types:
839 # [ [ [ 1, SQL_INTEGER ], [ 2, SQL_INTEGER ] ],
840 # [ [ 3, SQL_INTEGER ], [ 4, SQL_INTEGER ] ] ];
841
842 my $alotofdata = getitfromsomewhere();
843 $helper->batch( -SQL => 'insert into table (one,two) values(?,?)',
844 -DATA => $alotofdata );
845
846 This does exactly what the previous example.
847
848 All batch statements will return the value the callback computes. If you are
849 using the previous example with a data array then the code will return the
850 number affected rows by the query.
851
852 =cut
853
854 sub batch {
855 my ($self, @args) = @_;
856 my ($sql, $callback, $data, $commit_every, $prepare_params) =
857 rearrange([qw(sql callback data commit_every prepare_params)], @args);
858
859 if(! defined $callback && ! defined $data) {
860 throw('You need to define a callback for insertion work or the 2D data array');
861 }
862
863 my $result;
864 if(defined $callback) {
865 $result = $self->_callback_batch($sql, $callback, $prepare_params);
866 }
867 else {
868 $result = $self->_data_batch($sql, $data, $commit_every, $prepare_params);
869 }
870 return $result if defined $result;
871 return;
872 }
873
874 #------- Internal methods
875
876 my $default_mappers = {
877 first_element => sub {
878 my ($row) = @_;
879 return $row->[0];
880 },
881 second_element => sub {
882 my ($row) = @_;
883 return $row->[1];
884 },
885 array_ref => sub {
886 my $row = shift @_;
887 return [@{$row}];
888 }
889 };
890
891 sub _mappers {
892 my ($self) = @_;
893 return $default_mappers;
894 }
895
896 sub _perform_transaction_code {
897 my ($self) = @_;
898 return $self->{_transaction_active}->{$PROCESS_ID} ? 0 : 1;
899 }
900
901 sub _enable_transaction {
902 my ($self) = @_;
903 my $dbc = $self->db_connection();
904 my $original_dwi = $dbc->disconnect_when_inactive();
905 $dbc->disconnect_when_inactive(0);
906 my $ac = $dbc->db_handle()->{'AutoCommit'};
907 $dbc->db_handle()->{'AutoCommit'} = 0;
908 $self->{_transaction_active}->{$PROCESS_ID} = 1;
909 return ($original_dwi, $ac);
910 }
911
912 sub _disable_transaction {
913 my ($self, $original_dwi, $ac) = @_;
914 my $dbc = $self->db_connection();
915 $dbc->db_handle()->{'AutoCommit'} = $ac;
916 $dbc->disconnect_when_inactive($original_dwi);
917 delete $self->{_transaction_active}->{$PROCESS_ID};
918 return;
919 }
920
921 sub _bind_params {
922 my ( $self, $sth, $params ) = @_;
923
924 return if ! defined $params; #Return quickly if we had no data
925
926 if(! check_ref($params, 'ARRAY')) {
927 throw(qq{The given parameters reference '${params}' is not an ARRAY; wrap in an ArrayRef});
928 }
929
930 my $count = 1;
931 foreach my $param (@{$params}) {
932 if ( check_ref($param, 'ARRAY') ) {
933 $sth->bind_param( $count, @{$param} );
934 }
935 else {
936 $sth->bind_param( $count, $param );
937 }
938 $count++;
939 }
940 return;
941 }
942
943 sub _execute {
944 my ( $self, $sql, $callback, $has_return, $use_hashrefs, $params, $prepare_params, $iterator ) = @_;
945
946 throw('Not given a mapper. _execute() must always been given a CodeRef') unless check_ref($callback, 'CODE');
947
948 my $sth = $self->_base_execute($sql, $params, $prepare_params);
949
950 my $sth_processor;
951 if($use_hashrefs) {
952 $sth_processor = sub {
953 while( my $row = $sth->fetchrow_hashref() ) {
954 my $v = $callback->($row, $sth);
955 return $v if $has_return;
956 }
957 $self->_finish_sth($sth);
958 return undef;
959 };
960 }
961 else {
962 $sth_processor = sub {
963 while( my $row = $sth->fetchrow_arrayref() ) {
964 my $v = $callback->($row, $sth);
965 return $v if $has_return;
966 }
967 $self->_finish_sth($sth);
968 return undef;
969 };
970 }
971
972 my $iter = Bio::EnsEMBL::Utils::Iterator->new($sth_processor);
973 if($has_return) {
974 return $iter if $iterator;
975 return $iter->to_arrayref();
976 }
977 else {
978 #Force iteration if we had no return since the caller is expecting this
979 $iter->each(sub {});
980 }
981 return;
982 }
983
984 sub _base_execute {
985 my ( $self, $sql, $params, $prepare_params) = @_;
986
987 $params = [] unless $params;
988
989 my $conn = $self->db_connection;
990
991 my $sth;
992 eval {
993 my @prepare_params;
994 @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY');
995 $sth = $conn->prepare($sql, @prepare_params);
996 throw("Cannot continue as prepare() did not return a handle with prepare params '@prepare_params'")
997 unless $sth;
998 $self->_bind_params( $sth, $params );
999 $sth->execute();
1000 };
1001
1002 my $error = $@;
1003 if($error) {
1004 throw("Cannot run '${sql}' with params '@{$params}' due to error: $error") if $error;
1005 }
1006
1007 return $sth;
1008 }
1009
1010 sub _finish_sth {
1011 my ($self, $sth) = @_;
1012 eval { $sth->finish() if defined $sth; };
1013 warn('Cannot finish() the statement handle: $@') if $@;
1014 return;
1015 }
1016
1017 sub _callback_batch {
1018 my ($self, $sql, $callback, $prepare_params) = @_;
1019 my $error;
1020 my $sth;
1021 my $closure_return;
1022 eval {
1023 my @prepare_params;
1024 @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY');
1025 $sth = $self->db_connection()->prepare($sql, @prepare_params);
1026 $closure_return = $callback->($sth, $self->db_connection());
1027 };
1028 $error = $@;
1029 $self->_finish_sth($sth);
1030 throw("Problem detected during batch work: $error") if $error;
1031
1032 return $closure_return if defined $closure_return;
1033 return;
1034 }
1035
1036 sub _data_batch {
1037 my ($self, $sql, $data, $commit_every, $prepare_params) = @_;
1038
1039 #Input checks
1040 assert_ref($data, 'ARRAY', '-DATA');
1041 my $data_length = scalar(@{$data});
1042 return 0 unless $data_length > 0;
1043 my $first_row = $data->[0];
1044 throw('I expect to work with a 2D ArrayRef but this is not one') unless check_ref($first_row, 'ARRAY');
1045
1046 my $callback = sub {
1047 my ($sth, $dbc) = @_;
1048 my $total_affected = 0;
1049 #Iterate over each data point
1050 for(my $data_index = 0; $data_index < $data_length; $data_index++) {
1051 my $row = $data->[$data_index];
1052 $self->_bind_params($sth, $row);
1053 my $affected = eval {$sth->execute()};
1054 if($@) {
1055 throw("Problem working with $sql with params @{$row}: $@");
1056 }
1057 my $num_affected = ($affected) ? $affected : 0; #Get around DBI's 0E0
1058 $total_affected += $num_affected;
1059
1060 #Lets us do a commit once every x rows apart from 0. We also finish
1061 #off with a commit if the code told us we were doing it
1062 if($commit_every) {
1063 if( ($data_index % $commit_every == 0) && $data_index != 0) {
1064 $dbc->db_handle()->commit();
1065 }
1066 }
1067 }
1068
1069 #finish off with a commit if the code told us we were doing it
1070 if($commit_every) {
1071 $dbc->db_handle()->commit();
1072 }
1073
1074 return $total_affected || 0;
1075 };
1076
1077 return $self->_callback_batch($sql, $callback, $prepare_params)
1078 }
1079
1080 1;