Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Utils/SqlHelper.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::Utils::SqlHelper | |
24 | |
25 =head1 VERSION | |
26 | |
27 $Revision: 1.25 $ | |
28 | |
29 =head1 SYNOPSIS | |
30 | |
31 use Bio::EnsEMBL::Utils::SqlHelper; | |
32 | |
33 my $helper = | |
34 Bio::EnsEMBL::Utils::SqlHelper->new( -DB_CONNECTION => $dbc ); | |
35 | |
36 my $arr_ref = $helper->execute( | |
37 -SQL => 'select name, age from tab where col =?', | |
38 -CALLBACK => sub { | |
39 my @row = @{ shift @_ }; | |
40 return { name => $row[0], age => $row[1] }; | |
41 }, | |
42 -PARAMS => ['A'] ); | |
43 | |
44 use Data::Dumper; | |
45 print Dumper($arr_ref), "\n"; | |
46 # Prints out [name=>'name', age=>1] maybe .... | |
47 | |
48 | |
49 # For transactional work; only works if your MySQL table | |
50 # engine/database supports transactional work (such as InnoDB) | |
51 | |
52 $helper->transaction( | |
53 -CALLBACK => sub { | |
54 if ( $helper->execute_single_result( | |
55 -SQL => 'select count(*) from tab' | |
56 ) ) | |
57 { | |
58 return $helper->execute_update('delete from tab'); | |
59 } else { | |
60 return | |
61 $helper->batch( -SQL => 'insert into tab (?,?)', | |
62 -DATA => [ [ 1, 2 ], [ 1, 3 ], [ 1, 4 ] ] ); | |
63 } | |
64 } ); | |
65 | |
66 =head1 DESCRIPTION | |
67 | |
68 Easier database interaction | |
69 | |
70 =head1 METHODS | |
71 | |
72 See subrotuines. | |
73 | |
74 =cut | |
75 | |
76 package Bio::EnsEMBL::Utils::SqlHelper; | |
77 | |
78 use warnings; | |
79 use strict; | |
80 | |
81 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
82 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref); | |
83 use Bio::EnsEMBL::Utils::Exception qw(throw); | |
84 use Bio::EnsEMBL::Utils::Iterator; | |
85 use English qw( -no_match_vars ); #Used for $PROCESS_ID | |
86 use Scalar::Util qw(weaken); #Used to not hold a strong ref to DBConnection | |
87 | |
88 =pod | |
89 | |
90 =head2 new() | |
91 | |
92 Arg [DB_CONNECTION] : Bio::EnsEMBL::DBSQL::DBConnection $db_connection | |
93 Returntype : Instance of helper | |
94 Exceptions : If the object given as a DBConnection is not one or it | |
95 was undefined | |
96 Status : Stable | |
97 | |
98 Creates a new instance of this object. | |
99 | |
100 my $dba = get_dba('mydb'); # New DBAdaptor from somewhere | |
101 my $helper = Bio::EnsEMBL::Utils::SqlHelper->new( | |
102 -DB_CONNECTION => $dba->dbc() ); | |
103 | |
104 $helper->execute_update( -SQL => 'update tab set flag=?', | |
105 -PARAMS => [1] ); | |
106 | |
107 =cut | |
108 | |
109 sub new { | |
110 my ( $class, @args ) = @_; | |
111 | |
112 my ($db_connection) = rearrange([qw(db_connection)], @args); | |
113 | |
114 my $self = bless( {}, ref($class) || $class ); | |
115 throw('-DB_CONNECTION construction parameter was undefined.') | |
116 unless defined $db_connection; | |
117 $self->db_connection($db_connection); | |
118 | |
119 return $self; | |
120 } | |
121 | |
122 =pod | |
123 | |
124 =head2 db_connection() | |
125 | |
126 Arg [1] : Bio::EnsEMBL::DBSQL::DBConnection $db_connection | |
127 Description : Sets and retrieves the DBConnection | |
128 Returntype : Bio::EnsEMBL::DBSQL::DBConnection | |
129 Exceptions : If the object given as a DBConnection is not one or if an | |
130 attempt is made to set the value more than once | |
131 Status : Stable | |
132 | |
133 =cut | |
134 | |
135 sub db_connection { | |
136 my ($self, $db_connection) = @_; | |
137 if(defined $db_connection) { | |
138 if(exists $self->{db_connection}) { | |
139 throw('Cannot reset the DBConnection object; already defined '); | |
140 } | |
141 assert_ref($db_connection, 'Bio::EnsEMBL::DBSQL::DBConnection', 'db_connection'); | |
142 $self->{db_connection} = $db_connection; | |
143 weaken $self->{db_connection}; | |
144 } | |
145 return $self->{db_connection}; | |
146 } | |
147 | |
148 # --------- SQL Methods | |
149 | |
150 =pod | |
151 | |
152 =head2 execute() - Execute a SQL statement with a custom row handler | |
153 | |
154 Arg [SQL] : string SQL to execute | |
155 Arg [CALLBACK] : CodeRef; The callback to use for mapping a row to a data | |
156 point; leave blank for a default mapping to a 2D array | |
157 Arg [USE_HASHREFS] : boolean If set to true will cause HashRefs to be returned | |
158 to the callback & not ArrayRefs | |
159 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement | |
160 Arg [PREPARE_PARAMS] : boolean Parameters to be passed onto the Statement Handle | |
161 prepare call | |
162 Arg [ITERATOR] : boolean Request a L<Bio::EnsEMBL::Utils::Iterator> | |
163 rather than a 2D array | |
164 Returntype : ArrayRef/L<Bio::EnsEMBL::Utils::Iterator> | |
165 Exceptions : If errors occur in the execution of the SQL | |
166 Status : Stable | |
167 | |
168 my $arr_ref = $helper->execute( | |
169 -SQL => 'select a,b,c from tab where col =?', | |
170 -CALLBACK => sub { | |
171 my @row = @{ shift @_ }; | |
172 return { A => $row[0], B => $row[1], C => $row[2] }; | |
173 }, | |
174 -PARAMS => ['A'] ); | |
175 | |
176 #Or with hashrefs | |
177 my $arr_ref = $helper->execute( | |
178 -SQL => 'select a,b,c from tab where col =?', | |
179 -USE_HASHREFS => 1, | |
180 -CALLBACK => sub { | |
181 my $row = shift @_; | |
182 return { A => $row->{a}, B => $row->{b}, C => $row->{c} }; | |
183 }, | |
184 -PARAMS => ['A'] ); | |
185 | |
186 Uses a callback defined by the C<sub> decalaration. Here we specify how | |
187 the calling code will deal with each row of a database's result set. The | |
188 sub can return any type of Object/hash/data structure you require. | |
189 | |
190 Should you not specify a callback then a basic one will be assigned to | |
191 you which will return a 2D array structure e.g. | |
192 | |
193 my $arr_ref = $helper->execute( | |
194 -SQL => 'select a,b,c from tab where col =?', | |
195 -PARAMS => ['A'] ); | |
196 | |
197 This is equivalent to DBI's c<selectall_arrayref()> subroutine. | |
198 | |
199 As an extension to this method you can write a closure subroutine which | |
200 takes in two parameters. The first is the array/hash reference & the | |
201 second is the statement handle used to execute. 99% of the time you will | |
202 not need it but there are occasions where you do need it. An example of | |
203 usage would be: | |
204 | |
205 my $conn = get_conn(); #From somwewhere | |
206 my $arr_ref = $conn->execute( | |
207 -SQL => 'select a,b,c from tab where col =?', | |
208 -USE_HASHREFS => 1, | |
209 -CALLBACK => sub { | |
210 my ( $row, $sth ) = @_; | |
211 #Then do something with sth | |
212 return { A => $row->[0], B => $row->[1], C => $row->[2] }; | |
213 }, | |
214 -PARAMS => ['A'] ); | |
215 | |
216 Any arguments to bind to the incoming statement. This can be a set of scalars | |
217 or a 2D array if you need to specify any kind of types of sql objects i.e. | |
218 | |
219 use DBI qw(:sql_types); | |
220 | |
221 my $conn = get_conn(); | |
222 my $arr_ref = $conn->execute( | |
223 -SQL => | |
224 'select a,b,c from tab where col =? and num_col=? and other=?', | |
225 -USE_HASHREFS => 1, | |
226 -CALLBACK => sub { | |
227 my @row = @{ shift @_ }; | |
228 return { A => $row[0], B => $row[1], C => $row[2] }; | |
229 }, | |
230 -PARAMS => [ '1', SQL_VARCHAR ], | |
231 [ 2, SQL_INTEGER ], | |
232 'hello' ); | |
233 | |
234 Here we import DBI's sql types into our package and then pass in | |
235 multiple anonymous array references as parameters. Each param is | |
236 tested in the input and if it is detected to be an ARRAY reference we | |
237 dereference the array and run DBI's bind_param method. In fact you can | |
238 see each part of the incoming paramaters array as the contents to call | |
239 C<bind_param> with. The only difference is the package tracks the bind | |
240 position for you. | |
241 | |
242 We can get back a L<Bio::EnsEMBL::Utils::Iterator> object which can be used | |
243 to iterate over the results set without first materializing the data into | |
244 memory. An example would be: | |
245 | |
246 my $iterator = $helper->execute( | |
247 -SQL => 'select a,b,c from tab where col =?', | |
248 -PARAMS => ['A'] | |
249 -ITERATOR => 1); | |
250 while($iterator->has_next()) { | |
251 my $row = $iterator->next(); | |
252 #Do something | |
253 } | |
254 | |
255 This is very useful for very large datasets. | |
256 | |
257 =cut | |
258 | |
259 sub execute { | |
260 my ( $self, @args ) = @_; | |
261 my ($sql, $callback, $use_hashrefs, $params, $prepare_params, $iterator) = | |
262 rearrange([qw(sql callback use_hashrefs params prepare_params iterator)], @args); | |
263 my $has_return = 1; | |
264 | |
265 #If no callback then we execute using a default one which returns a 2D array | |
266 if(!defined $callback) { | |
267 throw('Cannot use fetchrow_hashref() with default mappers. Turn off this option') if $use_hashrefs; | |
268 $callback = $self->_mappers()->{array_ref}; | |
269 } | |
270 | |
271 return $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params, $prepare_params, $iterator ); | |
272 } | |
273 | |
274 =pod | |
275 | |
276 =head2 execute_simple() | |
277 | |
278 Arg [SQL] : string $sql | |
279 Arg [PARAMS] : ArrayRef $params | |
280 Arg [CALLBACK] : CodeRef $callback | |
281 Returntype : ArrayRef of 1D elements | |
282 Exceptions : If errors occur in the execution of the SQL | |
283 Status : Stable | |
284 | |
285 my $classification = | |
286 $helper->execute_simple( | |
287 -SQL => | |
288 'select meta_val from meta where meta_key =? order by meta_id', | |
289 -PARAMS => ['species.classification'] ); | |
290 | |
291 Identical to C<execute> except you do not specify a sub-routine reference. | |
292 Using this code assumes you want an array of single scalar values as returned | |
293 by the given SQL statement. | |
294 | |
295 =cut | |
296 | |
297 sub execute_simple { | |
298 my ( $self, @args ) = @_; | |
299 my ($sql, $params, $callback) = rearrange([qw(sql params callback)], @args); | |
300 my $has_return = 1; | |
301 my $use_hashrefs = 0; | |
302 $callback ||= $self->_mappers()->{first_element}; | |
303 return $self->_execute($sql, $callback, $has_return, $use_hashrefs, $params); | |
304 } | |
305 | |
306 =pod | |
307 | |
308 =head2 execute_no_return() | |
309 | |
310 Arg [SQL] : string sql | |
311 Arg [CALLBACK] : CodeRef The callback to use for mapping a row to a data point; | |
312 we assume you are assigning into a data structure which | |
313 has requirements other than simple translation into an | |
314 array | |
315 Arg [USE_HASHREFS] : boolean If set to true will cause HashRefs to be returned | |
316 to the callback & not ArrayRefs | |
317 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement | |
318 Returntype : None | |
319 Exceptions : If errors occur in the execution of the SQL | |
320 Status : Stable | |
321 | |
322 Whilst all other execute methods will return something; this assumes that the | |
323 given mapper subroutine will be performing the business of placing values | |
324 somewhere or doing something with them. | |
325 | |
326 There is a huge temptation to nest queries using this method; do not! Execute | |
327 the values into an array using one of the other methods then run your subqueries | |
328 on them; or make a better first query. SQL is flexible; so use it. | |
329 | |
330 =cut | |
331 | |
332 sub execute_no_return { | |
333 my ( $self, @args ) = @_; | |
334 my ($sql, $callback, $use_hashrefs, $params) = rearrange([qw(sql callback use_hashrefs params)], @args); | |
335 throw('No callback defined but this is a required parameter for execute_no_return()') if ! $callback; | |
336 my $has_return = 0; | |
337 my $prepare_params = []; | |
338 $self->_execute( $sql, $callback, $has_return, $use_hashrefs, $params); | |
339 return; | |
340 } | |
341 | |
342 =pod | |
343 | |
344 =head2 execute_into_hash() | |
345 | |
346 Arg [SQL] : string $sql | |
347 Arg [CALLBACK] : CodeRef The callback to use for mapping to a value in a hash | |
348 keyed by the first element in your result set; | |
349 leave blank for a default mapping to a scalar value | |
350 of the second element | |
351 Arg [PARAMS] : The binding parameters to the SQL statement | |
352 Returntype : HashRef keyed by column 1 & value is the return of callback | |
353 Exceptions : If errors occur in the execution of the SQL | |
354 Status : Stable | |
355 | |
356 A variant of the execute methods but rather than returning a list of | |
357 mapped results this will assume the first column of a returning map & | |
358 the calling subroutine will map the remainder of your return as the | |
359 hash's key. | |
360 | |
361 B<This code can handle simple queries to hashes, complex value mappings | |
362 and repeated mappings for the same key>. | |
363 | |
364 For example: | |
365 | |
366 my $sql = 'select key, one, two from table where something =?'; | |
367 my $mapper = sub { | |
368 my ( $row, $value ) = @_; | |
369 #Ignore field 0 as that is being used for the key | |
370 my $obj = Some::Obj->new( one => $row->[1], two => $row->[2] ); | |
371 return $obj; | |
372 }; | |
373 | |
374 my $hash = | |
375 $helper->execute_into_hash( -SQL => $sql, | |
376 -CALLBACK => $mapper, | |
377 -PARAMS => ['val'] ); | |
378 | |
379 #Or for a more simple usage | |
380 my $sql = 'select biotype, count(gene_id) from gene group by biotype'; | |
381 my $biotype_hash = $conn->execute_into_hash( -SQL => $sql ); | |
382 print $biotype_hash->{protein_coding} || 0, "\n"; | |
383 | |
384 The basic pattern assumes a scenario where you are mapping in a one | |
385 key to one value. For more advanced mapping techniques you can use the | |
386 second value passed to the subroutine paramater set. This is shown as | |
387 C<$value> in the above examples. This value is what is found in the HASH | |
388 being populated in the background. So on the first time you encounter it | |
389 for the given key it will be undefined. For future invocations it will | |
390 be set to the value you gave it. This allows us to setup code like the | |
391 following | |
392 | |
393 my %args = ( -SQL => 'select meta_key, meta_value from meta ' | |
394 . 'where meta_key =? order by meta_id', | |
395 -PARAMS => ['species.classification'] ); | |
396 | |
397 my $hash = $helper->execute_into_hash( | |
398 %args, | |
399 -CALLBACK => sub { | |
400 my ( $row, $value ) = @_; | |
401 $value = [] if !defined $value; | |
402 push( @{$value}, $row->[1] ); | |
403 return $value; | |
404 } ); | |
405 | |
406 #OR | |
407 | |
408 $hash = $helper->execute_into_hash( | |
409 %args, | |
410 -CALLBACK => sub { | |
411 my ( $row, $value ) = @_; | |
412 if ( defined $value ) { | |
413 push( @{$value}, $row->[1] ); | |
414 return; | |
415 } | |
416 my $new_value = [ $row->[1] ]; | |
417 return $new_value; | |
418 } ); | |
419 | |
420 The code understands that returning a defined value means to push this | |
421 value into the background hash. In example one we keep on re-inserting | |
422 the Array of classifications into the hash. Example two shows an early | |
423 return from the callback which indicates to the code we do not have any | |
424 value to re-insert into the hash. Of the two methods example one is | |
425 clearer but is possibliy slower. | |
426 | |
427 B<Remember that the row you are given is the full row & not a view of | |
428 the reminaing fields.> Therefore indexing for the data you are concerned | |
429 with begins at position 1. | |
430 | |
431 =cut | |
432 | |
433 sub execute_into_hash { | |
434 my ( $self, @args ) = @_; | |
435 my ($sql, $callback, $params) = rearrange([qw(sql callback params)], @args); | |
436 my $hash = {}; | |
437 | |
438 #If no callback then we execute using a default one which sets value to 2nd element | |
439 if(!defined $callback) { | |
440 $callback = $self->_mappers()->{second_element}; | |
441 } | |
442 | |
443 #Default mapper uses the 1st key + something else from the mapper | |
444 my $mapper = sub { | |
445 my $row = shift @_; | |
446 my $key = $row->[0]; | |
447 my $value = $hash->{$key}; | |
448 my $new_value = $callback->($row, $value); | |
449 if(defined $new_value) { | |
450 $hash->{ $key } = $new_value; | |
451 } | |
452 return; | |
453 }; | |
454 | |
455 $self->execute_no_return( | |
456 -SQL => $sql, | |
457 -CALLBACK => $mapper, | |
458 -PARAMS => $params | |
459 ); | |
460 | |
461 return $hash; | |
462 } | |
463 | |
464 =pod | |
465 | |
466 =head2 execute_single_result() | |
467 | |
468 Arg [SQL] : string $sql | |
469 Arg [CALLBACK] : CodeRef The callback to use for mapping a row to a data point; | |
470 leave blank for a default scalar mapping | |
471 Arg [USE_HASHREFS] : boolean If set to true will cause HashRefs to be returned | |
472 to the callback & not ArrayRefs | |
473 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement | |
474 Returntype : Scalar | |
475 Exceptions : If errors occur in the execution of the SQL, if the query | |
476 returned more than 1 row and if we found no rows. | |
477 Status : Stable | |
478 | |
479 my $meta_count = | |
480 $helper->execute_single_result( | |
481 -SQL => 'select count(*) from meta where species_id =?', | |
482 -PARAMS => [1] ); | |
483 | |
484 Very similar to C<execute()> except it will raise an exception if we have more | |
485 or less than one row returned | |
486 | |
487 =cut | |
488 | |
489 sub execute_single_result { | |
490 my ( $self, @args ) = @_; | |
491 my ($sql, $callback, $use_hashrefs, $params) = rearrange( | |
492 [qw(sql callback use_hashrefs params)], @args); | |
493 | |
494 my $results = $self->execute_simple( | |
495 -SQL => $sql, | |
496 -CALLBACK => $callback, | |
497 -USE_HASHREFS => $use_hashrefs, | |
498 -PARAMS => $params | |
499 ); | |
500 | |
501 my $result_count = scalar(@{$results}); | |
502 if($result_count != 1) { | |
503 $params = [] if ! $params; | |
504 my $type = ($result_count == 0) ? 'No' : 'Too many'; | |
505 my $msg = "${type} results returned. Expected 1 but got $result_count for query '${sql}' with params ["; | |
506 $msg .= join( ',', map {(defined $_) ? $_ : '-undef-';} @{$params} ); | |
507 $msg .= ']'; | |
508 throw($msg); | |
509 } | |
510 return $results->[0]; | |
511 } | |
512 | |
513 =pod | |
514 | |
515 =head2 transaction() | |
516 | |
517 Arg [CALLBACK] : CodeRef The callback used for transaction isolation; once | |
518 the subroutine exists the code will decide on rollback | |
519 or commit. Required | |
520 Arg [RETRY] : integer the number of retries to attempt with this | |
521 transactional block. Defaults to 0. | |
522 Arg [PAUSE] : integer the time in seconds to pause in-between retries. | |
523 Defaults to 1. | |
524 Arg [CONDITION] : CodeRef allows you to inspect the exception raised | |
525 and should your callback return true then the | |
526 retry will be attempted. If not given then all | |
527 exceptions mean attempt a retry (if specified) | |
528 Returntype : Return of the callback | |
529 Exceptions : If errors occur in the execution of the SQL | |
530 Status : Stable | |
531 | |
532 my $val = $helper->transaction( | |
533 -CALLBACK => sub { | |
534 my ($dbc) = @_; | |
535 #Do something | |
536 return 1; | |
537 } ); | |
538 | |
539 #Or because of the arguments method we use | |
540 my $val = $helper->transaction( | |
541 sub { | |
542 my ($dbc) = @_; | |
543 #Do something | |
544 return 1; | |
545 } ); | |
546 | |
547 Creates a transactional block which will ensure that the connection is | |
548 committed when your submmited subroutine has finished or will rollback | |
549 in the event of an error occuring in your block. | |
550 | |
551 The code will always force AutoCommit off but will restore it to its | |
552 previous setting. If your DBI/DBD driver does not support manual | |
553 commits then this code will break. The code will turn off the | |
554 C<disconnect_when_idle()> method to allow transactions to work as | |
555 expected. | |
556 | |
557 An effect of using REPEATABLE READ transaction isolation (InnoDB's | |
558 default) is that your data is as fresh as when you started your current | |
559 transaction. To ensure the freshest data use C<SELECT ... from ... LOCK | |
560 IN SHARE MODE> or C<SELECT ... from ... LOCK FOR UPDATE> if you are | |
561 going to issue updates. | |
562 | |
563 Creating a transaction within a transaction results in the commit | |
564 rollback statements occuring in the top level transaction. That way any | |
565 block of code which is meant to to be transaction can be wrapped in | |
566 this block ( assuming the same instance of SQLHelper is passed around & | |
567 used). | |
568 | |
569 You can also request the retry of a transactional block of code which is | |
570 causing problems. This is not a perfect solution as it indicates your | |
571 programming model is broken. This mode can be specified as such: | |
572 | |
573 my $val = $helper->transaction( | |
574 -RETRY => 3, -PAUSE => 2, | |
575 -CALLBACK => sub { | |
576 my ($dbc) = @_; | |
577 #Do something | |
578 return 1; | |
579 } ); | |
580 | |
581 The C<-RETRY> argument indicates the number of times we attempt the transaction | |
582 and C<-PAUSE> indicates the time in-between attempts. These retries will | |
583 only occur in the root transaction block i.e. you cannot influence the | |
584 retry system in a sub transaction. You can influence if the retry is done with | |
585 the C<-CONDITION> argument which accepts a Code reference (same as the | |
586 C<-CALLBACK> parameter). This allows you to inspect the error thrown to | |
587 retry only in some situations e.g. | |
588 | |
589 my $val = $helper->transaction( | |
590 -RETRY => 3, -PAUSE => 2, | |
591 -CALLBACK => sub { | |
592 my ($dbc) = @_; | |
593 #Do something | |
594 return 1; | |
595 }, | |
596 -CONDITION => sub { | |
597 my ($error) = @_; | |
598 return ( $error =~ /deadlock/ ) ? 1 : 0; | |
599 } | |
600 ); | |
601 | |
602 Here we attempt a transaction and will B<only> retry when we have an error | |
603 with the phrase deadlock. | |
604 | |
605 =cut | |
606 | |
607 sub transaction { | |
608 my ($self, @args) = @_; | |
609 | |
610 my ($callback, $retry, $pause, $condition) = rearrange([qw(callback retry pause condition)], @args); | |
611 | |
612 throw('-CALLBACK was not a CodeRef. Got a reference of type ['.ref($callback).']. Check your parameters') | |
613 unless check_ref($callback, 'CODE'); | |
614 | |
615 #Setup defaults | |
616 $retry = 0 unless defined $retry; | |
617 $pause = 1 unless defined $pause; | |
618 if(! defined $condition) { | |
619 $condition = sub { | |
620 return 1; | |
621 }; | |
622 } | |
623 | |
624 assert_ref($condition, 'CODE', '-CONDITION'); | |
625 | |
626 my $dbc = $self->db_connection(); | |
627 my $original_dwi; | |
628 my $ac; | |
629 | |
630 my $error; | |
631 my $result; | |
632 | |
633 #If we were already in a transaction then we do not do any management of the | |
634 #session & wait for the parent transaction(s) to finish | |
635 my $perform_transaction = $self->_perform_transaction_code(); | |
636 if($perform_transaction) { | |
637 ($original_dwi, $ac) = $self->_enable_transaction(); | |
638 } | |
639 else { | |
640 #If we were in a transaction then ignore any attempts at retry here | |
641 $retry = 0; | |
642 } | |
643 | |
644 for(my $iteration = 0; $iteration <= $retry; $iteration++) { | |
645 eval { | |
646 $result = $callback->($dbc); | |
647 $dbc->db_handle()->commit() if $perform_transaction; | |
648 }; | |
649 $error = $@; | |
650 #If we were allowed to deal with the error then we apply rollbacks & then | |
651 #retry or leave to the remainder of the code to throw | |
652 if($perform_transaction && $error) { | |
653 eval { $dbc->db_handle()->rollback(); }; | |
654 #If we were not on our last iteration then warn & allow the retry | |
655 if($iteration != $retry) { | |
656 if($condition->($error)) { | |
657 warn("Encountered error on attempt ${iteration} of ${retry} and have issued a rollback. Will retry after sleeping for $pause second(s): $error"); | |
658 sleep $pause; | |
659 } | |
660 else { | |
661 last; #break early if condition of error was not matched | |
662 } | |
663 } | |
664 } | |
665 | |
666 #Always break the loop if we had a successful attempt | |
667 last if ! $error; | |
668 } | |
669 | |
670 if($perform_transaction) { | |
671 $self->_disable_transaction($original_dwi, $ac); | |
672 } | |
673 | |
674 throw("ABORT: Transaction aborted because of error: ${error}") if $error; | |
675 | |
676 return $result; | |
677 } | |
678 | |
679 =pod | |
680 | |
681 =head2 execute_update() | |
682 | |
683 Arg [SQL] : string $sql | |
684 Arg [CALLBACK] : CodeRef The callback to use for calling methods on the | |
685 DBI statement handle or DBConnection object after an | |
686 update command | |
687 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement | |
688 Arg [PREPARE_PARAMS] : ArrayRef Parameters to bind to the prepare() StatementHandle call | |
689 Returntype : Number of rows affected | |
690 Exceptions : If errors occur in the execution of the SQL | |
691 Status : Stable | |
692 | |
693 Used for performing updates but conforms to the normal execute statement | |
694 subroutines. | |
695 | |
696 use DBI qw(:sql_types); | |
697 $helper->execute_update(-SQL => 'update tab set name = ? where id =?', | |
698 -PARAMS => [ 'andy', [ 1, SQL_INTEGER ] ] ); | |
699 | |
700 If you need to do something a bit more advanced with your DML then you can | |
701 give the method a closure and this will be called after the execute has been | |
702 issued i.e. | |
703 | |
704 my $obj; | |
705 $helper->execute_update( | |
706 -SQL => 'insert into tab (name) values(?)', | |
707 -CALLBACK => sub { | |
708 my ( $sth, $dbh ) = @_; | |
709 $obj->{id} = $dbh->{mysql_insertid}; | |
710 }, | |
711 -PARAMS => [ $obj->name() ] ); | |
712 | |
713 This lets us access the statement handle & database handle to access other | |
714 properties such as the last identifier inserted. | |
715 | |
716 =cut | |
717 | |
718 sub execute_update { | |
719 my ($self, @args) = @_; | |
720 my ($sql, $callback, $params, $prepare_params) = rearrange([qw(sql callback params prepare_params)], @args); | |
721 my $rv = 0; | |
722 my $sth; | |
723 eval { | |
724 my @prepare_params; | |
725 @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY'); | |
726 $sth = $self->db_connection()->prepare($sql, @prepare_params); | |
727 $self->_bind_params($sth, $params); | |
728 $rv = $sth->execute(); | |
729 $callback->($sth, $self->db_connection()->db_handle()) if $callback; | |
730 }; | |
731 my $error = $@; | |
732 $self->_finish_sth($sth); | |
733 if($error) { | |
734 my $params = join ' ', map { (defined $_) ? $_ : q{undef} } @{$params}; | |
735 throw("Cannot apply sql '${sql}' with params '${params}': ${error}"); | |
736 } | |
737 return $rv; | |
738 } | |
739 | |
740 =head2 execute_with_sth() | |
741 | |
742 Arg [SQL] : string $sql | |
743 Arg [CALLBACK] : CodeRef The callback to use for working with the statement | |
744 handle once returned. This is B<not> a mapper. | |
745 Arg [PARAMS] : ArrayRef The binding parameters to the SQL statement | |
746 Arg [PREPARE_PARAMS] : ArrayRef Used to pass parameters to the statement handle | |
747 prepare method | |
748 Description : A subrotuine which abstracts resource handling and statement | |
749 preparing leaving the developer to define how to handle | |
750 and process the statement. | |
751 Returntype : Anything you wish to return from the callback | |
752 Exceptions : If errors occur in the execution of the SQL | |
753 Status : Stable | |
754 | |
755 my $meta_count = $helper->execute_with_sth( | |
756 -SQL => 'select count(*) from meta where species_id =?', | |
757 -PARAMS => [1], | |
758 -CALLBACK => sub { | |
759 my ($sth) = @_; | |
760 my $count; | |
761 $sth->bind_columns( \$count ); | |
762 while ( $sth->fetch ) { | |
763 print $count, "\n"; | |
764 } | |
765 return $count; | |
766 } ); | |
767 | |
768 Very similar to C<execute()> except this gives you full control over the | |
769 lifecycle of the statement handle & how you wish to proceed with working | |
770 with a statement handle. This is for situations where you believe going through | |
771 the mappers causes too much of a slow-down (since we have to execute a | |
772 subroutine for every row in order to map it correctly). | |
773 | |
774 However please benchmark before adopting this method as it increases the | |
775 complexity of your code and the mapper slow down only becomes apparent when | |
776 working with very large numbers of rows. | |
777 | |
778 =cut | |
779 | |
780 sub execute_with_sth { | |
781 my ($self, @args) = @_; | |
782 my ($sql, $callback, $params, $prepare_params) = rearrange([qw(sql callback params prepare_params)], @args); | |
783 my $sth = $self->_base_execute( $sql, $params, $prepare_params, $callback ); | |
784 my $result = eval {$callback->($sth)}; | |
785 my $error = $@; | |
786 $self->_finish_sth($sth); | |
787 die $error if $error; | |
788 return $result; | |
789 } | |
790 | |
791 =pod | |
792 | |
793 =head2 batch() | |
794 | |
795 Arg [SQL] : string $sql | |
796 Arg [CALLBACK] : CodeRef The callback to use for working with the statement | |
797 handle once returned; specify this or -DATA | |
798 Arg [DATA] : ArrayRef The data to insert; specify this or -CALLBACK | |
799 Arg [COMMIT_EVERY] : Integer defines the rate at which to issue commits to | |
800 the DB handle. This is important when working with | |
801 InnoDB databases since it affects the speed of rollback | |
802 (larger gaps inbetween commits means more to rollback). | |
803 | |
804 Ignored if using the callback version. | |
805 Arg [PREPARE_PARAMS] : ArrayRef Used to pass parameters to the statement handle | |
806 prepare method | |
807 Returntype : integer rows updated | |
808 Exceptions : If errors occur in the execution of the SQL | |
809 Status : Stable | |
810 | |
811 my $alotofdata = getitfromsomewhere(); | |
812 $helper->batch( | |
813 -SQL => 'insert into table (one,two) values(?,?)', | |
814 -CALLBACk => sub { | |
815 my ( $sth, $dbc ) = @_; | |
816 foreach my $data (@alotofdata) { | |
817 $sth->execute( @{$data} ); | |
818 } | |
819 } ); | |
820 | |
821 #Or for a 2D array data driven approach | |
822 $helper->batch( -SQL => 'insert into table (one,two) values(?,?)', | |
823 -DATA => $alotofdata ); | |
824 | |
825 Takes in a sql statement & a code reference. Your SQL is converted into a | |
826 prepared statement & then given as the first parameter to the closure. The | |
827 second parameter is the DBH which created the statement. This is intended | |
828 to let you do mass insertion into a database without the need to | |
829 re-preparing the same statement. | |
830 | |
831 This can be combined with the transaction() code to provide a construct | |
832 which does batch insertion & is transactionally aware. | |
833 | |
834 We can also use data based batch insertions i.e. | |
835 | |
836 #Needs to be like: | |
837 # [ [1,2], [3,4] ] | |
838 #Or if using the DBI types: | |
839 # [ [ [ 1, SQL_INTEGER ], [ 2, SQL_INTEGER ] ], | |
840 # [ [ 3, SQL_INTEGER ], [ 4, SQL_INTEGER ] ] ]; | |
841 | |
842 my $alotofdata = getitfromsomewhere(); | |
843 $helper->batch( -SQL => 'insert into table (one,two) values(?,?)', | |
844 -DATA => $alotofdata ); | |
845 | |
846 This does exactly what the previous example. | |
847 | |
848 All batch statements will return the value the callback computes. If you are | |
849 using the previous example with a data array then the code will return the | |
850 number affected rows by the query. | |
851 | |
852 =cut | |
853 | |
854 sub batch { | |
855 my ($self, @args) = @_; | |
856 my ($sql, $callback, $data, $commit_every, $prepare_params) = | |
857 rearrange([qw(sql callback data commit_every prepare_params)], @args); | |
858 | |
859 if(! defined $callback && ! defined $data) { | |
860 throw('You need to define a callback for insertion work or the 2D data array'); | |
861 } | |
862 | |
863 my $result; | |
864 if(defined $callback) { | |
865 $result = $self->_callback_batch($sql, $callback, $prepare_params); | |
866 } | |
867 else { | |
868 $result = $self->_data_batch($sql, $data, $commit_every, $prepare_params); | |
869 } | |
870 return $result if defined $result; | |
871 return; | |
872 } | |
873 | |
874 #------- Internal methods | |
875 | |
876 my $default_mappers = { | |
877 first_element => sub { | |
878 my ($row) = @_; | |
879 return $row->[0]; | |
880 }, | |
881 second_element => sub { | |
882 my ($row) = @_; | |
883 return $row->[1]; | |
884 }, | |
885 array_ref => sub { | |
886 my $row = shift @_; | |
887 return [@{$row}]; | |
888 } | |
889 }; | |
890 | |
891 sub _mappers { | |
892 my ($self) = @_; | |
893 return $default_mappers; | |
894 } | |
895 | |
896 sub _perform_transaction_code { | |
897 my ($self) = @_; | |
898 return $self->{_transaction_active}->{$PROCESS_ID} ? 0 : 1; | |
899 } | |
900 | |
901 sub _enable_transaction { | |
902 my ($self) = @_; | |
903 my $dbc = $self->db_connection(); | |
904 my $original_dwi = $dbc->disconnect_when_inactive(); | |
905 $dbc->disconnect_when_inactive(0); | |
906 my $ac = $dbc->db_handle()->{'AutoCommit'}; | |
907 $dbc->db_handle()->{'AutoCommit'} = 0; | |
908 $self->{_transaction_active}->{$PROCESS_ID} = 1; | |
909 return ($original_dwi, $ac); | |
910 } | |
911 | |
912 sub _disable_transaction { | |
913 my ($self, $original_dwi, $ac) = @_; | |
914 my $dbc = $self->db_connection(); | |
915 $dbc->db_handle()->{'AutoCommit'} = $ac; | |
916 $dbc->disconnect_when_inactive($original_dwi); | |
917 delete $self->{_transaction_active}->{$PROCESS_ID}; | |
918 return; | |
919 } | |
920 | |
921 sub _bind_params { | |
922 my ( $self, $sth, $params ) = @_; | |
923 | |
924 return if ! defined $params; #Return quickly if we had no data | |
925 | |
926 if(! check_ref($params, 'ARRAY')) { | |
927 throw(qq{The given parameters reference '${params}' is not an ARRAY; wrap in an ArrayRef}); | |
928 } | |
929 | |
930 my $count = 1; | |
931 foreach my $param (@{$params}) { | |
932 if ( check_ref($param, 'ARRAY') ) { | |
933 $sth->bind_param( $count, @{$param} ); | |
934 } | |
935 else { | |
936 $sth->bind_param( $count, $param ); | |
937 } | |
938 $count++; | |
939 } | |
940 return; | |
941 } | |
942 | |
943 sub _execute { | |
944 my ( $self, $sql, $callback, $has_return, $use_hashrefs, $params, $prepare_params, $iterator ) = @_; | |
945 | |
946 throw('Not given a mapper. _execute() must always been given a CodeRef') unless check_ref($callback, 'CODE'); | |
947 | |
948 my $sth = $self->_base_execute($sql, $params, $prepare_params); | |
949 | |
950 my $sth_processor; | |
951 if($use_hashrefs) { | |
952 $sth_processor = sub { | |
953 while( my $row = $sth->fetchrow_hashref() ) { | |
954 my $v = $callback->($row, $sth); | |
955 return $v if $has_return; | |
956 } | |
957 $self->_finish_sth($sth); | |
958 return undef; | |
959 }; | |
960 } | |
961 else { | |
962 $sth_processor = sub { | |
963 while( my $row = $sth->fetchrow_arrayref() ) { | |
964 my $v = $callback->($row, $sth); | |
965 return $v if $has_return; | |
966 } | |
967 $self->_finish_sth($sth); | |
968 return undef; | |
969 }; | |
970 } | |
971 | |
972 my $iter = Bio::EnsEMBL::Utils::Iterator->new($sth_processor); | |
973 if($has_return) { | |
974 return $iter if $iterator; | |
975 return $iter->to_arrayref(); | |
976 } | |
977 else { | |
978 #Force iteration if we had no return since the caller is expecting this | |
979 $iter->each(sub {}); | |
980 } | |
981 return; | |
982 } | |
983 | |
984 sub _base_execute { | |
985 my ( $self, $sql, $params, $prepare_params) = @_; | |
986 | |
987 $params = [] unless $params; | |
988 | |
989 my $conn = $self->db_connection; | |
990 | |
991 my $sth; | |
992 eval { | |
993 my @prepare_params; | |
994 @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY'); | |
995 $sth = $conn->prepare($sql, @prepare_params); | |
996 throw("Cannot continue as prepare() did not return a handle with prepare params '@prepare_params'") | |
997 unless $sth; | |
998 $self->_bind_params( $sth, $params ); | |
999 $sth->execute(); | |
1000 }; | |
1001 | |
1002 my $error = $@; | |
1003 if($error) { | |
1004 throw("Cannot run '${sql}' with params '@{$params}' due to error: $error") if $error; | |
1005 } | |
1006 | |
1007 return $sth; | |
1008 } | |
1009 | |
1010 sub _finish_sth { | |
1011 my ($self, $sth) = @_; | |
1012 eval { $sth->finish() if defined $sth; }; | |
1013 warn('Cannot finish() the statement handle: $@') if $@; | |
1014 return; | |
1015 } | |
1016 | |
1017 sub _callback_batch { | |
1018 my ($self, $sql, $callback, $prepare_params) = @_; | |
1019 my $error; | |
1020 my $sth; | |
1021 my $closure_return; | |
1022 eval { | |
1023 my @prepare_params; | |
1024 @prepare_params = @{$prepare_params} if check_ref($prepare_params, 'ARRAY'); | |
1025 $sth = $self->db_connection()->prepare($sql, @prepare_params); | |
1026 $closure_return = $callback->($sth, $self->db_connection()); | |
1027 }; | |
1028 $error = $@; | |
1029 $self->_finish_sth($sth); | |
1030 throw("Problem detected during batch work: $error") if $error; | |
1031 | |
1032 return $closure_return if defined $closure_return; | |
1033 return; | |
1034 } | |
1035 | |
1036 sub _data_batch { | |
1037 my ($self, $sql, $data, $commit_every, $prepare_params) = @_; | |
1038 | |
1039 #Input checks | |
1040 assert_ref($data, 'ARRAY', '-DATA'); | |
1041 my $data_length = scalar(@{$data}); | |
1042 return 0 unless $data_length > 0; | |
1043 my $first_row = $data->[0]; | |
1044 throw('I expect to work with a 2D ArrayRef but this is not one') unless check_ref($first_row, 'ARRAY'); | |
1045 | |
1046 my $callback = sub { | |
1047 my ($sth, $dbc) = @_; | |
1048 my $total_affected = 0; | |
1049 #Iterate over each data point | |
1050 for(my $data_index = 0; $data_index < $data_length; $data_index++) { | |
1051 my $row = $data->[$data_index]; | |
1052 $self->_bind_params($sth, $row); | |
1053 my $affected = eval {$sth->execute()}; | |
1054 if($@) { | |
1055 throw("Problem working with $sql with params @{$row}: $@"); | |
1056 } | |
1057 my $num_affected = ($affected) ? $affected : 0; #Get around DBI's 0E0 | |
1058 $total_affected += $num_affected; | |
1059 | |
1060 #Lets us do a commit once every x rows apart from 0. We also finish | |
1061 #off with a commit if the code told us we were doing it | |
1062 if($commit_every) { | |
1063 if( ($data_index % $commit_every == 0) && $data_index != 0) { | |
1064 $dbc->db_handle()->commit(); | |
1065 } | |
1066 } | |
1067 } | |
1068 | |
1069 #finish off with a commit if the code told us we were doing it | |
1070 if($commit_every) { | |
1071 $dbc->db_handle()->commit(); | |
1072 } | |
1073 | |
1074 return $total_affected || 0; | |
1075 }; | |
1076 | |
1077 return $self->_callback_batch($sql, $callback, $prepare_params) | |
1078 } | |
1079 | |
1080 1; |