0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2011 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <ensembl-dev@ebi.ac.uk>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19
|
|
20 =head1 NAME
|
|
21
|
|
22 Bio::EnsEMBL::Funcgen::Utils::Helper
|
|
23
|
|
24 =head1 SYNOPSIS
|
|
25
|
|
26
|
|
27 e.g.
|
|
28
|
|
29
|
|
30 my $object = Bio::EnsEMBL::Object->new
|
|
31 (
|
|
32 logging => 1,
|
|
33 log_file => "/tmp/Misc.log",
|
|
34 debug_level => 2,
|
|
35 debug_file => "/tmp/Misc.dbg",
|
|
36 );
|
|
37
|
|
38 $object->log("This is a log message.");
|
|
39 $object->debug(1,"This is a debug message.");
|
|
40 $object->system("rmdir /tmp/test");
|
|
41
|
|
42
|
|
43 ----------------------------------------------------------------------------
|
|
44
|
|
45
|
|
46 =head1 OPTIONS
|
|
47
|
|
48 =over 8
|
|
49
|
|
50
|
|
51 =item B<-debug>
|
|
52
|
|
53 Turns on and defines the verbosity of debugging output, 1-3, default = 0 = off
|
|
54
|
|
55 =over 8
|
|
56
|
|
57 =item B<-log_file|l>
|
|
58
|
|
59 Defines the log file, default = "${instance}.log"
|
|
60
|
|
61 =item B<-help>
|
|
62
|
|
63 Print a brief help message and exits.
|
|
64
|
|
65 =item B<-man>
|
|
66
|
|
67 Prints the manual page and exits.
|
|
68
|
|
69 =back
|
|
70
|
|
71 =head1 DESCRIPTION
|
|
72
|
|
73 B<This program> performs several debugging and logging functions, as well as providing several inheritable EFGUtils methods.
|
|
74
|
|
75 =cut
|
|
76
|
|
77 ################################################################################
|
|
78
|
|
79 package Bio::EnsEMBL::Funcgen::Utils::Helper;
|
|
80
|
|
81 use Bio::Root::Root;
|
|
82 use Data::Dumper;
|
|
83 use Bio::EnsEMBL::Utils::Exception qw (throw stack_trace);
|
|
84 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
|
|
85 use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw (get_date);
|
|
86 #use Devel::Timer;
|
|
87 use Carp;#? Can't use unless we can get it to redirect
|
|
88 use File::Basename;
|
|
89
|
|
90
|
|
91 use strict;
|
|
92 use vars qw(@ISA);
|
|
93 @ISA = qw(Bio::Root::Root);
|
|
94
|
|
95 #List of valid rollback levels
|
|
96 #To be used in conjunction with -full_delete
|
|
97 my @rollback_tables = ('data_set', 'feature_set', 'result_set', 'input_set', 'experiment', 'array', 'array_chip', 'experimental_chip');
|
|
98
|
|
99 #Some local filevars to avoid assigning to package typeglobs
|
|
100 my ($DBGFILE, $LOGFILE);
|
|
101
|
|
102 ################################################################################
|
|
103
|
|
104 =head2 new
|
|
105
|
|
106 Description : Constructor method to create a new object with passed or
|
|
107 default attributes.
|
|
108
|
|
109 Arg [1] : hash containing optional attributes :-
|
|
110 log_file - name of log file (default = undef -> STDOUT)
|
|
111 debug_level - level of detail of debug message [1-3] (default = 0 = off)
|
|
112 debug_file - name of debug file (default = undef -> STDERR)
|
|
113
|
|
114 ReturnType : Helper
|
|
115
|
|
116 Example : my $Helper = new Bio::EnsEMBL::Helper(
|
|
117 debug_level => 3,
|
|
118 debug_file => "/tmp/efg.debug",
|
|
119 log_file => "/tmp/efg.log",
|
|
120 );
|
|
121
|
|
122 Exceptions : throws exception if failed to open debug file
|
|
123 : throws exception if failed to open log file
|
|
124
|
|
125 =cut
|
|
126
|
|
127 ################################################################################
|
|
128
|
|
129 #To do , change to rearrange
|
|
130
|
|
# Private helper: points the file-scoped $LOGFILE handle at $path, either
# through a 'tee -a' pipe (when $tee is true) or directly using open mode $mode.
# Throws if the handle cannot be opened.
sub _open_log_file{
  my ($path, $tee, $mode) = @_;

  if($tee){
    #List form pipe open: the file name is never interpreted by a shell
    open($LOGFILE, '|-', 'tee', '-a', $path)
      or throw("Failed to open log file : $path\nError: $!");
  }
  else{
    open($LOGFILE, $mode, $path)
      or throw("Failed to open log file : $path\nError: $!");
  }

  return;
}

# Constructor.
# Args    : hash of optional attributes (tee, debug_level, debug_file, log_file,
#           no_log, default_log_dir). Any attribute not passed falls back to
#           the $main::_<attribute> variable of the same name.
# Returns : the new object, as built by the parent class constructor
# Throws  : if log_file and no_log are both set, or if the debug or log
#           handle cannot be opened
# Side effects: (re)opens the file-scoped $DBGFILE/$LOGFILE handles, updates
#           $main::_debug_level, $main::_debug_file and $main::_log_file, and
#           leaves STDOUT selected and unbuffered.
sub new{
  my ($caller, %args) = @_;

  my $class = ref($caller) || $caller;

  #Create object from parent class
  my $self = $class->SUPER::new(%args);

  #we need to mirror ensembl behaviour here
  #use rearrange and set default afterwards if not defined

  # objects private data and default values
  #Not all of these need to be in main
  my %attrdata = (
                  _tee             => $main::_tee,
                  _debug_level     => $main::_debug_level,
                  _debug_file      => $main::_debug_file,
                  _log_file        => $main::_log_file,#default should be set in caller
                  _no_log          => $main::_no_log,#suppresses log file generation if log file not defined
                  _default_log_dir => $main::_default_log_dir,
                 );

  # set each class attribute using passed value or default value
  foreach my $attrname (keys %attrdata){
    (my $argname = $attrname) =~ s/^_//; # remove leading underscore
    $self->{$attrname} = (exists $args{$argname}) ? $args{$argname} : $attrdata{$attrname};
  }

  $self->{'_tee'} = 1 if $self->{'_no_log'};
  #should we undef log_file here too?
  #This currently only turns off default logging

  $self->{_default_log_dir} ||= $ENV{'HOME'}.'/logs';
  $self->{'_report'} = [];

  # DEBUG OUTPUT & STDERR
  #should default to lowest or highest debug level here!
  if($self->{_debug_level}){
    $main::_debug_level = $self->{_debug_level};

    if(defined $self->{_debug_file}){
      $main::_debug_file = $self->{_debug_file};

      open($DBGFILE, '>>', $self->{_debug_file})
        or throw("Failed to open debug file : $!");
    }
    else{
      #No debug file, so debug output goes to a duplicate of STDERR
      open($DBGFILE, '>&', \*STDERR)
        or throw("Failed to open debug output on STDERR : $!");
    }

    select $DBGFILE; $| = 1;  # make debug file unbuffered

    $self->debug(1,"Debugging started ".localtime()." on $0 at level ".$self->{_debug_level}." ...");
  }

  # LOG OUTPUT
  if (defined $self->{_log_file}){
    #This causes print on unopened file as we try and log in the DESTROY
    throw('You have specified mutually exclusive parameters log_file and no_log') if($self->{'_no_log'});
    $main::_log_file = $self->{_log_file};

    #Explicit log files are always appended to
    _open_log_file($self->{_log_file}, $self->{'_tee'}, '>>');
  }
  elsif(! $self->{'_no_log'}){
    #No log file given and default logging not suppressed:
    #name the log after the outermost script in the call stack plus the PID
    my @stack     = stack_trace();
    my $top_level = $stack[$#stack];
    my (undef, $file) = @{$top_level};
    $file =~ s/.*\///;

    $self->run_system_cmd('mkdir '.$self->{_default_log_dir}) if(! -e $self->{_default_log_dir});
    $self->{'_log_file'} = $self->{_default_log_dir}.'/'.$file.'.'.$$.'.log';
    warn "No log file defined, defaulting to:\t".$self->{'_log_file'}."\n";

    #Default log files are truncated unless we tee
    _open_log_file($self->{'_log_file'}, $self->{'_tee'}, '>');
  }
  else{
    #Logging suppressed, so log output goes to a duplicate of STDOUT
    open($LOGFILE, '>&', \*STDOUT)
      or throw("Failed to open log output on STDOUT : $!");
  }

  select $LOGFILE; $| = 1;  # make log file unbuffered
  $self->log("\n\nLogging started at ".localtime()."...");

  # RESET STDOUT TO DEFAULT
  select STDOUT; $| = 1;

  $self->debug(2,"Helper class instance created.");

  return $self;
}
|
|
259
|
|
260
|
|
261 ################################################################################
|
|
262
|
|
263 =head2 DESTROY
|
|
264
|
|
265 Description : Called by garbage collection to enable tidy up before the object is deleted
|
|
266
|
|
267 ReturnType : none
|
|
268
|
|
269 Example : none - should not be called directly
|
|
270
|
|
271 Exceptions : none
|
|
272
|
|
273 =cut
|
|
274
|
|
275 ################################################################################
|
|
276
|
|
# Destructor: flushes any stored summary report, writes closing log/debug
# lines (including process memory figures read from ps) and reports the
# timer if one was attached. Not intended to be called directly.
sub DESTROY{
  my ($self) = @_;

  #The backticks and prints below overwrite the global status variables.
  #Localise them so that destruction (especially at program exit) cannot
  #change the caller's $@ / $! or the exit status held in $?
  local($., $@, $!, $^E, $?);

  $self->report;

  if($self->{_log_file}){
    $self->log("Logging complete ".localtime().".");
    $self->log('Virtual Memory '.`ps -p $$ -o vsz |tail -1`);
    $self->log('Resident Memory '.`ps -p $$ -o rss |tail -1`);

    # close LOGFILE;  # if inherited object then cannot close filehandle !!!
  }

  if($self->{_debug_level}){
    $self->debug(1,"Debugging complete ".localtime().".");
    # close DBGFILE;  # if inherited object then cannot close filehandle !!!
  }

  if(defined $self->{'_timer'}){
    $self->{'_timer'}->report();
  }

  $self->debug(2,"Bio::EnsEMBL::Helper class instance destroyed.");

  return;
}
|
|
307
|
|
308
|
|
309
|
|
310
|
|
311 ##Need generic method in here to get stack and line info
|
|
312 ###Use Root.pm stack methods!
|
|
313 # and replace this with caller line method for logging
|
|
# Builds a "[<local time> - <file>:<line>]" stamp from the call stack.
# Uses the frame two levels up when there is one, else the frame one level
# up, else the placeholder file name "undef" with line 0.
sub _get_stack{
  my ($self) = shift;

  #need to resolve this method with that in debug, pass log or debug arg for different format
  my @frame = caller(2);
  @frame = caller(1)            if ! @frame;
  @frame = (undef, "undef", 0)  if ! @frame;

  my $stamp = localtime();

  return sprintf('[%s - %s:%s]', $stamp, basename($frame[1]), $frame[2]);
}
|
|
324
|
|
325
|
|
326 ################################################################################
|
|
327
|
|
328 =head2 log
|
|
329
|
|
330 Arg[0] : string - log message.
|
|
331 Arg[1] : boolean - memory usage, appends current process memory stats
|
|
332 Description : Method to write messages to a previously set up log file.
|
|
333 Return type : none
|
|
334 Example : $root->log("Processing file $filename ...", 1);
|
|
335 Exceptions : none
|
|
336
|
|
337 =cut
|
|
338
|
|
339 ################################################################################
|
|
340
|
|
# Writes a message to the log handle, prefixed with "::\t", then passes the
# same text to debug() at level 1.
# Args: message string; mem flag (append the vsz figure reported by ps,
#       followed by " KB"); date flag (append the local time); no_return
#       flag (suppress the trailing newline).
sub log{
  my ($self, $message, $mem, $date, $no_return) = @_;

  if($mem){
    my $vsz = `ps -p $$ -o vsz |tail -1`;
    $message .= " :: ".$vsz;
    chomp $message;
    $message .= " KB";
  }

  $message .= ' - '.localtime() if $date;
  $message .= "\n"              unless $no_return;

  print {$LOGFILE} "::\t$message";

  # Add to debug file if not printing to STDERR?
  # only if verbose?
  # this would double print everything to STDOUT if tee and debug has not redefined STDERR
  $self->debug(1,$message);
}
|
|
366
|
|
367 ################################################################################
|
|
368
|
|
369
|
|
370 =head2 report
|
|
371
|
|
372 Arg[0] : optional string - log message.
|
|
373 Arg[1] : optional boolean - memory usage, appends current process memory stats
|
|
374 Description : Wrapper method for log, which also stores message for summary reporting
|
|
375 Return type : none
|
|
376 Example : $root->report("WARNING: You have not done this or that and want it reported at the end of a script");
|
|
377 Exceptions : none
|
|
378
|
|
379 =cut
|
|
380
|
|
381 ################################################################################
|
|
382
|
|
# With a message: logs it (optionally with memory usage) and stores it for
# the summary. Without a message: prints the stored messages to the log
# handle under a SUMMARY REPORT banner and empties the store.
sub report{
  my ($self, $message, $mem) = @_;

  if(defined $message){
    $self->log($message, $mem);
    push @{$self->{'_report'}}, $message;
    return;
  }

  if(scalar(@{$self->{'_report'}})){
    my $summary = join("\n", @{$self->{'_report'}})."\n";

    print {$LOGFILE} "\n::\tSUMMARY REPORT\t::\n";
    print {$LOGFILE} $summary;

    $self->{'_report'} = [];
  }

  return;
}
|
|
401
|
|
402
|
|
403
|
|
404
|
|
405
|
|
406
|
|
407 ################################################################################
|
|
408
|
|
409 =head2 log_header
|
|
410
|
|
411 Arg[0] : string - log message.
|
|
412 Arg[1] : boolean - memory usage, appends current process memory stats
|
|
413 Description : Wrapper method to format a log as a header line
|
|
414 Return type : none
|
|
415 Example : $root->log_header("Processing file $filename ...", 1);
|
|
416 Exceptions : none
|
|
417
|
|
418 =cut
|
|
419
|
|
420 ################################################################################
|
|
421
|
|
# Logs a message formatted as a header line: two blank lines before it,
# the message wrapped in "::" markers, and one blank line after it.
# The mem and date flags are handed on to log().
sub log_header{
  my ($self, $message, $mem, $date) = @_;

  my $header = "::\t$message\t::\t::";

  print {$LOGFILE} "\n\n";
  $self->log($header, $mem, $date);
  print {$LOGFILE} "\n";
}
|
|
429
|
|
430
|
|
431
|
|
432
|
|
433
|
|
434 ################################################################################
|
|
435
|
|
436 =head2 debug
|
|
437
|
|
438 Description : Method to write debug info to a previously set up debug file.
|
|
439 Over-rides Root.pm on/off style debugging
|
|
440
|
|
441 Args : int: debug level and string: log message.
|
|
442
|
|
443 ReturnType : none
|
|
444
|
|
445 Example : $root->debug(2,"dir=$dir file=$file");
|
|
446
|
|
447 Exceptions : none
|
|
448
|
|
449 =cut
|
|
450
|
|
451 ################################################################################
|
|
452
|
|
# Writes a debug line to the debug handle when the object's debug level is
# defined and at least $level. The line carries the PID, the file:line of
# the outermost stack frame and the file:line of the frame one level up.
sub debug{
  my ($self,$level,$message) = @_;

  #Can we not detect whther message is a scalar, array or hash and Dump or print accordingly?

  my ($prog_name, $call_name) = ("undef", "undef");
  my ($prog_line, $call_line) = (0, 0);
  my $depth = 0;

  # if debug on at the requested level then output the passed message
  if (defined $self->{_debug_level} && $level <= $self->{_debug_level}){

    ######Replace this with Carp method?
    #Walk the whole stack; the last frame seen is the outermost one
    while (my @frame = caller($depth)){

      if ($depth == 1){
        ($call_name, $call_line) = (basename($frame[1]), $frame[2]);
      }

      ($prog_name, $prog_line) = (basename($frame[1]), $frame[2]);
      $depth++;
    }

    #This still attempts to print if file not opened
    print {$DBGFILE} "debug $message\t: [$$ - $prog_name:$prog_line $call_name:$call_line]\n";

    #carp("carping $message");
  }
}
|
|
486
|
|
487
|
|
488 ################################################################################
|
|
489
|
|
490 =head2 debug_hash
|
|
491
|
|
492 Description : Method to write the contents of passed hash to debug output.
|
|
493
|
|
494 Args : int: debug level and hashref.
|
|
495
|
|
496 ReturnType : none
|
|
497
|
|
498 Example : $Helper->debug_hash(3,\%hash);
|
|
499
|
|
500 Exceptions : none
|
|
501
|
|
502 =cut
|
|
503
|
|
504 ################################################################################
|
|
505
|
|
# Dumps the passed hash reference to the debug handle with Data::Dumper
# when the object's debug level is defined and at least $level.
# Args: int debug level, hashref to dump.
sub debug_hash{
  my ($self,$level,$hashref) = @_;

  # if debug on at the requested level then output the passed hash
  if (defined $self->{_debug_level} && $level <= $self->{_debug_level}){
    #Dump the hash itself, not a reference to the reference
    print {$DBGFILE} Data::Dumper::Dumper($hashref)."\n";
  }
}
|
|
516
|
|
517
|
|
518
|
|
519 ################################################################################
|
|
520
|
|
521 =head2 run_system_cmd
|
|
522
|
|
523 Description : Method to control the execution of the standard system() command
|
|
524
|
|
525 ReturnType : int - exit code of the system command
|
|
526
|
|
527 Example : $Helper->run_system_cmd("rmdir /tmp/test");
|
|
528
|
|
529 Exceptions : throws exception if system command returns none zero
|
|
530
|
|
531 =cut
|
|
532
|
|
533 ################################################################################
|
|
534
|
|
535
|
|
536 #Move most of this to EFGUtils.pm
|
|
537 #Maintain wrapper here with throws, only warn in EFGUtils
|
|
538
|
|
# Runs a command through system() and reports failures.
# Args    : command string; optional no_exit flag (warn instead of throwing
#           when the command fails).
# Returns : 0 on success; the command's exit value when it exited non-zero;
#           128 + signal number when the child was killed by a signal;
#           -1 when the command could not be executed at all.
# Throws  : on any failure unless no_exit is set.
# When the debug level is 3 or more and a debug file is defined, the
# command's STDOUT and STDERR are appended to that file.
sub run_system_cmd{
  my ($self, $command, $no_exit) = @_;

  $self->debug(3, "system($command)");

  # decide where the command line output should be redirected
  #This should account for redirects
  #This just sends everything to 1 no?
  my $redirect = '';

  if (defined $self->{_debug_level} && ($self->{_debug_level} >= 3) &&
      defined $self->{_debug_file}){
    $redirect = " >>".$self->{_debug_file}." 2>&1";
  }

  # execute the passed system command
  my $status   = system("$command $redirect");
  my $os_error = "$!";#capture now, before anything else can reset it
  my $exit_code;

  if ($status == -1) {
    #Could not run the command at all; shifting -1 would give a meaningless value
    $exit_code = -1;
    warn "Failed to execute: $os_error\n";
  }
  elsif ($status & 127) {
    #Killed by a signal: the high byte is 0 here, so it must not be read as success
    $exit_code = 128 + ($status & 127);
    warn sprintf("Child died with signal %d, %s coredump\nError:\t%s",
                 ($status & 127), (($status & 128) ? 'with' : 'without'), $os_error);
  }
  else{
    $exit_code = $status >> 8;#get the true exit code

    if($exit_code != 0){
      warn sprintf("Child exited with value %d\nError:\t%s\n", $exit_code, $os_error);
    }
  }

  if ($exit_code != 0){

    if (! $no_exit){
      throw("System command failed:\t$command\nExit code:\t$exit_code\n$os_error");
    }
    else{
      warn("System command returned non-zero exit code:\t$command\nExit code:\t$exit_code\n$os_error");
    }
  }

  #reverse boolean logic for perl...can't do this anymore due to tab2mage successful non-zero exit codes :/

  return $exit_code;
}
|
|
592
|
|
593
|
|
594 #add sys_get method ehre to handle system calls which retrieve data?
|
|
595 #i.e.backtick commands `find . -name *fasta`
|
|
596 #or use want or flag with above method?
|
|
597 #should open pipe instead to capture error?
|
|
598
|
|
# Standard checked accessor for data stored on the object.
# Args    : data type (top-level key of the object hash); optional data
#           name (key within that type's hash).
# Returns : the named entry when a name is given, else the whole entry
#           stored under the type.
# Throws  : if the requested type, or the name within the type, does not exist.
sub get_data{
  my ($self, $data_type, $data_name) = @_;

  #This method is just to provide standard checking for specific get_data/config methods

  if(! defined $data_name){
    throw("Defs data type $data_type does not exist\n") if (! exists $self->{"${data_type}"});
    return $self->{"${data_type}"};
  }

  #Read the type entry into a local first, so a failed name lookup
  #does not autovivify an empty hash for the type on the object
  my $type_data = $self->{"${data_type}"};

  if(! (defined $type_data && exists $type_data->{$data_name})){
    throw("Defs data name $data_name for type '$data_type' does not exist\n");
  }

  return $type_data->{$data_name};
}
|
|
612
|
|
613
|
|
614 #sub Timer{
|
|
615 # my ($self) = shift;
|
|
616
|
|
617 # $self->{'_timer'} = new Devel::Timer() if(! defined $self->{'_timer'});
|
|
618
|
|
619 # return $self->{'_timer'};
|
|
620
|
|
621 #}
|
|
622
|
|
623
|
|
# Maps each header field name to its position in the header array.
# Args    : arrayref of header names; optional arrayref of mandatory fields.
# Returns : hashref of field name => index (for repeated names the last
#           position is kept).
# Throws  : if a mandatory field is absent from the header.
sub set_header_hash{
  my ($self, $header_ref, $fields) = @_;

  my %hpos = map { ($header_ref->[$_], $_) } (0..$#{$header_ref});

  foreach my $field(@{$fields || []}){
    next if exists $hpos{$field};
    throw("Header does not contain mandatory field:\t${field}");
  }

  return \%hpos;
}
|
|
646
|
|
647 #Move this to EFGUtils?
|
|
648
|
|
# Backs up a file by renaming it to "<path>.<HH:MM:SS>" (local time).
# Does nothing if the path is not a plain file.
# Args    : path of the file to back up.
# Throws  : if no path is given.
# Warns   : if the rename fails (the file is then left in place).
sub backup_file{
  my ($self, $file_path) = @_;

  throw("Must define a file path to backup") if(! $file_path);

  if (-f $file_path) {
    $self->log("Backing up:\t$file_path");

    #rename() instead of a shell 'mv', so paths containing spaces or
    #shell metacharacters are handled and failures are reported
    my ($sec, $min, $hour) = localtime();
    my $backup_path = sprintf('%s.%02d:%02d:%02d', $file_path, $hour, $min, $sec);

    rename($file_path, $backup_path)
      or warn "Failed to back up $file_path to $backup_path:\t$!\n";
  }

  return;
}
|
|
662
|
|
663 #This should move to Utils
|
|
664 #as it is a simple string manipulation
|
|
665
|
|
# Splits a database name on underscores and returns an arrayref holding
# its last two tokens, in order.
sub get_schema_and_build{
  my ($self, $dbname) = @_;

  my @tokens = split(/_/, $dbname);
  my $last   = $#tokens;

  return [ $tokens[$last - 1], $tokens[$last] ];
}
|
|
671
|
|
672 =head2 get_regbuild_set_states
|
|
673
|
|
674 Arg [1] : Bio::EnsEMBL::DBAdaptor
|
|
675 Example : my ($dset_states, $rset_states, $fset_states) = $helper->get_regbuild_set_states($db);
|
|
676 Description: Returns Array refs of appropriate states for sets used in the regulatory build
|
|
677 Returntype : Array
|
|
678 Exceptions : Warns if cannot find chromosome CoordSystem
|
|
679 Caller : HealthChecker & regulatory build code
|
|
680 Status : At risk
|
|
681
|
|
682 =cut
|
|
683
|
|
684
|
|
# Returns three array refs of status names, for DataSets, ResultSets and
# FeatureSets respectively.
# Arg     : DB adaptor providing get_CoordSystemAdaptor.
# If no 'chromosome' CoordSystem is found, a warning is issued and all
# three lists are empty. Otherwise the ResultSet and FeatureSet lists
# include 'IMPORTED_<version>' built from the chromosome CoordSystem version.
sub get_regbuild_set_states{
  my ($self, $db) = @_;

  my $cs_a = $db->get_CoordSystemAdaptor;

  #These states need to be mirrored in RegulatorySets.java

  my $chrom_cs = $cs_a->fetch_by_name('chromosome');
  my (@dset_states, @rset_states, @fset_states);

  if(! defined $chrom_cs){
    #This species most likely does not have a regbuild
    #really just need to get the 'highest' level here
    warn "Could not find Chromosome CoordSystem. ".$db->dbc->dbname.". most likely does not contain a RegulatoryBuild";
  }
  else{
    #Reuse the CoordSystem fetched above rather than looking it up again
    my $imp_cs_status = 'IMPORTED_'.$chrom_cs->version;

    #What about non-chromosome assemblies?
    #top level will not return version...why not?
    @dset_states = ('DISPLAYABLE');
    @rset_states = (@dset_states, 'DAS_DISPLAYABLE', $imp_cs_status);
    @fset_states = (@rset_states, 'MART_DISPLAYABLE');
  }

  return (\@dset_states, \@rset_states, \@fset_states);
}
|
|
712
|
|
713
|
|
714
|
|
715 =head2 define_and_validate_sets
|
|
716
|
|
717 Arg [1] : hash - set constructor parameters:
|
|
718 -dbadaptor Bio::EnsEMBL::Funcgen::DBAdaptor
|
|
719 -name Data/FeatureSet/ResultSet name to create
|
|
720 -feature_type Bio::EnsEMBL::Funcgen::FeatureType
|
|
721 -cell_type Bio::EnsEMBL::Funcgen::CellType
|
|
722 -analysis FeatureSet Bio::EnsEMBL::Analysis
|
|
723 -feature_class e.g. annotated or regulatory
|
|
724 -description FeatureSet description
|
|
725 -recovery Allows definition of extant sets so long as they match
|
|
726 -append Boolean - Forces import on top of previously imported data
|
|
727 -rollback Rolls back product feature set.
|
|
728 -supporting_sets Complete set of pre-stored supporting or input sets for this DataSet
|
|
729 -slices ARRAYREF of Slices to rollback
|
|
730 Example : my $dset = $self->define_and_validate_Set(%params);
|
|
731 Description: Checks whether set is already in DB based on set name, rolls back features
|
|
732 if roll back flag set. Or creates new DataSet and Feature|ResultSet if not present.
|
|
733 Returntype : Bio::EnsEMBL::Funcgen::DataSet
|
|
734 Exceptions : Throws if DBAdaptor param not valid
|
|
735 Caller : Importers and Parsers
|
|
736 Status : At risk
|
|
737
|
|
738 =cut
|
|
739
|
|
740 sub define_and_validate_sets{
|
|
741 my $self = shift;
|
|
742
|
|
743 #change slice to slices to support multi slice import from InputSet::define_sets
|
|
744 #Can't do full rollback in slice mode
|
|
745 #This may not be safe in slice mode as we will then have mixed inputs/outputs
|
|
746
|
|
747 my ($name, $anal, $ftype, $ctype, $type, $append, $db, $ssets, $description, $rollback, $recovery, $slices, $display_label) = rearrange(['NAME', 'ANALYSIS', 'FEATURE_TYPE', 'CELL_TYPE', 'FEATURE_CLASS', 'APPEND',
|
|
748 'DBADAPTOR', 'SUPPORTING_SETS', 'DESCRIPTION', 'ROLLBACK', 'RECOVERY', 'SLICES', 'DISPLAY_LABEL'], @_);
|
|
749
|
|
750
|
|
751 #VALIDATE CONFIG HASH
|
|
752 #$config_hash ||= {};#default so exists will work without testing
|
|
753 #if(keys %{$config_hash}){
|
|
754 # #There is a module to handle config hashes somewhere!
|
|
755 # throw('config_hash not yet implemented for define_and_validate_sets');
|
|
756 #my @known_config = ('full_delete');#We never want full delete here as this is a create method!
|
|
757 #Can we set vars from has by refs like getopts?
|
|
758 #map {
|
|
759 # throw("Found unsupported config hash parameter:\t$_") if ! grep(/^${_}$/, @known_config);
|
|
760 #} keys %{$config_hash};
|
|
761 # }
|
|
762
|
|
763 #define rollback level
|
|
764 #extract this to _set_rollback_level($rollback_mode, $feature_class)
|
|
765 my $rollback_level = 0;
|
|
766
|
|
767 #These should be globally defined so all rollback methods can use them
|
|
768 my %valid_rollback_modes =
|
|
769 (
|
|
770 product_features => 1,
|
|
771 #Just product features and FeatureSet status, what about DataSet status?
|
|
772 #full delete does nothing here?
|
|
773
|
|
774 sets => 2,
|
|
775 #Includes product_features and
|
|
776 #deletes supporting_sets entries unless we specify append
|
|
777 #revoke all states on Feature/Data/InputSets
|
|
778 #Full delete removes Feature/Data/InputSet entries
|
|
779 #Never includes ResultSets!
|
|
780
|
|
781 supporting_features => 3,
|
|
782 #Includes product_feature and sets
|
|
783 #Removes all states and supporting features
|
|
784 #inc. ResultSet results/ResultFeatures
|
|
785 #Full_delete remove supporting set entries
|
|
786 #Otherwise just rollback states for affected sets
|
|
787 );
|
|
788
|
|
789 if($rollback){
|
|
790 if(! exists $valid_rollback_modes{$rollback}){
|
|
791 #Default to some sensible values
|
|
792 $rollback = 'product_features';#default for FeatureSets
|
|
793
|
|
794 #Always want overwrite supporting sets if there is a difference
|
|
795 $rollback = 'sets' if ($type eq 'regulatory');
|
|
796 $rollback = 'supporting_sets' if ($type eq 'result');
|
|
797
|
|
798 warn ("You have not set a valid rollback mode(product_features|sets|supporting_features), defaulting to $rollback for feature class $type\n");
|
|
799 }
|
|
800
|
|
801 $rollback_level = $valid_rollback_modes{$rollback};
|
|
802 }
|
|
803
|
|
804
|
|
805 if($slices && (ref($slices) ne 'ARRAY')){
|
|
806 throw('-slices param must be an ARRAYREF of Bio::EnsEMBL::Slice objects');
|
|
807 #Rest of validation done in other methods
|
|
808 }
|
|
809
|
|
810
|
|
811
|
|
812 #But how are we going to resolve the append behaviour when we also want to validate the ssets?
|
|
813 #Can't, so append also functions to enable addition in the absence of some or all previous data/esets?
|
|
814 #No this is not true, we want to be able to fetch an extant set for import,
|
|
815 #we just need to be aware of sset IMPORTED status?
|
|
816 #This should be a recovery thing, allow fetch, but validate sets?
|
|
817
|
|
818
|
|
819 #Check mandatory params
|
|
820 if(! (ref($db) && $db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'))){
|
|
821 throw('Must provide a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor');
|
|
822 }
|
|
823
|
|
824 throw('Must provide a -name ') if(! defined $name);
|
|
825
|
|
826 #Not necessarily, just do rollback then append?
|
|
827 #But then we'd potentially have a supporting set associated which has had it's data removed from the feature set.
|
|
828 #Generating sets for an ExpSet will always have append set
|
|
829 #This could be valid for generically grabing/creating sets for adding new supporting sets e.g. reg build
|
|
830 throw('-append and -rollback are mutually exclusive') if $rollback_level && $append;
|
|
831
|
|
832 #This will never happen due to previous test? append will always fail?
|
|
833 #warn('You are defining a pre-existing FeatureSet without rolling back'.
|
|
834 # ' previous data, this could result in data duplication') if $append && ! $rollback_level;
|
|
835 #Is this really possible, surely the supporting set will fail to store due to unique key?
|
|
836
|
|
837
|
|
838 #Should we warn here about append && recovery?
|
|
839 #Aren't these mutually exclusive?
|
|
840 #Do we know if we have new data? append should override recovery, or just specifiy append
|
|
841 #This will stop the import and highlight the issue to the user
|
|
842 #We need to be able to run with both otherwise the import will not work
|
|
843
|
|
844
|
|
845 throw('Must provide a -feature_class e.g. annotated, external, result or regulatory') if(! defined $type);
|
|
846 #Check for annotated, external, regulatory etc here?
|
|
847 #Should never be external as we don't have DataSets for external sets?
|
|
848
|
|
849 $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureType', $ftype);
|
|
850 if (defined $ctype){
|
|
851 $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::CellType', $ctype);
|
|
852 }
|
|
853 elsif($type ne 'regulatory'){
|
|
854 throw('Only Data/FeatureSets with type \'regulatory\' can have an undefined CellType');
|
|
855 #Coudl extend this to core set by name eq 'RegulatoryFeatures'?
|
|
856 }
|
|
857
|
|
858 $db->is_stored_and_valid('Bio::EnsEMBL::Analysis', $anal);
|
|
859
|
|
860 my $dset_adaptor = $db->get_DataSetAdaptor;
|
|
861 my $fset_adaptor = $db->get_FeatureSetAdaptor;
|
|
862 my $rset_adaptor = $db->get_ResultSetAdaptor;
|
|
863
|
|
864 #DataSet centric definition to enable multiple DataSets
|
|
865 #to be generated from the same supporting sets
|
|
866 my $dset = $dset_adaptor->fetch_by_name($name);
|
|
867 my ($fset, $rset, @input_sets);
|
|
868
|
|
869 #Validate stored vs passed set data
|
|
870
|
|
871 if(defined $dset){
|
|
872 $self->log('Found Stored DataSet '.$dset->name);
|
|
873
|
|
874 if($type ne 'result'){#i.e. annotated
|
|
875
|
|
876 #Does this account for regulatory?
|
|
877
|
|
878 $fset = $dset->product_FeatureSet;
|
|
879 #Here we have the possiblity that a feature_set with a different name may have
|
|
880 #been associated with the DataSet
|
|
881
|
|
882 if(defined $fset){
|
|
883 $self->log("Found associated product FeatureSet:\t".$fset->name);
|
|
884
|
|
885 #if(! $clobber &&
|
|
886 if($fset->name ne $name){
|
|
887 throw('Invalid product FeatureSet name ('.$fset->name.') for DataSet ('.$name.'). Rollback will overwrite the FeatureSet and mismatched name will be retained.');
|
|
888 #Need to clobber both or give explicit name for datasets or rename dataset???
|
|
889 #Force this throw for now, make this fix manual as we may end up automatically overwriting data
|
|
890 }
|
|
891 }
|
|
892
|
|
893 #This needs to be modified to support InputSets in ResultSets?
|
|
894 #Would never have mixed Input/ResultSets so no need
|
|
895 #Could potential need to do it for mixed Result/FeatureSets
|
|
896 #if we ever use an analysis which uses both set types
|
|
897
|
|
898 #check supporting_sets here if defined
|
|
899 #We have the problem here of wanting to add ssets to a previously existing dset
|
|
900 #we may not know the original sset, or which of the ssets are new
|
|
901 #Hence there is a likelihood of a mismatch.
|
|
902
|
|
903 #Much of this is replicated in store_udpated sets
|
|
904
|
|
905
|
|
906 if(defined $ssets){
|
|
907 my @sorted_ssets = sort {$a->dbID <=> $b->dbID} @{$ssets};
|
|
908 my @stored_ssets = sort {$a->dbID <=> $b->dbID} @{$dset->get_supporting_sets};
|
|
909 my $mismatch = 0;
|
|
910
|
|
911 $mismatch = 1 if(scalar(@sorted_ssets) != scalar(@stored_ssets));
|
|
912
|
|
913 if(! $mismatch){
|
|
914
|
|
915 for my $i(0..$#stored_ssets){
|
|
916
|
|
917 if($stored_ssets[$i]->dbID != $sorted_ssets[$i]->dbID){
|
|
918 $mismatch=1;
|
|
919 last;
|
|
920 }
|
|
921 }
|
|
922 }
|
|
923
|
|
924
|
|
925
|
|
926
|
|
927 if($mismatch){
|
|
928 #We're really print this names here which may hide the true cell/feature/anal type differences.
|
|
929 my $mismatch = 'There is a (name/type/analysis) mismatch between the supplied supporting_sets and the'.
|
|
930 ' supporting_sets in the DB for DataSet '.$dset->name."\n\nStored:\n"
|
|
931 .join(', ', (map { $_->name } @stored_ssets))."\n\nSupplied supporting_sets:\n"
|
|
932 .join(', ', (map { $_->name } @sorted_ssets));
|
|
933
|
|
934
|
|
935 if($append){
|
|
936 warn($mismatch."\n\nAppending supporting set data to unvalidated supporting sets");
|
|
937 }
|
|
938 elsif($rollback_level > 1){#supporting set rollback
|
|
939 warn($mismatch."\n\nReplacing previously stored supporting sets with newly defined sets\n");
|
|
940
|
|
941 if($slices){
|
|
942 warn("WARNING:\tPerforming supporting_set rollback in slice mode. This may corrupt the supporting_set definition for other slices in this DataSet if they are not re-generated using the same supporting_sets\n");
|
|
943 }
|
|
944
|
|
945 #Remove supporting_set entries
|
|
946 #This should be in a rollback_DataSet method
|
|
947 #This has moved to DataSetAdaptor::store_update_sets
|
|
948
|
|
949 #Reset supporting sets
|
|
950 $dset->{'supporting_sets'} = undef;
|
|
951 $dset->add_supporting_sets(\@sorted_ssets);
|
|
952 #Move this to last block?
|
|
953 #This will currently fail as it test for product_FeatureSet
|
|
954 #How do we get around this? Remove IMPORTED status and only throw if fset has IMPORTED status?
|
|
955
|
|
956 #warn "pre store sset ".@{$dset->get_supporting_sets};
|
|
957
|
|
958 #($dset) = @{$dset_adaptor->store_updated_sets([$dset], $rollback_level)};
|
|
959 #$dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory';
|
|
960 }
|
|
961 else{
|
|
962 throw($mismatch);
|
|
963 }
|
|
964 }
|
|
965 }
|
|
966 else{
|
|
967 warn("No supporting sets defined, skipping supporting set validation for definition of DataSet:\t".$name);
|
|
968 }
|
|
969 }
|
|
970 else{#result_features from InputSet
|
|
971 #Do we ever pass supporting sets here?
|
|
972 #Do we need to test vs stored_sets?
|
|
973
|
|
974
|
|
975 #There is the potential for more than one ResultSet to be associated with DataSet
|
|
976 #But as we are using the same name, this restricts the number wrt the cardinality
|
|
977 #of the name field. i.e. 1 name per analysis/cell_type/feature_type.
|
|
978 #This now works slightly differently to the rest of this method as we
|
|
979 #need to treat the ResultSet as we are currently treating the FeatureSet below.
|
|
980
|
|
981 #However, the use case of this method is for one InputSet giving rise to one ResultSet
|
|
982 #Hence just throw if we find more than one or have a name mismatch???
|
|
983 my @stored_sets = @{$dset->get_supporting_sets};
|
|
984
|
|
985
|
|
986
|
|
987 #THis assumes we will always have supporting sets
|
|
988 #and is failing as we have removed this test in DataSet::new
|
|
989 #But where are we storing it without the supporting set?
|
|
990
|
|
991 if(scalar(@stored_sets) > 1){
|
|
992 throw('define_and_validate_sets does not yet support DataSets with multiple supporting ResultSets for result_features');
|
|
993 }
|
|
994 elsif(! @stored_sets){
|
|
995 throw("DataSet($name) does not have any stored supporting sets. These should have been defined when storing the DataSet");
|
|
996 #Or should we handle this?
|
|
997 }
|
|
998
|
|
999 $rset = $stored_sets[0];
|
|
1000
|
|
1001 if($rset->set_type ne 'result'){
|
|
1002 throw("DataSet already contains a supporting set which is not a ResultSet:\t".$rset->set_type."\t".$stored_sets[0]->name);
|
|
1003 }
|
|
1004 elsif($ssets){
|
|
1005 #Do we ever pass supporting sets, test for completeness
|
|
1006
|
|
1007 #Just test we have the same supplied ssets if it is defined
|
|
1008 if(scalar(@$ssets) != 1){
|
|
1009 throw("ResultFeature data sets currently only support one supporting ResultSet.\nSupproting sets:\t".
|
|
1010 join(', ', (map { $_->name.'('.$_->set_type } @$ssets)));
|
|
1011 }
|
|
1012 elsif(! ($rset->dbID == $ssets->[0]->dbID) &&
|
|
1013 ($ssets->[0]->set_type eq 'result')){
|
|
1014 throw('Supplied supporting set('.$ssets->[0]->name.') does not match stored supporting set('.$rset->name.')');
|
|
1015 }
|
|
1016 }
|
|
1017
|
|
1018 @input_sets = @{$rset->get_InputSets};
|
|
1019 }
|
|
1020 }
|
|
1021
|
|
1022
|
|
1023
|
|
1024 if($type eq 'result'){
|
|
1025
|
|
1026 #Validate the defined InputSets
|
|
1027 if (scalar(@$ssets) > 1) {
|
|
1028 throw("define_and_validate_sets does not yet support multiple InputSets for defining a ResultSet:\t".$name);
|
|
1029
|
|
1030 }
|
|
1031
|
|
1032 if ($ssets->[0]->set_type ne 'input') {
|
|
1033 throw("To define a ResultSet($name) containing result_features, you must provide and InputSet as a supporting set\nArray based ResultSets(i.e. experimental_chip/channel) are not defined using this method, see specific Import Parsers.");
|
|
1034 }
|
|
1035
|
|
1036
|
|
1037 #Try and grab the rset just in case it has been orphaned somehow
|
|
1038 if (! defined $rset) {
|
|
1039 $rset = $rset_adaptor->fetch_all_by_name($name, $ftype, $ctype, $anal)->[0];
|
|
1040 #Should only ever be one given all parts of unique key
|
|
1041 @input_sets = @{$rset->get_InputSets} if $rset;
|
|
1042
|
|
1043 }
|
|
1044
|
|
1045
|
|
1046 if (defined $rset) { #Validate stored InputSets
|
|
1047
|
|
1048 if (scalar(@input_sets) != scalar(@$ssets)) {
|
|
1049 throw('Found mismatch between number of previously stored InputSets('.scalar(@input_sets).') and defined InputSets('.scalar(@$ssets).'). You must provide a complete list of InputSets to define your ResultSet.');
|
|
1050 }
|
|
1051
|
|
1052 if ($input_sets[0]->dbID != $ssets->[0]->dbID) {
|
|
1053 throw('Found dbID mismatch between previously stored InputSet('.$input_sets[0]->name.') and define InputSet('.$ssets->[0]->name.')');
|
|
1054 }
|
|
1055
|
|
1056 #rollback ResultSet/InputSet here?
|
|
1057 if($rollback_level > 2){
|
|
1058 warn "rollback not yet fully implemented for Result/InputSets";
|
|
1059
|
|
1060 #Does this need to be by slice?
|
|
1061 #What about states if we are running in parallel?
|
|
1062
|
|
1063 if($slices){
|
|
1064 map {$self->rollback_ResultSet($rset, $rollback, $_)} @$slices;
|
|
1065 }
|
|
1066 else{
|
|
1067 $self->rollback_ResultSet($rset, $rollback);
|
|
1068 }
|
|
1069
|
|
1070 }
|
|
1071
|
|
1072 }
|
|
1073 else{#define ResultSet
|
|
1074 ($rset) = @{$rset_adaptor->store(Bio::EnsEMBL::Funcgen::ResultSet->new
|
|
1075 (
|
|
1076 -name => $name,
|
|
1077 -feature_type => $ftype,
|
|
1078 -cell_type => $ctype,
|
|
1079 -table_name => 'input_set',
|
|
1080 -table_id => $ssets->[0]->dbID,
|
|
1081 -analysis => $anal
|
|
1082 )
|
|
1083 )};
|
|
1084
|
|
1085 }
|
|
1086 }
|
|
1087 else{#annotated/regulatory/external i.e. FeatureSet
|
|
1088
|
|
1089 #Try and grab the fset just in case it has been orphaned somehow
|
|
1090 if(! defined $fset){
|
|
1091 $fset = $fset_adaptor->fetch_by_name($name);
|
|
1092
|
|
1093 if(defined $fset){
|
|
1094 #Now we need to test whether it is attached to a dset
|
|
1095 #Will be incorrect dset if it is as we couldn't get it before
|
|
1096 #else we test the types and rollback
|
|
1097 $self->log("Found stored orphan FeatureSet:\t".$fset->name);
|
|
1098
|
|
1099 my $stored_dset = $dset_adaptor->fetch_by_product_FeatureSet($fset);
|
|
1100
|
|
1101 if(defined $stored_dset){
|
|
1102 throw('Found FeatureSet('.$name.') associated with incorrect DataSet('.$stored_dset->name.
|
|
1103 ").\nTry using another -name in the set parameters hash");
|
|
1104
|
|
1105 }
|
|
1106 }
|
|
1107 }
|
|
1108
|
|
1109 #Rollback or create FeatureSet
|
|
1110 if(defined $fset){
|
|
1111
|
|
1112 if($rollback_level){
|
|
1113 #Don't check for IMPORTED here as we want to rollback anyway
|
|
1114 #Not forcing delete here as this may be used as a supporting set itself.
|
|
1115
|
|
1116 $self->rollback_FeatureSet($fset, undef, $slices);
|
|
1117 }
|
|
1118 elsif ($append || $recovery) {
|
|
1119 #This is only true if we have an sset mismatch
|
|
1120
|
|
1121 #Do we need to revoke IMPORTED here too?
|
|
1122 #This behaves differently dependant on the supporting set.
|
|
1123 #InputSet status refers to loading in FeatureSet, where as ResultSet status refers to loading into result table
|
|
1124
|
|
1125 #So we really want to revoke it
|
|
1126 #But this leaves us vulnerable to losing data if the import crashes after this point
|
|
1127 #because we have no way of assesing which is complete data and which is incomplete data
|
|
1128 #within a feature set.
|
|
1129 #This means we need a status on supporting_set, not InputSet or ResultSet
|
|
1130 #as this has to be in the context of a dataset.
|
|
1131 #Grrr, this means we need a SupportingSet class which simply wraps the InputSet/ResultSet
|
|
1132 #We also need a single dbID for the supporting_set table
|
|
1133 #Which means we will have to do some wierdity with the normal dbID implementation
|
|
1134 #i.e. Have supporting_set_id, so we can still access all the normal dbID method for the given Set class
|
|
1135 #This will have to be hardcoded into the state methods
|
|
1136 #Also will need to specify when we want to store as supporting_status or normal set status.
|
|
1137
|
|
1138 #This is an awful lot to protect against vulnerability
|
|
1139 #Also as there easy way to track what features came from which supporting set
|
|
1140 #There isn't currently a viable way to rollback, hence will have to redo the whole set.
|
|
1141
|
|
1142 #Maybe we can enforce this by procedure?
|
|
1143 #By simply not associating the supporting set until it has been loaded into the feature set?
|
|
1144 #This may cause even more tracking problems
|
|
1145
|
|
1146 #Right then, simply warn and do not revoke feature_set IMPORTED to protect old data?
|
|
1147 #Parsers should identify supporting_sets(InputSets) which exist but do not have IMPORTED
|
|
1148 #status and fail, specifying -recover which will rollback_FeatureSet which will revoke the IMPORTED status
|
|
1149
|
|
1150 #This can mean a failed import can leave a partially imported feature set with the IMPORTED status!!!
|
|
1151
|
|
1152 #We just need to handle InputSets and ResultSets differently.
|
|
1153 #In parsers or here?
|
|
1154 #Probably best in the parsers as this is where the states are set.
|
|
1155
|
|
1156
|
|
1157 #Should we throw here for ResultSet?
|
|
1158 #Force rollback of FeatureSet first or create new one?
|
|
1159 #And throw for InputSet?
|
|
1160 #This again comes back to whether we will ever have more than one file
|
|
1161 #for a give InputSet, currently not.
|
|
1162
|
|
1163 $self->log("WARNING\t::\tAdding data to a extant FeatureSet:\t".$fset->name);
|
|
1164 } else {
|
|
1165 throw('Found extant FeatureSet '.$fset->name.'. Maybe you want to specify the rollback, append or recovery parameter or roll back the FeatureSet separately?');
|
|
1166 }
|
|
1167 } else {
|
|
1168 #create a new one
|
|
1169 $self->log("Creating new FeatureSet:\t".$name);
|
|
1170
|
|
1171 $fset = Bio::EnsEMBL::Funcgen::FeatureSet->new(
|
|
1172 -name => $name,
|
|
1173 -feature_type => $ftype,
|
|
1174 -cell_type => $ctype,
|
|
1175 -analysis => $anal,
|
|
1176 -feature_class => $type,
|
|
1177 -description => $description,
|
|
1178 -display_label => $display_label,
|
|
1179 );
|
|
1180 ($fset) = @{$fset_adaptor->store($fset)};
|
|
1181 }
|
|
1182 }
|
|
1183
|
|
1184 #Create/Update the DataSet
|
|
1185 if(defined $dset){
|
|
1186 #Could do these updates above?
|
|
1187 #But delayed to reduce redundancy
|
|
1188
|
|
1189 if($type ne 'result'){
|
|
1190
|
|
1191 if(! defined $dset->product_FeatureSet){
|
|
1192 $self->log("Updating DataSet with new product FeatureSet:\t".$fset->name);
|
|
1193 $dset->product_FeatureSet($fset);
|
|
1194 }
|
|
1195
|
|
1196 $dset = $dset_adaptor->store_updated_sets([$dset], $rollback_level)->[0];
|
|
1197 #This cannot store the focus sets as we don't know which are which yet
|
|
1198 #Only the script knows this
|
|
1199 # $dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory';
|
|
1200 }
|
|
1201 else{
|
|
1202 #We may have the case where we have a DataSet(with a FeatureSet) but no ResultSet
|
|
1203 #i.e. Load result_features after peak calls
|
|
1204 #So update dset with ResultSet
|
|
1205
|
|
1206 if(! @{$dset->get_supporting_sets}){
|
|
1207 $self->log("Updating DataSet with new ResultSet:\t".$rset->name);
|
|
1208 $dset->add_supporting_sets([$rset]);
|
|
1209 $dset = $dset_adaptor->store_updated_sets([$dset], $rollback_level)->[0];
|
|
1210 }
|
|
1211 }
|
|
1212 }
|
|
1213 else{
|
|
1214 $self->log("Creating new ${type}_feature DataSet:\t".$name);
|
|
1215
|
|
1216 if($type ne 'result'){
|
|
1217 ($dset) = @{$dset_adaptor->store(Bio::EnsEMBL::Funcgen::DataSet->new
|
|
1218 (
|
|
1219 -name => $name,
|
|
1220 -feature_set => $fset,
|
|
1221 -supporting_sets => $ssets,
|
|
1222 ))};
|
|
1223 #$dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory';
|
|
1224 }
|
|
1225 else{
|
|
1226 warn "creating dataset $name with supporting set $rset";
|
|
1227 ($dset) = @{$dset_adaptor->store(Bio::EnsEMBL::Funcgen::DataSet->new
|
|
1228 (
|
|
1229 -name => $name,
|
|
1230 -supporting_sets => [$rset],
|
|
1231 ))};
|
|
1232 }
|
|
1233 }
|
|
1234
|
|
1235 return $dset;
|
|
1236 }
|
|
1237
|
|
1238
|
|
1239 #Rollback/load methods migrated from DBAdaptor
|
|
1240 #Move to SetAdaptors, better located and will remove cyclical dependancy
|
|
1241
|
|
=head2 rollback_FeatureSet

  Arg [0]    : Bio::EnsEMBL::Funcgen::FeatureSet
  Arg [1]    : optional - boolean force delete flag, required if this FeatureSet is used as a
               supporting set for another DataSet.
  Arg [2]    : optional - arrayref of Bio::EnsEMBL::Slice objects to rollback
  Arg [3]    : optional - boolean flag to perform a full rollback i.e. the default will just remove
               the features, specifying this will also delete the feature_set record
  Example    : $self->rollback_FeatureSet($fset);
  Description: Deletes all status and feature entries for this FeatureSet.
               Checks whether FeatureSet is a supporting set in any other DataSet.
  Returntype : none
  Exceptions : Throws if any delete fails or if db method unavailable
  Caller     : Importers and Parsers
  Status     : At risk

=cut
|
|
1259
|
|
1260
|
|
# Rolls back a stored FeatureSet: removes its features together with the
# dependent regulatory_attribute, object_xref and associated_feature_type
# records, optionally restricted to the given Slices.
#
#   $fset         - stored FeatureSet (must have an adaptor)
#   $force_delete - if true, only warn when $fset is a supporting set of
#                   other DataSets; otherwise throw in that case
#   $slices       - optional arrayref of Bio::EnsEMBL::Slice objects
#   $full_delete  - if true, also delete the feature_set and data_set records
#                   (and regbuild meta entries); not allowed with $slices
#
# States are only revoked for a full (non-Slice) rollback. Returns nothing.
sub rollback_FeatureSet{
  my ($self, $fset, $force_delete, $slices, $full_delete) = @_;

  #Remove force delete and just throw?
  #Currently only used in project_feature_set.
  #May want to keep an old RegBuild for mapping/comparison?
  #Could get around this by simply deleting the data_set? Unknown impact.
  #Move to config hash?
  #No need for rollback_level here as we always want to do the same thing

  my $sql;
  my $slice_join = '';
  my $table      = $fset->feature_class.'_feature';
  my $adaptor    = $fset->adaptor || throw('FeatureSet must have an adaptor');
  my $db         = $adaptor->db;
  #Cyclical dependency here, so not strictly necessary.
  $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureSet', $fset);

  $self->log_header('Rolling back '.$fset->feature_class." FeatureSet:\t".$fset->name);

  if($slices){

    if($full_delete){
      throw("Cannot specify a full_delete for a Slice based rollback:\t".$fset->name);
    }

    #'!' binds tighter than 'eq', so this has to be written with 'ne'
    if(ref($slices) ne 'ARRAY'){
      throw('Slices must be an ARRAYREF of Slice objects');
    }

    foreach my $slice(@$slices){

      if(! (ref($slice) && $slice->isa('Bio::EnsEMBL::Slice'))){
        throw("Must pass a valid Bio::EnsEMBL::Slice");
      }
    }

    $self->log("Restricting to slices:\n\t\t".join("\n\t\t", (map { $_->name } @$slices)) );
    #Allow subslice rollback only for one slice at a time
    my $subslice = (scalar(@$slices) == 1) ? 1 : 0;
    my (@sr_ids, @sr_slices); #parallel arrays: seq_region_id and the Slice it came from

    foreach my $slice(@$slices){
      my $efg_sr_id = $fset->get_FeatureAdaptor->get_seq_region_id_by_Slice($slice);

      if(! $efg_sr_id){
        $self->log("Slice is not present in eFG DB:\t".$slice->name);
        next;
      }

      if(! $subslice){#Test is not subslice
        my $full_slice = $slice->adaptor->fetch_by_region(undef, $slice->seq_region_name);

        if(($slice->start != 1) ||
           ($full_slice->end != $slice->end)){
          throw("Can only rollback subslices one at a time:\nRollback slice:\t"
                .$slice->name."\nFull slice:\t".$full_slice->name);
        }
      }

      push @sr_ids,    $efg_sr_id;
      push @sr_slices, $slice;
    }

    if(! @sr_ids){
      #An empty id list would generate invalid SQL ('in ()'), and there is
      #nothing stored on these seq_regions to remove anyway.
      $self->log('None of the specified slices are present in the eFG DB, nothing to rollback for FeatureSet '.$fset->name);
      return;
    }
    elsif(scalar(@sr_ids) == 1){
      #Allow sub slice rollback
      #add range here from meta coord?
      #Use the Slice which actually yielded the seq_region_id; this is not
      #necessarily $slices->[0] if other slices were absent from the DB.
      $slice_join = " and f.seq_region_id=$sr_ids[0] and f.seq_region_start<=".$sr_slices[0]->end.' and f.seq_region_end>='.$sr_slices[0]->start;
    }
    else{
      $slice_join = ' and f.seq_region_id in ('.join(', ', @sr_ids).')';
    }
  }


  #Check whether this is a supporting set for another data_set
  my @dsets = @{$db->get_DataSetAdaptor->fetch_all_by_supporting_set($fset)};

  if(@dsets){
    my $txt = $fset->name." is a supporting set of the following DataSets:\t".join(', ', (map {$_->name} @dsets));

    if($force_delete){
      $self->log("WARNING:\t$txt\n");
    }
    else{
      throw($txt."\nPlease resolve or specify the force_delete argument");
    }
  }

  #Remove states
  if(! $slices){
    $fset->adaptor->revoke_states($fset);

    #Revoke InputSet states here as this refers to whether
    #they are imported in the FeatureSet
    #Do this in FeatureSet->revoke_states?

    my $dset = $db->get_DataSetAdaptor->fetch_by_product_FeatureSet($fset);

    #Account for absent dset if we have an external_feature set
    if((! defined $dset) &&
       $fset->feature_class ne 'external'){
      warn "WARNING:\tFeatureSet ".$fset->name." does not have an associated DataSet. Rollback may be incomplete";
    }

    if($dset){

      foreach my $sset(@{$dset->get_supporting_sets}){
        #??? Do we want to do this?
        #This is dependent on the feature_class of the InputSet
        #result InputSets may have been imported as ResultFeatureCollections
        #So we want to leave those in place
        #annotated feature_class InputSets are direct imports, so the status of these refers
        #to the FeatureSet import status
        #Where is the imported status set for SWEmbl?

        #Do not want to rollback here for other types of sset
        if($sset->isa('Bio::EnsEMBL::Funcgen::InputSet') &&
           ($sset->feature_class eq 'annotated')){
          $self->rollback_InputSet($sset);#add full delete here?
        }
      }
    }
  }
  else{
    $self->log('Skipping '.$fset->name.' revoke_states for partial Slice rollback, maybe revoke IMPORTED? ');
  }

  #should add some log statements here?

  #Rollback reg attributes
  if($fset->feature_class eq 'regulatory'){
    $sql = "DELETE ra from regulatory_attribute ra, $table f where f.${table}_id=ra.${table}_id and f.feature_set_id=".$fset->dbID.$slice_join;
    $self->rollback_table($sql, 'regulatory_attribute', undef, $db);

    if($full_delete){
      #Now delete meta entries
      #This is messy as we use the following meta_key nomenclature
      #which do not match the fset names
      #regbuild.feature_set_ids_v5
      #regbuild.feature_type_ids_v5
      #regbuild.focus_feature_set_ids
      #regbuild.initial_release_date_v6
      #regbuild.last_annotation_update_v6
      #regbuild.version NEED TO ADD THIS
      #Also need to revise how these are generated by build_reg_feats.
      #What about new cell_type level feature sets?
      #How will we model these in the meta table?

      warn "Need to revise meta table entries before we add a delete here, remove manually for now for:\t".$fset->name;

      #We would only remove meta entries if we are performing a full rollback
      #Derive the '_vN' suffix from the set name, or '' if the name is unversioned
      my $version;
      ($version = $fset->name) =~ s/.*_v([0-9]+)$/$1/;
      $version = ($version eq $fset->name) ? '' : "_v${version}";

      #These are versionless meta_keys and apply to all sets
      #handle these in reg build script
      #'regbuild.initial_release_date',
      #'regbuild.last_annotation_update'
      #'regbuild.version'

      foreach my $mkey('regbuild.%s.feature_set_ids',
                       'regbuild.%s.feature_type_ids',
                       'regbuild.%s.focus_feature_set_ids'){

        my $meta_key = sprintf($mkey, $fset->cell_type->name).$version;
        $sql = "DELETE from meta where meta_key='${meta_key}'";
        $self->rollback_table($sql, 'meta', undef, $db);
      }
    }
  }


  #Need to remove object xrefs here
  #Do not remove xrefs as these may be used by something else!
  $sql = "DELETE ox from object_xref ox, $table f where ox.ensembl_object_type='".ucfirst($fset->feature_class)."Feature' and ox.ensembl_id=f.${table}_id and f.feature_set_id=".$fset->dbID.$slice_join;
  $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db);


  #Remove associated_feature_type records
  #Do not remove actual feature_type records as they may be used by something else.
  $sql ="DELETE aft from associated_feature_type aft, $table f where f.feature_set_id=".$fset->dbID." and f.${table}_id=aft.table_id and aft.table_name='".$fset->feature_class."_feature'".$slice_join;
  $self->rollback_table($sql, 'associated_feature_type', undef, $db);


  #Remove features
  $sql = "DELETE f from $table f where f.feature_set_id=".$fset->dbID.$slice_join;
  $self->rollback_table($sql, $table, "${table}_id", $db);

  if($full_delete){ #Also delete feature/data_set records

    $sql = "DELETE from feature_set where feature_set_id=".$fset->dbID;
    $self->rollback_table($sql, 'feature_set', 'feature_set_id', $db);
    $self->log("Deleted feature_set entry for:\t".$fset->name);

    $sql = "DELETE from data_set where feature_set_id=".$fset->dbID;
    $self->rollback_table($sql, 'data_set', 'data_set_id', $db);
    $self->log("Deleted associated data_set entry for:\t".$fset->name);
  }

  return;
}
|
|
1472
|
|
1473
|
|
=head2 rollback_ResultSet

  Arg[1]     : Bio::EnsEMBL::Funcgen::ResultSet
  Arg[2]     : Boolean - optional flag to roll back array results
  Arg[3]     : optional - Bio::EnsEMBL::Slice to restrict the rollback to. Only valid for
               ResultSets which contain InputSets
  Arg[4]     : Boolean - optional force flag, allows the results of an InputSet based ResultSet
               to be rolled back even if it is linked to a product FeatureSet
  Arg[5]     : Boolean - optional full delete flag, also removes the result_set_input and
               result_set records. Ignored if a Slice is specified
  Example    : $self->rollback_ResultSet($rset);
  Description: Deletes all status, chip_channel and result_set entries for this ResultSet.
               Will also rollback_results sets if rollback_results specified.  This will also
               update or delete associated ResultSets where appropriate.
  Returntype : Arrayref containing the ResultSet and associated DataSet which have not been rolled back
  Exceptions : Throws if ResultSet not valid
               Throws if result_rollback flag specified but associated product FeatureSet found.
  Caller     : General
  Status     : At risk

=cut
|
|
1489
|
|
1490 #Need to change slice to slices ref here
|
|
1491 #Need to add full rollback, which will specify to remove all sets
|
|
1492 #as well as results and
|
|
1493 #These params need clarifying as their nature changes between input_set and array rsets
|
|
1494 #Don't we always want to rollback_results?
|
|
1495 #force should only really be used to rollback InputSet ResultFeature sets
|
|
1496 #i.e. Read collections which are not used as direct input for the linked product FeatureSet
|
|
1497 #This should fail with array data associated with a product feature set
|
|
1498
|
|
1499 #Do we want to separate ResultFeature rollback from result rollback?
|
|
1500 #Currently the array based collection rollback is done by hand
|
|
1501 #Could be done via the ResultFeature Collector, but should probably use this method.
|
|
1502
|
|
1503
|
|
1504 #rollback_results is only used in the MAGE parser to identify sets which have an
|
|
1505 #associated product fset.
|
|
1506 #Can't really separate due to integrated functionality
|
|
1507
|
|
# Rolls back a stored ResultSet.
#
#   $rset             - stored ResultSet (must have an adaptor)
#   $rollback_results - if true, also roll back the result data and states
#   $slice            - optional Slice; only allowed for input_set ResultSets
#   $force            - if true, allow a results rollback of an input_set
#                       ResultSet which is linked to a product FeatureSet
#   $full_delete      - if true (and no $slice), also delete the
#                       result_set_input/result_set records and unlink the
#                       ResultSet from DataSets lacking a product FeatureSet
#
# Returns an arrayref which is empty if the rollback was performed, or holds
# the ResultSet and its linked DataSet if the rollback was skipped because a
# product FeatureSet was found.
sub rollback_ResultSet{
  my ($self, $rset, $rollback_results, $slice, $force, $full_delete) = @_;

  if(! (ref($rset) && $rset->can('adaptor') && defined $rset->adaptor)){
    throw('Must provide a valid stored Bio::EnsEMBL::ResultSet');
  }

  if($slice && $rset->table_name ne 'input_set'){
    throw('Can only rollback_ResultSet by Slice if the ResultSet contains InputSets');
  }

  #We're still validating against itself??
  #And reciprocating part of the test :|
  my $sql;
  my $db = $rset->adaptor->db;#This needs to be tested
  $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ResultSet', $rset);
  $self->log("Rolling back ResultSet:\t".$rset->name);
  my $dset_adaptor = $self->db->get_DataSetAdaptor;
  my $rset_adaptor = $self->db->get_ResultSetAdaptor;
  my @skipped_sets;

  ### Check if this ResultSet is part of a DataSet with a product feature set

  foreach my $dset(@{$dset_adaptor->fetch_all_by_supporting_set($rset)}){
    next if ! defined $dset;

    $self->log('Found linked DataSet('.$dset->name.") for ResultSet:\t".$rset->log_label);

    if(my $fset = $dset->product_FeatureSet){
      @skipped_sets = ($rset,$dset);

      #What impact does this have on result_rollback?
      #None as we never get there
      #But what if we have specified rollback results?
      #We should throw here as we can't perform the rollback

      if($rollback_results){

        if($rset->table_name ne 'input_set' ||
           (! $force)){#is an input_set/reads collection
          #This will always throw for non-input_set ResultSets

          throw("Could not rollback supporting ResultSet and results for:\t".$rset->log_label.
                "\nEither manually resolve the supporting/feature set relationship or set the 'force' flag.\n");
          # ."Alternatively omit the rollback_results argument if you simply want to redefine the ResultSet without loading any new data");
          #This last bit is no longer true
          #Remove rollback_results?
        }
        else{
          @skipped_sets = ();
          $self->log("Forcing results rollback for InputSet based ResultSet:\t".$rset->log_label);
        }
      }

      if(@skipped_sets){
        $self->log('Skipping rollback. Found product FeatureSet('.$fset->name.") for supporting ResultSet:\t".$rset->log_label);
      }
    }
    elsif((! defined $slice) &&
          $full_delete){
      #Found rset in dset, but not yet processed so can remove safely.
      $self->unlink_ResultSet_DataSet($rset, $dset);
    }
  }


  #Now do similar for all associated ResultSets
  if(! @skipped_sets){

    #Rollback results if required
    if($rollback_results){

      $self->log("Rolling back results for ResultSet:\t".$rset->log_label);
      #Check result_set_input_ids are present in other result sets.
      my @assoc_rsets = @{$rset_adaptor->fetch_all_linked_by_ResultSet($rset)};
      my $feature_supporting = 0;

      foreach my $assoc_rset(@assoc_rsets){

        foreach my $dset(@{$dset_adaptor->fetch_all_by_supporting_set($assoc_rset)}){

          #Check for other product_FeatureSets
          if(my $fset = $dset->product_FeatureSet){
            $feature_supporting++;
            $self->log('Found product FeatureSet('.$fset->name.
                       ") for associated supporting ResultSet:\t".$rset->log_label);

            #NOTE(review): this increments a second time, so the count reported
            #in the throw below is inflated and 'force' never prevents the
            #throw. Left as is, confirm intended behaviour.
            if($rset->table_name ne 'input_set' ||
               (! $force)){#is an input_set/reads collection
              $feature_supporting++;
            }
          }
        }
      }


      if(! $feature_supporting){

        #RollBack result_feature table first
        $self->rollback_ResultFeatures($rset, $slice);

        #Now rollback other states
        $rset->adaptor->revoke_states($rset);

        #This also handles Echip status rollback
        if ($rset->table_name ne 'input_set'){
          $self->log("Rolling back result table for ResultSet:\t".$rset->log_label);
          $self->rollback_results($rset->result_set_input_ids);
        }

        $self->log('Removing result_set_input entries from associated ResultSets') if @assoc_rsets;

        if((! $slice) &&
           $full_delete){

          my @rset_input_ids = @{$rset->result_set_input_ids};
          my %is_rset_input  = map { $_ => 1 } @rset_input_ids;

          #Now remove result_set_input_ids from associated rsets.
          foreach my $assoc_rset(@assoc_rsets){
            #Grab these before the delete in case they are (re)loaded from the DB
            my @assoc_input_ids = @{$assoc_rset->result_set_input_ids};

            #NOTE(review): only the inputs shared with the ResultSet being rolled
            #back are removed, as only their results have been rolled back.
            #The id list must be joined in parentheses, otherwise the array is
            #evaluated in scalar context and the SQL contains the element count.
            if(@rset_input_ids){
              $sql = 'DELETE from result_set_input where result_set_id='.$assoc_rset->dbID.
                ' and result_set_input_id in('.join(', ', @rset_input_ids).')';
              $db->dbc->do($sql);
            }

            # we need to delete complete subsets from the result_set table.
            #Exact id comparison; a regex match would treat 1 as matching 10, 21 etc.
            my $subset = (grep { ! $is_rset_input{$_} } @assoc_input_ids) ? 0 : 1;

            #$assoc_rset is complete subset of $rset so can delete
            #We know this does not have an associated product feature set
            #Only if it is not derived from an input_set
            if($subset){
              $self->log("Deleting associated subset ResultSet:\t".$assoc_rset->log_label);

              #Delete status entries first
              $assoc_rset->adaptor->revoke_states($assoc_rset);

              #All cc records will have already been deleted
              $sql = 'DELETE from result_set where result_set_id='.$assoc_rset->dbID;
              $db->dbc->do($sql);
            }
          }
        }


        #Now warn about Echips in Experiments which may need removing.
        if($rset->table_name ne 'input_set'){
          my %experiment_chips;

          foreach my $echip(@{$rset->get_ExperimentalChips}){
            $experiment_chips{$echip->experiment->name}{$echip->unique_id} = undef;
          }

          foreach my $exp(keys %experiment_chips){
            #The chip unique_ids are the hash keys, the values are all undef
            my @chip_ids = sort keys %{$experiment_chips{$exp}};

            $self->log("Experiment $exp has had ".scalar(@chip_ids).
                       " ExperimentalChips rolled back:\t".join('; ', @chip_ids).
                       ".\nTo fully remove these, use the rollback_experiment.pl (with -chip_ids) script");
          }
        }
        else{
          #Should only be one to rollback
          foreach my $iset(@{$rset->get_InputSets}){
            $self->rollback_InputSet($iset);
          }
        }
      }
      else{
        #do we need to return this info in skipped_rsets?
        #This is just to allow importer to know which ones
        #weren't rolled back to avoid naming clashes.
        #so no.

        #But the results persist on the same chip_channel_ids
        #So not returning this rset may result in loading of more data
        #This should fail as status entries will not have been removed
        #Still we should throw here as we'll most likely want to manually resolve this
        #Besides this would be obfuscating the function

        throw("Could not rollback ResultSet and results, found $feature_supporting associated supporting ".
              "ResultSets for:\t".$rset->log_label."\nManually resolve the supporting/feature set relationship or omit the ".
              "rollback_results argument if you simply want to redefine the ResultSet without loading any new data");
      }
    }
    else{
      $self->log('Skipping results rollback');

      if($rset->name =~ /_IMPORT$/){
        throw("Rolling back an IMPORT set without rolling back the result can result in ophaning result records for a whole experiment.  Specify the result_rollback flag if you want to rollback the results for:\t".$rset->log_label);
      }
    }

    #Delete chip_channel and result_set records
    #This should only be done with full delete
    if((! $slice) &&
       $full_delete){
      $sql = 'DELETE from result_set_input where result_set_id='.$rset->dbID;
      $self->rollback_table($sql, 'result_set_input', 'result_set_input_id', $db);

      #rollback_table executes the statement itself, so do not run it directly first
      $sql = 'DELETE from result_set where result_set_id='.$rset->dbID;
      $self->rollback_table($sql, 'result_set', 'result_set_id', $db);
    }
  }

  return \@skipped_sets;
}
|
|
1724
|
|
1725
|
|
1726
|
|
=head2 unlink_ResultSet_DataSet

  Arg[1]     : ResultSet - must provide adaptor, dbID, name and log_label
  Arg[2]     : DataSet - must provide dbID, name and product_FeatureSet
  Arg[3]     : Optional - Boolean. If true, the ResultSet is also renamed by
               prefixing its name with 'OLD_'. Only the truth of the value is
               used, the value itself is never written to the DB.
  Example    : $self->unlink_ResultSet_DataSet($rset, $dset, 1);
  Description: Deletes the supporting_set record which links the ResultSet to
               the DataSet, logging and warning that the DataSet may be left
               with no supporting sets. Optionally renames the ResultSet, in
               which case a further warning is issued if the DataSet already
               has a product FeatureSet.
  Returntype : None
  Exceptions : None raised directly
  Status     : At risk

=cut

sub unlink_ResultSet_DataSet{
  my ($self, $rset, $dset, $new_name) = @_;

  #TODO: validate set vars

  #Every statement is run against the DB the ResultSet was fetched from, so the
  #unlink and the rename can never be applied to different databases
  my $db = $rset->adaptor->db;

  $self->log("Removing supporting ResultSet from DataSet:\t".$dset->name."\tResultSet:".$rset->log_label);
  my $sql = 'DELETE from supporting_set where data_set_id='.$dset->dbID.
    ' and type="result" and supporting_set_id='.$rset->dbID;

  warn "Removing ".$rset->log_label." as a supporting set to DataSet:\t".$dset->name.
    "\nThis may result in a DataSet with no supporting sets";
  $db->dbc->do($sql);

  if($new_name){
    #We risk overwriting any previously renamed result sets.
    #Should use datestamp?
    #The name is quoted by the driver so names containing quote characters
    #cannot break the statement
    $sql = 'UPDATE result_set set name='.$db->dbc->db_handle->quote('OLD_'.$rset->name).
      ' where result_set_id='.$rset->dbID;
    $db->dbc->do($sql);

    if($dset->product_FeatureSet){
      $self->log('Associated DataSet('.$dset->name.') has already been processed. It is not wise to replace a supporting set without first rolling back the FeatureSet, as there may be additional supporting data');
      warn 'Associated DataSet('.$dset->name.') has already been processed. It is not wise to replace a supporting set without first rolling back the FeatureSet, as there may be additional supporting data';
    }
  }

  return;
}
|
|
1756
|
|
=head2 rollback_InputSet

  Arg[1]     : Bio::EnsEMBL::Funcgen::InputSet
  Arg[2]     : Optional - force_delete, accepted but not yet implemented
  Arg[3]     : Optional - full_delete, accepted but currently unused
  Example    : $self->rollback_InputSet($iset);
  Description: Revokes all states of every InputSubset of the InputSet and
               then all states of the InputSet itself. No other records are
               deleted.
  Returntype : None
  Exceptions : Throws if the InputSet has no adaptor.
               The InputSet is also passed to is_stored_and_valid, which is
               presumably where an unstored or invalid InputSet is rejected
               (defined outside this method - confirm).
  Caller     : Importers and Parsers
  Status     : At risk

=cut

sub rollback_InputSet {
  my ($self, $eset, $force_delete, $full_delete) = @_;

  # TODO: implement force_delete
  # TODO: check the InputSet is not used in a DataSet/ResultSet

  my $iset_adaptor = $eset->adaptor;
  throw('InputSet must have an adaptor') if ! $iset_adaptor;

  $iset_adaptor->db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::InputSet', $eset);

  $self->log("Rolling back InputSet:\t".$eset->name);

  # States are revoked for the InputSubsets first and the InputSet itself last
  for my $set (@{ $eset->get_InputSubsets }, $eset) {
    $set->adaptor->revoke_states($set);
  }

  return;
}
|
|
1795
|
|
1796
|
|
=head2 rollback_results

  Arg[1]     : Arrayref of result_set_input ids
  Example    : $self->rollback_results(\@result_set_input_ids);
  Description: Deletes the status records which share their table_id and
               table_name with the given result_set_input records, then
               deletes all result records for the given result_set_input ids.
  Returntype : None
  Exceptions : Throws if a non-empty array ref of ids is not provided
               Throws if the status delete returns false
  Caller     : General
  Status     : At risk

=cut

#Implementation takes an arrayref rather than a list of ids

sub rollback_results{
  my ($self, $cc_ids) = @_;

  #Dereference defensively, so that undef or a non-array argument ends up in
  #the documented exception below rather than in a bare dereferencing error
  my @cc_ids = ref($cc_ids) ? eval { @{$cc_ids} } : ();

  if(! @cc_ids){
    throw('Must pass an array ref of result_set_input_ids to rollback');
  }

  #Need to test for $self->db here?
  my $db      = $self->db;
  my $id_list = join(',', @cc_ids);

  #Rollback status entries
  #Cannot use revoke_states here?
  #We can if we retrieve the Chip or Channel first
  #Add to ResultSet adaptor
  my $sql = 'DELETE s from status s, result_set_input rsi WHERE rsi.result_set_input_id IN ('.$id_list.
    ') AND rsi.table_id=s.table_id AND rsi.table_name=s.table_name';

  if(! $db->dbc->do($sql)){
    throw("Status rollback failed for result_set_input_ids:\t@cc_ids\n".$db->dbc->db_handle->errstr());
  }

  #Rollback result entries
  $sql = 'DELETE from result where result_set_input_id in ('.$id_list.');';
  $self->rollback_table($sql, 'result', 'result_id', $db);

  return;
}
|
|
1841
|
|
1842
|
|
=head2 rollback_ResultFeatures

  Arg[1]     : Bio::EnsEMBL::Funcgen::ResultSet
  Arg[2]     : Optional - Bio::EnsEMBL::Slice, must be a full length seq_region Slice
  Arg[3]     : Optional - no_revoke Boolean. This is only used when generating new windows
               from a 0 window size which has been projected from a previous assembly.
  Example    : $self->rollback_ResultFeatures($rset);
  Description: Deletes all result_feature records for the given ResultSet,
               restricted to the seq_region of the Slice if one is given.
               Also revokes the 'RESULT_FEATURE_SET' status unless no_revoke
               is set. Returns without deleting anything if the seq_region of
               the Slice is not yet present in the DB.
  Returntype : None
  Exceptions : Throws if ResultSet is not provided or has no adaptor
               Throws if no_revoke is set without a Slice
               Throws if the slice argument is not a Bio::EnsEMBL::Slice
               Throws if the Slice is not full length
  Caller     : General
  Status     : At risk

=cut

sub rollback_ResultFeatures{
  my ($self, $rset, $slice, $no_revoke) = @_;

  if(! (ref($rset) && $rset->can('adaptor') && defined $rset->adaptor)){
    throw('Must provide a valid stored Bio::EnsEMBL::ResultSet');
  }

  if($no_revoke && ! $slice){
    throw("Cannot rollback_ResultFeatures with no_revoke unless you specify a Slice");
  }
  #else warn if slice and no_revoke?

  my $db = $rset->adaptor->db;

  #Both default to the empty string as they are concatenated into the log
  #message and the delete statement even when no Slice has been passed
  my ($slice_name, $slice_constraint) = ('', '');

  if($slice){

    if(! (ref($slice) && $slice->isa('Bio::EnsEMBL::Slice'))){
      throw('slice argument must be a valid Bio::EnsEMBL::Slice');
    }

    my $sr_id = $db->get_ResultFeatureAdaptor->get_seq_region_id_by_Slice($slice);

    #seq_region is not yet present in DB, so there is nothing to roll back
    return if ! $sr_id;

    #Need to test for full slice here
    my $full_slice    = $slice->adaptor->fetch_by_region(undef, $slice->seq_region_name);
    $slice_name       = "\t".$slice->name;
    $slice_constraint = ' and seq_region_id='.$sr_id;

    if(($slice->start != 1) ||
       ($slice->end != $full_slice->end)){
      #TODO: support partial Slices. This would need a test for non-0 window
      #size collections which do not have the exact seq_region start/end values
      throw("rollback_ResultFeatures does not yet support non-full length Slices:\t".$slice_name);
    }
  }

  #We're still validating against itself??
  #And reciprocating part of the test :|
  $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ResultSet', $rset);

  #Do this conditionally on whether it is a result_feature_set?
  #This may break if we have removed the status but not finished the rollback so no!
  $self->log("Rolling back result_feature table for ResultSet:\t".$rset->name.$slice_name);

  #Rollback status entry
  if($rset->has_status('RESULT_FEATURE_SET') && ! $no_revoke){
    $rset->adaptor->revoke_status('RESULT_FEATURE_SET', $rset);
  }

  #Cannot use revoke_states here?
  #We can if we retrieve the Chip or Channel first
  #Add to ResultSet adaptor
  my $sql = 'DELETE from result_feature where result_set_id='.$rset->dbID.$slice_constraint;
  $self->rollback_table($sql, 'result_feature', 'result_feature_id', $db);

  return;
}
|
|
1927
|
|
1928
|
|
1929
|
|
=head2 rollback_ArrayChips

  Arg[1]     : ARRAYREF: Bio::EnsEMBL::Funcgen::ArrayChip objects
  Arg[2]     : Optional - String, rollback mode. One of 'probe' (default),
               'probe_feature', 'ProbeAlign', 'ProbeTranscriptAlign' or
               'probe2transcript'
  Arg[3]     : Optional - String 'force'. Deletes the probe2transcript xrefs and
               unmapped objects in any mode, and is required for 'probe' mode
               to delete existing ProbeFeature level data.
  Arg[4]     : Optional - String 'keep_xrefs'. Skips the check for existing
               Probe/ProbeSet xrefs. Only valid for the feature modes and
               cannot be combined with 'force'.
  Arg[5]     : Optional - no_clean_up Boolean, passed on to rollback_table for
               every delete
  Arg[6]     : Optional - force_clean_up Boolean, accepted but currently not
               used by this method
  Example    : $self->rollback_ArrayChips([$achip1, $achip2]);
  Description: Deletes data associated with the given ArrayChips, dependent on mode:
                 probe2transcript     - probe2transcript UnmappedObjects and Xrefs on
                                        ProbeFeature, Probe and ProbeSet level
                 ProbeTranscriptAlign - ProbeTranscriptAlign ProbeFeature Xrefs, plus the
                                        UnmappedObjects and ProbeFeatures of the
                                        <class>_ProbeTranscriptAlign analyses
                 ProbeAlign           - UnmappedObjects and ProbeFeatures of the
                                        <class>_ProbeAlign analyses
                 probe_feature        - both of the Align rollbacks above
                 probe                - both Align rollbacks (only if forced), then
                                        revokes all ArrayChip states and deletes the
                                        ProbeSets and Probes
               Unless 'force' or 'keep_xrefs' is passed, all modes other than
               probe2transcript fail if Probe or ProbeSet transcript xrefs exist.
  Returntype : None
  Exceptions : Throws if mode, force or keep_xrefs arguments are not valid
               Throws if keep_xrefs is combined with force or a non-feature mode
               Throws if no ArrayChips are passed
               Throws if an ArrayChip has no adaptor or its Array has no class
               Throws if the DBAdaptor has no species defined
               Throws if dependent xrefs are found and force was not specified
  Caller     : General
  Status     : At risk

=cut

#This should be tied to a CS id!!!
#And analysis dependent?
#We may not want to delete alignments by different analyses?
#In practice the slice methods ignore analysis_id for this table
#So we currently never use this!
#So IMPORTED status should be tied to CS id and Analysis id?

sub rollback_ArrayChips{
  my ($self, $acs, $mode, $force, $keep_xrefs, $no_clean_up, $force_clean_up) = @_;

  #no_clean_up allows the analyze/optimize of each table to be skipped, e.g. until the last rollback
  #We could get around this by specifying all ArrayChips for all formats at the same time?
  #Need to implement in RollbackArrays
  #NOTE(review): force_clean_up is never passed on to rollback_table here, so it
  #currently has no effect - confirm whether callers expect it to

  $mode ||= 'probe';

  my %valid_modes = map { $_ => 1 } qw(probe probe_feature ProbeAlign ProbeTranscriptAlign probe2transcript);

  if(! exists $valid_modes{$mode}){
    throw("You have passed an invalid mode argument($mode), you must omit or specify either 'probe2transcript', 'probe', 'ProbeAlign, 'ProbeTranscriptAlign' or 'probe_feature' for all of the Align output");
  }

  if($force && ($force ne 'force')){
    throw("You have not specified a valid force argument($force), you must specify 'force' or omit");
  }

  if($keep_xrefs && ($keep_xrefs ne 'keep_xrefs')){
    throw("You have not specified a valid keep_xrefs argument($keep_xrefs), you must specify 'keep_xrefs' or omit");
  }

  if($keep_xrefs){

    if($mode eq 'probe' || $mode eq 'probe2transcript'){
      throw("You cannot specify 'keep_xrefs' with mode $mode, you can only rollback features e.g. probe_feature, ProbeAlign or ProbeTranscriptAlign");
    }

    if($force){
      throw("You cannot 'force' delete the probe2transcript xrefs and 'keep_xrefs' at the same time. Please specify just one.");
    }
  }

  #Without at least one ArrayChip there is no DBAdaptor to work with
  if(! (ref($acs) && eval { scalar(@{$acs}) })){
    throw('You must pass an ARRAYREF of ArrayChips to rollback');
  }

  my ($adaptor, $db, %classes);

  foreach my $ac(@$acs){
    #The adaptor/DB of the first ArrayChip is used for all of them
    $adaptor ||= $ac->adaptor || throw('ArrayChip must have an adaptor');
    $db      ||= $adaptor->db;
    $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ArrayChip', $ac);

    my $class = $ac->get_Array->class;

    if(! $class){
      throw('The ArrayChip you are trying to rollback does not have a class attribute');
    }

    #ArrayChips of different classes are allowed, the classes are used
    #to build the analysis logic names below
    $classes{$class} = undef;
  }

  #This is always the case as we register the association before we set the Import status
  #Hence the 2nd stage of the import fails as we have an associated ExperimentalChip
  #We need to make sure the ExperimentalChip and Channel have not been imported!!!
  warn "NOTE: rollback_ArrayChips. Need to implement ExperimentlChip check, is the problem that ExperimentalChips are registered before ArrayChips imported?";
  #TODO: Check for dependent ExperimentalChips, i.e. fetch all ExperimentalChips
  #by ArrayChip and throw, listing the Experiment names and ExperimentalChip
  #unique IDs, if any are found

  my $ac_names = join(', ', (map { $_->name } @$acs));
  my $ac_ids   = join(', ', (map { $_->dbID } @$acs));

  $self->log("Rolling back ArrayChips $mode entries:\t$ac_names");
  my ($row_cnt, $probe_join, $sql);
  my $species = $db->species;

  if(!$species){
    throw('Cannot rollback probe2transcript level xrefs without specifying a species for the DBAdaptor');
  }

  #Will this return 'Homo sapiens' or 'homo_sapiens' from the registry?
  #Replace every space, species names can have more than two words
  ($species = lc($species)) =~ s/ /_/g;

  my $transc_edb_name = "${species}_core_Transcript";
  my $genome_edb_name = "${species}_core_Genome";

  #Maybe we want to rollback ProbeAlign and ProbeTranscriptAlign output separately so we
  #can re-run just one part of the alignment step.
  #We want this Probe(Transcript)Align rollback available in the environment, so we can
  #do it natively and before we get to the RunnableDB stage, where we would be trying
  #multiple rollbacks in parallel.

  #We can restrict the probe deletes using the ac_id
  #We should test for other ac_ids using the same probe_id
  #Then fail unless we have specified force delete

  #force should only be used to delete up to and including the mode specified
  #no mode equates to probe mode
  #if no force then we fail if previous levels/modes have xrefs etc...

  if($mode eq 'probe2transcript' ||
     $force){

    #Delete ProbeFeature UnmappedObjects
    $self->log("Deleting probe2transcript ProbeFeature UnmappedObjects");
    $sql = "DELETE uo FROM analysis a, unmapped_object uo, probe p, probe_feature pf, external_db e WHERE a.logic_name ='probe2transcript' AND a.analysis_id=uo.analysis_id AND p.probe_id=pf.probe_id and pf.probe_feature_id=uo.ensembl_id and uo.ensembl_object_type='ProbeFeature' and uo.external_db_id=e.external_db_id AND e.db_name ='${transc_edb_name}' AND p.array_chip_id IN($ac_ids)";
    $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);

    #Delete ProbeFeature Xrefs/DBEntries
    #ProbeTranscriptAlign xrefs are excluded here, they are handled with the features below
    $self->log("Deleting probe2transcript ProbeFeature Xrefs");
    $sql = "DELETE ox FROM xref x, object_xref ox, probe p, probe_feature pf, external_db e WHERE x.external_db_id=e.external_db_id AND e.db_name ='${transc_edb_name}' AND x.xref_id=ox.xref_id AND ox.ensembl_object_type='ProbeFeature' AND ox.ensembl_id=pf.probe_feature_id AND pf.probe_id=p.probe_id AND ox.linkage_annotation!='ProbeTranscriptAlign' AND p.array_chip_id IN($ac_ids)";
    $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up);

    #Probe/Set specific entries
    for my $xref_object('Probe', 'ProbeSet'){
      $probe_join = ($xref_object eq 'ProbeSet') ? 'p.probe_set_id' : 'p.probe_id';

      #Delete Probe/Set UnmappedObjects
      $self->log("Deleting probe2transcript $xref_object UnmappedObjects");
      $sql = "DELETE uo FROM analysis a, unmapped_object uo, probe p, external_db e WHERE a.logic_name='probe2transcript' AND a.analysis_id=uo.analysis_id AND uo.ensembl_object_type='${xref_object}' AND $probe_join=uo.ensembl_id AND uo.external_db_id=e.external_db_id AND e.db_name='${transc_edb_name}' AND p.array_chip_id IN($ac_ids)";
      $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);

      #Delete Probe/Set Xrefs/DBEntries
      $self->log("Deleting probe2transcript $xref_object xref records");
      $sql = "DELETE ox FROM xref x, object_xref ox, external_db e, probe p WHERE x.xref_id=ox.xref_id AND e.external_db_id=x.external_db_id AND e.db_name ='${transc_edb_name}' AND ox.ensembl_object_type='${xref_object}' AND ox.ensembl_id=${probe_join} AND p.array_chip_id IN($ac_ids)";
      $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up);
    }
  }
  elsif(! $keep_xrefs){#Need to check for existing xrefs if not force
    #we don't know whether this is on probe or probeset level
    #This is a little hacky as there's no way we can guarantee this xref will be from probe2transcript
    #until we get the analysis_id moved from identity_xref to xref
    #We are also using the Probe/Set Xrefs as a proxy for all other Xrefs and UnmappedObjects
    #Do we need to set a status here? Would have problem rolling back the states of associated ArrayChips

    for my $xref_object('Probe', 'ProbeSet'){
      $probe_join = ($xref_object eq 'ProbeSet') ? 'p.probe_set_id' : 'p.probe_id';

      $row_cnt = $db->dbc->db_handle->selectrow_array("SELECT COUNT(*) FROM xref x, object_xref ox, external_db e, probe p WHERE x.xref_id=ox.xref_id AND e.external_db_id=x.external_db_id AND e.db_name ='${transc_edb_name}' and ox.ensembl_object_type='${xref_object}' and ox.ensembl_id=${probe_join} AND p.array_chip_id IN($ac_ids)");

      if($row_cnt){
        throw("Cannot rollback ArrayChips($ac_names), found $row_cnt $xref_object Xrefs. Pass 'force' argument or 'probe2transcript' mode to delete");
      }
    }
  }

  #ProbeFeatures inc ProbeTranscriptAlign xrefs

  if($mode ne 'probe2transcript'){

    if(($mode eq 'probe' && $force) ||
       $mode eq 'probe_feature' ||
       $mode eq 'ProbeAlign' ||
       $mode eq 'ProbeTranscriptAlign'){

      #Should really revoke some state here but we only have IMPORTED

      if($mode ne 'ProbeAlign'){
        #Sorted so that the generated SQL and log output are reproducible
        my $lnames = join(', ', (map { "'${_}_ProbeTranscriptAlign'" } sort keys(%classes)));

        #ProbeTranscriptAlign Xref/DBEntries
        $self->log("Deleting ProbeFeature Xref/DBEntry records for:\t$lnames");
        $sql = "DELETE ox from object_xref ox, xref x, probe p, probe_feature pf, external_db e WHERE ox.ensembl_object_type='ProbeFeature' AND ox.linkage_annotation='ProbeTranscriptAlign' AND ox.xref_id=x.xref_id AND e.external_db_id=x.external_db_id and e.db_name='${transc_edb_name}' AND ox.ensembl_id=pf.probe_feature_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
        $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up);

        #Can't include uo.type='ProbeTranscriptAlign' in these deletes yet as uo.type is enum'd to xref or probe2transcript
        #will have to join to analysis and do a like "%ProbeTranscriptAlign" on the logic name?
        #or/and ur.summary_description='Promiscuous probe'?
        $self->log("Deleting UnmappedObjects for:\t${lnames}");
        $sql = "DELETE uo from unmapped_object uo, probe p, external_db e, analysis a WHERE uo.ensembl_object_type='Probe' AND uo.analysis_id=a.analysis_id AND a.logic_name in (${lnames}) AND e.external_db_id=uo.external_db_id and e.db_name='${transc_edb_name}' AND uo.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
        $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);

        #Now the actual ProbeFeatures
        $self->log("Deleting ProbeFeatures for:\t${lnames}");
        $sql = "DELETE pf from probe_feature pf, probe p, analysis a WHERE a.logic_name in(${lnames}) AND a.analysis_id=pf.analysis_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
        $self->rollback_table($sql, 'probe_feature', 'probe_feature_id', $db, $no_clean_up);
      }

      if($mode ne 'ProbeTranscriptAlign'){
        my $lnames = join(', ', (map { "'${_}_ProbeAlign'" } sort keys(%classes)));

        #The logic names must be matched with 'in', as there is one
        #logic name per array class and '=' only accepts a single value
        $self->log("Deleting UnmappedObjects for:\t${lnames}");
        $sql = "DELETE uo from unmapped_object uo, probe p, external_db e, analysis a WHERE uo.ensembl_object_type='Probe' AND uo.analysis_id=a.analysis_id AND a.logic_name in (${lnames}) AND e.external_db_id=uo.external_db_id and e.db_name='${genome_edb_name}' AND uo.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
        $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);

        $self->log("Deleting ProbeFeatures for:\t${lnames}");
        $sql = "DELETE pf from probe_feature pf, probe p, analysis a WHERE a.logic_name in(${lnames}) AND a.analysis_id=pf.analysis_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
        $self->rollback_table($sql, 'probe_feature', 'probe_feature_id', $db, $no_clean_up);
      }
    }
    else{
      #Unforced probe mode
      #Need to count to see if we can carry on with an unforced probe rollback?
      #Do we need this level of control here
      #Can't we assume that if you want probe you also want probe_feature?
      #Leave for safety, at least until we get the dependent ExperimentalChip test sorted
      #What about if we only want to delete one array from an associated set?
      #This would delete all the features from the rest?

      #NOTE(review): this joins ProbeFeature xrefs directly to probe_id, whereas the
      #deletes above join ox.ensembl_id to probe_feature_id via probe_feature. It
      #also counts xrefs rather than ProbeFeatures - confirm this is intended.
      $sql = "select count(*) from object_xref ox, xref x, probe p, external_db e WHERE ox.ensembl_object_type='ProbeFeature' AND ox.linkage_annotation='ProbeTranscriptAlign' AND ox.xref_id=x.xref_id AND e.external_db_id=x.external_db_id and e.db_name='${transc_edb_name}' AND ox.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
      $row_cnt = $db->dbc->db_handle->selectrow_array($sql);

      if($row_cnt){
        throw("Cannot rollback ArrayChips($ac_names), found $row_cnt ProbeFeatures. Pass 'force' argument or 'probe_feature' mode to delete");
      }
      else{
        $self->log("Found $row_cnt ProbeFeatures");
      }
    }

    if($mode eq 'probe'){
      #Don't need to rollback on a CS as we have no dependent EChips?
      #Is this true? Should we enforce a 3rd CoordSystem argument, 'all' string we delete all?

      foreach my $ac(@$acs){
        #Do we need to change this to revoke specific states?
        #Current states are only IMPORTED, so not just yet, but we could change this for safety?
        $ac->adaptor->revoke_states($ac);
      }

      #ProbeSets
      $sql = "DELETE ps from probe p, probe_set ps where p.array_chip_id IN($ac_ids) and p.probe_set_id=ps.probe_set_id";
      $self->rollback_table($sql, 'probe_set', 'probe_set_id', $db, $no_clean_up);

      #Probes
      $sql = "DELETE from probe where array_chip_id IN($ac_ids)";
      $self->rollback_table($sql, 'probe', 'probe_id', $db, $no_clean_up);
    }
  }

  $self->log("Finished $mode roll back for ArrayChip:\t$ac_names");
  return;
}
|
|
2234
|
|
2235
|
|
=head2 rollback_table

  Arg[1]     : String - SQL delete statement to execute
  Arg[2]     : String - name of the table being rolled back
  Arg[3]     : String - name of the auto increment id field of the table
  Arg[4]     : DBAdaptor whose dbc is used to execute the statement
  Arg[5]     : Optional - no_clean_up Boolean, skips refresh_table even if
               records were deleted
  Arg[6]     : Optional - force_clean_up Boolean, always calls refresh_table,
               this takes precedence over no_clean_up
  Example    : $self->rollback_table($sql, 'result', 'result_id', $db);
  Description: Executes the delete statement and logs the number of records
               deleted. Unless no_clean_up is set, refresh_table is called for
               the table if any records were deleted.
  Returntype : None
  Exceptions : Throws if the statement dies or does not return a row count
  Caller     : rollback_results, rollback_ResultFeatures, rollback_ArrayChips
  Status     : At risk

=cut

#The auto increment reset done as part of the clean up will just fail silently
#if the reset value is less than the true autoinc value, i.e. if there are
#parallel inserts going on. So we can never assume that the new auto increment
#value will be used.

sub rollback_table{
  my ($self, $sql, $table, $id_field, $db, $no_clean_up, $force_clean_up) = @_;

  my $row_cnt;
  #Test the return value of the eval rather than $@ alone
  my $executed = eval { $row_cnt = $db->dbc->do($sql); 1 };
  my $error    = $@;

  #An undefined row count means the statement failed without dying, which
  #must not be reported as a successful rollback
  if(! ($executed && defined $row_cnt)){
    $error ||= 'No row count was returned';
    throw("Failed to rollback table $table using sql:\t$sql\n$error");
  }

  #'0E0' is the true value returned when zero rows were affected
  $row_cnt = 0 if $row_cnt eq '0E0';
  $self->log("Deleted $row_cnt $table records");

  if($force_clean_up ||
     ($row_cnt && ! $no_clean_up)){
    $self->refresh_table($table, $id_field, $db);
  }

  return;
}
|
|
2262
|
|
=head2 refresh_table

  Arg[1]     : String - table name
  Arg[2]     : Optional - String, name of the auto increment id field.
               The auto increment reset is skipped if this is not given.
  Arg[3]     : DBAdaptor whose dbc is used to execute the statements
  Example    : $self->refresh_table('probe', 'probe_id', $db);
  Description: Resets the auto increment value of the table via
               reset_table_autoinc and then optimizes and analyzes the table.
               Kept separate from rollback_table so that this can be done just
               once at the end of a rollback of many Sets.
  Returntype : None
  Exceptions : None raised directly
  Caller     : rollback_table
  Status     : At risk

=cut

sub refresh_table {
  my ($self, $table, $id_field, $db) = @_;

  # The reset only takes effect if the new value is still available,
  # i.e. no lock is needed for this to be safe
  if ($id_field) {
    $self->reset_table_autoinc($table, $id_field, $db);
  }

  $self->log("Optimizing and Analyzing $table");

  # optimize - defrags data, sorts indices, updates table stats
  # analyze  - analyses key distribution
  for my $operation (qw(optimize analyze)) {
    $db->dbc->do("$operation table $table");
  }

  return;
}
|
|
2279
|
|
2280
|
|
2281
|
|
=head2 reset_table_autoinc

  Arg[1]     : String - table name
  Arg[2]     : String - name of the auto increment field
  Arg[3]     : Bio::EnsEMBL::DBSQL::DBAdaptor
  Example    : $self->reset_table_autoinc('probe', 'probe_id', $db);
  Description: Sets the AUTO_INCREMENT value of the table to one more than the
               highest value currently found in the auto increment field, or
               to 1 if no value is found.
  Returntype : None
  Exceptions : Throws if table name, auto increment field or DBAdaptor are not
               passed
               Throws if the DBAdaptor is not a Bio::EnsEMBL::DBSQL::DBAdaptor
  Caller     : refresh_table
  Status     : At risk

=cut

sub reset_table_autoinc {
  # TODO: check whether this is called from elsewhere or can be merged
  # into the caller
  my ($self, $table_name, $autoinc_field, $db) = @_;

  if (! $table_name || ! $autoinc_field || ! $db) {
    throw('You must pass a table_name and an autoinc_field to reset the autoinc value');
  }

  if (! ref($db) || ! $db->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) {
    throw('Must pass a valid Bio::EnsEMBL::DBSQL::DBAdaptor');
  }

  # The highest stored id is used rather than the Auto_increment field of
  # 'show table status', as that cannot be selected by name and so would
  # break if the format of the status table changes
  my ($highest_id) = $db->dbc->db_handle->selectrow_array(
    "select $autoinc_field from $table_name order by $autoinc_field desc limit 1"
  );

  # Start from 1 when there is no (true) highest id
  my $next_id = 1;
  $next_id = $highest_id + 1 if $highest_id;

  $db->dbc->do("ALTER TABLE $table_name AUTO_INCREMENT=$next_id");

  return;
}
|
|
2308
|
|
2309
|
|
2310
|
|
2311
|
|
=head2 get_core_display_name_by_stable_id

  Args [1]   : Bio::EnsEMBL::DBSQL::DBAdaptor - core DB
  Args [2]   : stable ID from core DB.
  Args [3]   : stable feature type e.g. gene, transcript, translation (case insensitive)
  Example    : my $name = $self->get_core_display_name_by_stable_id($cdb, $stable_id, 'gene');
  Description: Returns the display label of the display xref of the feature
               with the given stable ID. The result, including a failed
               lookup, is cached per stable ID, so the DB is only queried once
               for each stable ID.
  Returntype : string - display name, undef if no record was found
  Exceptions : Throws if type is not valid.
  Caller     : General
  Status     : At risk

=cut

# --------------------------------------------------------------------------------
# Caches ensembl stable ID -> display_name
# The cache is keyed on the stable ID only, not on {$type}{$stable_id}, so a
# stable ID requested with a second type will return the value cached for the
# first type.

sub get_core_display_name_by_stable_id{
  my ($self, $cdb, $stable_id, $type) = @_;

  $type = (defined $type) ? lc($type) : '';

  #The type is used as the table name below, so it has to match exactly
  if($type !~ /\A(?:gene|transcript|translation)\z/){
    throw("Cannot get display_name for stable_id $stable_id with type $type");
  }

  if(! exists $self->{'display_name_cache'}->{$stable_id}){
    #List assignment, so that undef is cached if no row is returned
    #The stable ID is bound rather than interpolated into the statement
    ($self->{'display_name_cache'}->{$stable_id}) = $cdb->dbc->db_handle->selectrow_array
      ("SELECT x.display_label FROM $type t, xref x where t.display_xref_id=x.xref_id and t.stable_id=?",
       undef, $stable_id);
  }

  return $self->{'display_name_cache'}->{$stable_id};
}
|
|
2347
|
|
2348
|
|
=head2 get_core_stable_id_by_display_name

  Args [1]   : Bio::EnsEMBL::DBSQL::DBAdaptor - core DB
  Args [2]   : display name, matched against the display label of the gene display xref
  Example    : my $stable_id = $self->get_core_stable_id_by_display_name($cdb, $display_name);
  Description: Returns the stable ID of a gene whose display xref has the
               given display label. Only the first record returned is used.
               The result, including a failed lookup, is cached per display
               name, so the DB is only queried once for each display name.
  Returntype : string - gene stable ID, undef if no record was found
  Exceptions : None
  Caller     : General
  Status     : At risk

=cut

# --------------------------------------------------------------------------------
# Caches display_name -> ensembl gene stable ID
# Only genes are supported, there is no type argument.

sub get_core_stable_id_by_display_name{
  my ($self, $cdb, $display_name) = @_;

  if(! exists $self->{'stable_id_cache'}->{$display_name}){
    #List assignment, so that undef is cached if no row is returned
    #The display name is bound rather than interpolated into the statement
    ($self->{'stable_id_cache'}->{$display_name}) = $cdb->dbc->db_handle->selectrow_array
      ('SELECT g.stable_id FROM gene g, xref x where g.display_xref_id=x.xref_id and x.display_label=?',
       undef, $display_name);
  }

  return $self->{'stable_id_cache'}->{$display_name};
}
|
|
2381
|
|
2382
|
|
2383
|
|
2384
|
|
2385
|
|
2386
|
|
2387 1;
|
|
2388
|