Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/Utils/Helper.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2011 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <ensembl-dev@ebi.ac.uk>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 | |
| 20 =head1 NAME | |
| 21 | |
| 22 Bio::EnsEMBL::Funcgen::Utils::Helper | |
| 23 | |
| 24 =head1 SYNOPSIS | |
| 25 | |
| 26 | |
| 27 e.g. | |
| 28 | |
| 29 | |
| 30 my $object = Bio::EnsEMBL::Object->new | |
| 31 ( | |
| 32 logging => 1, | |
| 33 log_file => "/tmp/Misc.log", | |
| 34 debug_level => 2, | |
| 35 debug_file => "/tmp/Misc.dbg", | |
| 36 ); | |
| 37 | |
| 38 $object->log("This is a log message."); | |
| 39 $object->debug(1,"This is a debug message."); | |
| 40 $object->system("rmdir /tmp/test"); | |
| 41 | |
| 42 | |
| 43 ---------------------------------------------------------------------------- | |
| 44 | |
| 45 | |
| 46 =head1 OPTIONS | |
| 47 | |
| 48 =over 8 | |
| 49 | |
| 50 | |
| 51 =item B<-debug> | |
| 52 | |
| 53 Turns on and defines the verbosity of debugging output, 1-3, default = 0 = off | |
| 54 | |
| 55 =over 8 | |
| 56 | |
| 57 =item B<-log_file|l> | |
| 58 | |
| 59 Defines the log file, default = "${instance}.log" | |
| 60 | |
| 61 =item B<-help> | |
| 62 | |
| 63 Print a brief help message and exits. | |
| 64 | |
| 65 =item B<-man> | |
| 66 | |
| 67 Prints the manual page and exits. | |
| 68 | |
| 69 =back | |
| 70 | |
| 71 =head1 DESCRIPTION | |
| 72 | |
| 73 B<This program> performs several debugging and logging functions, as well as providing several inheritable EFGUtils methods. | |
| 74 | |
| 75 =cut | |
| 76 | |
| 77 ################################################################################ | |
| 78 | |
| 79 package Bio::EnsEMBL::Funcgen::Utils::Helper; | |
| 80 | |
| 81 use Bio::Root::Root; | |
| 82 use Data::Dumper; | |
| 83 use Bio::EnsEMBL::Utils::Exception qw (throw stack_trace); | |
| 84 use Bio::EnsEMBL::Utils::Argument qw( rearrange ); | |
| 85 use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw (get_date); | |
| 86 #use Devel::Timer; | |
| 87 use Carp;#? Can't use unless we can get it to redirect | |
| 88 use File::Basename; | |
| 89 | |
| 90 | |
| 91 use strict; | |
| 92 use vars qw(@ISA); | |
| 93 @ISA = qw(Bio::Root::Root); | |
| 94 | |
| 95 #List of valid rollback levels | |
| 96 #To be used in conjunction with -full_delete | |
| 97 my @rollback_tables = ('data_set', 'feature_set', 'result_set', 'input_set', 'experiment', 'array', 'array_chip', 'experimental_chip'); | |
| 98 | |
| 99 #Some local filevars to avoid assigning to package typeglobs | |
| 100 my ($DBGFILE, $LOGFILE); | |
| 101 | |
| 102 ################################################################################ | |
| 103 | |
| 104 =head2 new | |
| 105 | |
| 106 Description : Constructor method to create a new object with passed or | |
| 107 default attributes. | |
| 108 | |
| 109 Arg [1] : hash containing optional attributes :- | |
| 110 log_file - name of log file (default = undef -> STDOUT) | |
| 111 debug_level - level of detail of debug message [1-3] (default = 0 = off) | |
| 112 debug_file - name of debug file (default = undef -> STDERR) | |
| 113 | |
| 114 ReturnType : Helper | |
| 115 | |
| 116 Example : my $Helper = new Bio::EnsEMBL::Helper( | |
| 117 debug_level => 3, | |
| 118 debug_file => "/tmp/efg.debug", | |
| 119 log_file => "/tmp/efg.log", | |
| 120 ); | |
| 121 | |
| 122 Exceptions : throws exception if failed to open debug file | |
| 123 : throws exception if failed to open log file | |
| 124 | |
| 125 =cut | |
| 126 | |
| 127 ################################################################################ | |
| 128 | |
| 129 #To do , change to rearrange | |
| 130 | |
sub new{
  # Constructor. Builds the object via the parent class, copies the logging
  # and debugging attributes from the passed args (falling back to the
  # $main::_* package variables), then opens the debug and log file handles.
  #
  # Args    : hash of optional attributes, keyed without the leading
  #           underscore: tee, debug_level, debug_file, log_file, no_log,
  #           default_log_dir
  # Returns : the new object
  # Throws  : if the debug or log handle cannot be opened, if log_file and
  #           no_log are both given, or if the default log directory cannot
  #           be created
  my ($caller, %args) = @_;

  my $class = ref($caller) || $caller;

  #Create object from parent class
  my $self = $class->SUPER::new(%args);

  # Private attributes and their defaults (defaults come from the caller's
  # main:: namespace).
  my %attrdata = (
                  _tee             => $main::_tee,
                  _debug_level     => $main::_debug_level,
                  _debug_file      => $main::_debug_file,
                  _log_file        => $main::_log_file,#default should be set in caller
                  _no_log          => $main::_no_log,#suppresses log file generation if log file not defined
                  _default_log_dir => $main::_default_log_dir,
                 );

  # set each attribute using the passed value or the default value
  foreach my $attrname (keys %attrdata){
    (my $argname = $attrname) =~ s/^_//; # remove leading underscore
    $self->{$attrname} = (exists $args{$argname}) ? $args{$argname} : $attrdata{$attrname};
  }

  #no_log only turns off default log file generation, output is still wanted
  $self->{'_tee'} = 1 if $self->{'_no_log'};

  $self->{_default_log_dir} ||= $ENV{'HOME'}.'/logs';
  $self->{'_report'} = [];


  # DEBUG OUTPUT & STDERR
  if(defined $self->{_debug_level} && $self->{_debug_level}){
    $main::_debug_level = $self->{_debug_level};

    if(defined $self->{_debug_file}){
      $main::_debug_file = $self->{_debug_file};

      open($DBGFILE, '>>', $self->{_debug_file})
        or throw("Failed to open debug file : $!");
    }
    else{
      #3-arg dup of STDERR, checked so a failure is not silently ignored
      open($DBGFILE, '>&', \*STDERR)
        or throw("Failed to duplicate STDERR for debug output : $!");
    }

    select $DBGFILE; $| = 1;  # make debug file unbuffered

    $self->debug(1,"Debugging started ".localtime()." on $0 at level ".$self->{_debug_level}." ...");
  }


  # LOG OUTPUT
  # Opens $LOGFILE on the given path. With tee set, output is piped through
  # 'tee -a' using the list form of open, so the path never passes through a
  # shell; otherwise the file is opened directly with the given mode.
  my $open_log = sub {
    my ($path, $mode) = @_;

    if($self->{'_tee'}){
      open($LOGFILE, '|-', 'tee', '-a', $path)
        or throw("Failed to open tee pipe to log file : $path\nError: $!");
    }
    else{
      open($LOGFILE, $mode, $path)
        or throw("Failed to open log file : $path\nError: $!");
    }

    return;
  };

  if (defined $self->{_log_file}){

    #This causes print on unopened file as we try and log in the DESTROY
    throw('You have specified mutually exclusive parameters log_file and no_log') if($self->{'_no_log'});
    $main::_log_file = $self->{_log_file};

    $open_log->($self->{_log_file}, '>>');
  }
  elsif(! $self->{'_no_log'}){
    #No log file given and default logs are not suppressed:
    #name the log after the outermost calling script plus the PID
    my @stack          = stack_trace();
    my $top_level      = $stack[$#stack];
    my (undef, $file)  = @{$top_level};
    $file =~ s/.*\///;

    my $log_dir = $self->{_default_log_dir};

    if(! -e $log_dir){
      mkdir($log_dir)
        or throw("Failed to create default log directory : $log_dir\nError: $!");
    }

    $self->{'_log_file'} = $log_dir.'/'.$file.'.'.$$.'.log';
    warn "No log file defined, defaulting to:\t".$self->{'_log_file'}."\n";

    $open_log->($self->{'_log_file'}, '>');
  }
  else{
    #no_log set: log straight to (a duplicate of) STDOUT
    open($LOGFILE, '>&', \*STDOUT)
      or throw("Failed to duplicate STDOUT for log output : $!");
  }

  select $LOGFILE; $| = 1;  # make log file unbuffered
  $self->log("\n\nLogging started at ".localtime()."...");

  # RESET STDOUT TO DEFAULT
  select STDOUT; $| = 1;

  $self->debug(2,"Helper class instance created.");

  return $self;
}
| 259 | |
| 260 | |
| 261 ################################################################################ | |
| 262 | |
| 263 =head2 DESTROY | |
| 264 | |
| 265 Description : Called by garbage collection to enable tidy up before object deleted | |
| 266 | |
| 267 ReturnType : none | |
| 268 | |
| 269 Example : none - should not be called directly | |
| 270 | |
| 271 Exceptions : none | |
| 272 | |
| 273 =cut | |
| 274 | |
| 275 ################################################################################ | |
| 276 | |
sub DESTROY{
  # Destructor: flushes the summary report, writes closing log/debug lines
  # (including process memory figures from ps) and reports the timer if one
  # was set. Not to be called directly.
  my ($self) = @_;

  # The backticks and I/O below overwrite $?, $! and $@. Localise the
  # status variables so destruction cannot change the exit status of the
  # program or clobber an exception that is currently propagating.
  local($., $@, $!, $^E, $?);

  $self->report;

  if($self->{_log_file}){
    $self->log("Logging complete ".localtime().".");
    $self->log('Virtual Memory '.`ps -p $$ -o vsz |tail -1`);
    $self->log('Resident Memory '.`ps -p $$ -o rss |tail -1`);

    # close LOGFILE;  # if inherited object then cannot close filehandle !!!
  }

  if($self->{_debug_level}){
    $self->debug(1,"Debugging complete ".localtime().".");
    # close DBGFILE;  # if inherited object then cannot close filehandle !!!
  }

  if(defined $self->{'_timer'}){
    $self->{'_timer'}->report();
  }

  $self->debug(2,"Bio::EnsEMBL::Helper class instance destroyed.");

  return;
}
| 307 | |
| 308 | |
| 309 | |
| 310 | |
| 311 ##Need generic method in here to get stack and line info | |
| 312 ###Use Root.pm stack methods! | |
| 313 # and replace this with caller line method for logging | |
sub _get_stack{
  # Returns a "[<localtime> - <file>:<line>]" stamp describing the frame two
  # levels up the call stack, falling back to one level up, and finally to
  # the placeholder file "undef" at line 0 when neither frame exists.
  my ($self) = shift;

  my @frame = caller(2);
  @frame    = caller(1)           if ! @frame;
  @frame    = (undef, "undef", 0) if ! @frame;

  my ($source_file, $source_line) = @frame[1, 2];

  return "[".localtime()." - ".basename($source_file).":$source_line]";
}
| 324 | |
| 325 | |
| 326 ################################################################################ | |
| 327 | |
| 328 =head2 log | |
| 329 | |
| 330 Arg[0] : string - log message. | |
| 331 Arg[1] : boolean - memory usage, appends current process memory stats | |
| 332 Description : Method to write messages to a previously set up log file. | |
| 333 Return type : none | |
| 334 Example : $root->log("Processing file $filename ...", 1); | |
| 335 Exceptions : none | |
| 336 | |
| 337 =cut | |
| 338 | |
| 339 ################################################################################ | |
| 340 | |
sub log{
  # Writes a message, prefixed with "::\t", to the log handle and echoes it
  # to the debug output at level 1.
  #
  # Args : $message   - string to log
  #        $mem       - boolean, append the process virtual memory size (from ps)
  #        $date      - boolean, append the current local time
  #        $no_return - boolean, suppress the trailing newline
  my ($self, $message, $mem, $date, $no_return) = @_;

  if($mem){
    $message.= " :: ".`ps -p $$ -o vsz |tail -1`;
    chomp $message;
    $message .= " KB";
  }

  $message .= ' - '.localtime() if $date;
  $message .= "\n" if ! $no_return;

  # The log handle is only opened part way through the constructor. If the
  # constructor throws before that, DESTROY still calls this method, so fall
  # back to STDOUT rather than dying on an unopened handle.
  my $fh = (defined $LOGFILE && defined fileno($LOGFILE)) ? $LOGFILE : \*STDOUT;

  print {$fh} "::\t$message";

  # Add to debug file if not printing to STDERR?
  # only if verbose?
  # this would double print everything to STDOUT if tee and debug has not redefined STDERR
  $self->debug(1,$message);

  return;
}
| 366 | |
| 367 ################################################################################ | |
| 368 | |
| 369 | |
| 370 =head2 report | |
| 371 | |
| 372 Arg[0] : optional string - log message. | |
| 373 Arg[1] : optional boolean - memory usage, appends current process memory stats | |
| 374 Description : Wrapper method for log, which also stores message for summary reporting | |
| 375 Return type : none | |
| 376 Example : $root->report("WARNING: You have not done this or that and want it reported at the end of a script"); | |
| 377 Exceptions : none | |
| 378 | |
| 379 =cut | |
| 380 | |
| 381 ################################################################################ | |
| 382 | |
sub report{
  # With a message: logs it and remembers it for the summary.
  # Without a message: prints the stored summary (if any) to the log handle
  # and empties the store.
  my ($self, $message, $mem) = @_;

  if(defined $message){
    $self->log($message, $mem);
    push @{$self->{'_report'}}, $message;

    return;
  }

  my $stored = scalar(@{$self->{'_report'}});

  if($stored){
    my $summary = join("\n", @{$self->{'_report'}})."\n";

    print $LOGFILE "\n::\tSUMMARY REPORT\t::\n";
    print $LOGFILE $summary;

    $self->{'_report'} = [];
  }

  return;
}
| 401 | |
| 402 | |
| 403 | |
| 404 | |
| 405 | |
| 406 | |
| 407 ################################################################################ | |
| 408 | |
| 409 =head2 log_header | |
| 410 | |
| 411 Arg[0] : string - log message. | |
| 412 Arg[1] : boolean - memory usage, appends current process memory stats | |
| 413 Description : Wrapper method to format a log as a header line | |
| 414 Return type : none | |
| 415 Example : $root->log_header("Processing file $filename ...", 1); | |
| 416 Exceptions : none | |
| 417 | |
| 418 =cut | |
| 419 | |
| 420 ################################################################################ | |
| 421 | |
sub log_header{
  # Logs the message decorated as a header line ("::\t<message>\t::\t::"),
  # with two newlines written before it and one after it.
  my ($self, $message, $mem, $date) = @_;

  my $header = "::\t$message\t::\t::";

  print $LOGFILE "\n\n";
  $self->log($header, $mem, $date);
  print $LOGFILE "\n";
}
| 429 | |
| 430 | |
| 431 | |
| 432 | |
| 433 | |
| 434 ################################################################################ | |
| 435 | |
| 436 =head2 debug | |
| 437 | |
| 438 Description : Method to write debug info to a previously set up debug file. | |
| 439 Over-rides Root.pm on/off style debugging | |
| 440 | |
| 441 Args : int: debug level and string: log message. | |
| 442 | |
| 443 ReturnType : none | |
| 444 | |
| 445 Example : $root->debug(2,"dir=$dir file=$file"); | |
| 446 | |
| 447 Exceptions : none | |
| 448 | |
| 449 =cut | |
| 450 | |
| 451 ################################################################################ | |
| 452 | |
sub debug{
  # Writes a message to the debug handle when debugging is enabled at or
  # above the requested level. The line is tagged with the PID, the
  # outermost frame of the call stack (file:line) and the frame that called
  # our caller (file:line).
  #
  # Args : $level   - int, level this message belongs to
  #        $message - string to write
  my ($self,$level,$message) = @_;

  #Can we not detect whther message is a scalar, array or hash and Dump or print accordingly?

  # nothing to do unless debug is on at the requested level
  return if ! (defined $self->{_debug_level} && $level <= $self->{_debug_level});

  my ($prog_file, $call_file) = ("undef", "undef");
  my ($prog_line, $call_line) = (0, 0);
  my $cnt = 0;

  ######Replace this with Carp method?
  # Walk the whole stack: the second frame is recorded as the caller, the
  # last frame seen is recorded as the program.
  while (my @call = caller($cnt++)){

    if ($cnt == 2){
      ($call_file, $call_line) = @call[1, 2];
    }

    ($prog_file, $prog_line) = @call[1, 2];
  }

  my $prog_name = basename($prog_file);
  my $call_name = basename($call_file);

  # The debug handle is only opened in the constructor when debugging is
  # enabled and the open succeeds; fall back to STDERR instead of dying on
  # an unopened handle.
  my $fh = (defined $DBGFILE && defined fileno($DBGFILE)) ? $DBGFILE : \*STDERR;

  print {$fh} "debug $message\t: [$$ - $prog_name:$prog_line $call_name:$call_line]\n";

  #carp("carping $message");

  return;
}
| 486 | |
| 487 | |
| 488 ################################################################################ | |
| 489 | |
| 490 =head2 debug_hash | |
| 491 | |
| 492 Description : Method to write the contents of passed hash to debug output. | |
| 493 | |
| 494 Args : int: debug level and hashref. | |
| 495 | |
| 496 ReturnType : none | |
| 497 | |
| 498 Example : $Helper->debug_hash(3,\%hash); | |
| 499 | |
| 500 Exceptions : none | |
| 501 | |
| 502 =cut | |
| 503 | |
| 504 ################################################################################ | |
| 505 | |
sub debug_hash{
  # Dumps the contents of the passed hash to the debug handle when debugging
  # is enabled at or above the requested level.
  #
  # Args : $level   - int, level this dump belongs to
  #        $hashref - reference to the hash to dump
  my ($self,$level,$hashref) = @_;

  return if ! (defined $self->{_debug_level} && $level <= $self->{_debug_level});

  # Fall back to STDERR if the debug handle was never opened.
  my $fh = (defined $DBGFILE && defined fileno($DBGFILE)) ? $DBGFILE : \*STDERR;

  # Dump the hash itself; passing \$hashref would dump a reference to the
  # reference and wrap the output in an extra level of indirection.
  print {$fh} Data::Dumper::Dumper($hashref)."\n";

  return;
}
| 516 | |
| 517 | |
| 518 | |
| 519 ################################################################################ | |
| 520 | |
| 521 =head2 run_system_cmd | |
| 522 | |
| 523 Description : Method to control the execution of the standard system() command | |
| 524 | |
| 525 ReturnType : none | |
| 526 | |
| 527 Example : $Helper->run_system_cmd("rmdir /tmp/test"); | |
| 528 | |
| 529 Exceptions : throws exception if system command returns non-zero | |
| 530 | |
| 531 =cut | |
| 532 | |
| 533 ################################################################################ | |
| 534 | |
| 535 | |
| 536 #Move most of this to EFGUtils.pm | |
| 537 #Maintain wrapper here with throws, only warn in EFGUtils | |
| 538 | |
sub run_system_cmd{
  # Runs a shell command through system() and reports how it finished.
  #
  # Args    : $command - shell command string
  #           $no_exit - boolean, warn instead of throwing on failure
  # Returns : the exit code of the command: 0 on success, the child's exit
  #           value on a normal non-zero exit, 128 + signal number if the
  #           child was killed by a signal, or -1 if the command could not
  #           be started
  # Throws  : on any non-zero exit code unless $no_exit is set
  my ($self, $command, $no_exit) = @_;

  $self->debug(3, "system($command)");

  # At debug level 3 or above with a debug file set, the command's output is
  # appended to that file; otherwise it is left untouched.
  my $redirect = '';

  if (defined $self->{_debug_level} && $self->{_debug_level} >= 3
      && defined $self->{_debug_file}){
    $redirect = " >>".$self->{_debug_file}." 2>&1";
  }

  # execute the passed system command
  my $status   = system("$command $redirect");
  my $os_error = "$!";   # only meaningful when system() itself failed (-1)
  my $detail   = '';
  my $exit_code;

  if ($status == -1) {
    # Must be tested before shifting: -1 >> 8 is a huge positive number.
    $exit_code = -1;
    $detail    = $os_error;
    warn "Failed to execute: $os_error\n";
  }
  elsif ($status & 127) {
    # Killed by a signal: the high byte is 0, so without this branch the
    # command would be reported as successful.
    my $signal = $status & 127;
    $exit_code = 128 + $signal;
    warn sprintf("Child died with signal %d, %s coredump\n",
                 $signal, ($status & 128) ? 'with' : 'without');
  }
  else {
    $exit_code = $status >> 8; #get the true exit code

    if ($exit_code != 0) {
      warn sprintf("Child exited with value %d\n", $exit_code);
    }
  }

  if ($exit_code != 0){

    if (! $no_exit){
      throw("System command failed:\t$command\nExit code:\t$exit_code\n$detail");
    }
    else{
      warn("System command returned non-zero exit code:\t$command\nExit code:\t$exit_code\n$detail");
    }
  }

  #reverse boolean logic for perl...can't do this anymore due to tab2mage successful non-zero exit codes :/

  return $exit_code;
}
| 592 | |
| 593 | |
| 594 #add sys_get method ehre to handle system calls which retrieve data? | |
| 595 #i.e.backtick commands `find . -name *fasta` | |
| 596 #or use want or flag with above method? | |
| 597 #should open pipe instead to capture error? | |
| 598 | |
sub get_data{
  # Standard checked accessor used by the specific get_data/config methods.
  #
  # Args    : $data_type - key of the data stored on the object
  #           $data_name - optional key within that data
  # Returns : the named entry when $data_name is given, else the whole
  #           data stored under $data_type
  # Throws  : if the requested type or name does not exist
  my ($self, $data_type, $data_name) = @_;

  my $type_exists = exists $self->{$data_type};

  if(! defined $data_name){
    throw("Defs data type $data_type does not exist\n") if ! $type_exists;

    return $self->{$data_type};
  }

  # Test the type first: looking up a name beneath a missing type would
  # autovivify an empty entry for that type on the object.
  if(! ($type_exists && exists $self->{$data_type}{$data_name})){
    throw("Defs data name $data_name for type '$data_type' does not exist\n");
  }

  return $self->{$data_type}{$data_name};
}
| 612 | |
| 613 | |
| 614 #sub Timer{ | |
| 615 # my ($self) = shift; | |
| 616 | |
| 617 # $self->{'_timer'} = new Devel::Timer() if(! defined $self->{'_timer'}); | |
| 618 | |
| 619 # return $self->{'_timer'}; | |
| 620 | |
| 621 #} | |
| 622 | |
| 623 | |
sub set_header_hash{
  # Builds a lookup of header field name => column index from an array ref
  # of header fields (for a repeated name the last column wins).
  #
  # Args    : $header_ref - array ref of header field names
  #           $fields     - optional array ref of mandatory field names
  # Returns : hash ref of field name => index
  # Throws  : if a mandatory field is absent from the header
  my ($self, $header_ref, $fields) = @_;

  my %hpos = map { $header_ref->[$_] => $_ } 0..$#{$header_ref};

  if($fields){

    for my $mandatory (@$fields){
      next if exists $hpos{$mandatory};

      throw("Header does not contain mandatory field:\t${mandatory}");
    }
  }

  return \%hpos;
}
| 646 | |
| 647 #Move this to EFGUtils? | |
| 648 | |
sub backup_file{
  # Backs up a plain file by renaming it to "<path>.<HH:MM:SS>" using the
  # current local time. Does nothing if the path is not an existing plain
  # file.
  #
  # Args   : $file_path - path of the file to back up
  # Throws : if no path is given or the rename fails
  my ($self, $file_path) = @_;

  throw("Must define a file path to backup") if(! $file_path);

  if (-f $file_path) {
    $self->log("Backing up:\t$file_path");

    # Same suffix format as date '+%T'.
    my ($sec, $min, $hour) = localtime();
    my $backup_path = sprintf('%s.%02d:%02d:%02d', $file_path, $hour, $min, $sec);

    # rename() never goes through a shell, so paths containing spaces or
    # shell metacharacters are handled, and a failure is reported rather
    # than silently ignored.
    rename($file_path, $backup_path)
      or throw("Failed to back up $file_path to $backup_path\nError: $!");
  }

  return;
}
| 662 | |
| 663 #This should move to Utils | |
| 664 #as it is a simple string manipulation | |
| 665 | |
sub get_schema_and_build{
  # Splits a database name on underscores and returns an array ref holding
  # its last two components (second-to-last first).
  my ($self, $dbname) = @_;

  my @parts = split(/_/, $dbname);
  my $last  = $#parts;

  return [ $parts[$last - 1], $parts[$last] ];
}
| 671 | |
| 672 =head2 get_regbuild_set_states | |
| 673 | |
| 674 Arg [1] : Bio::EnsEMBL::DBAdaptor | |
| 675 Example : my ($dset_states, $rset_states, $fset_states) = $helper->get_regbuild_set_states($db); | |
| 676 Description: Returns Array refs of appropriate states for sets used in the regulatory build | |
| 677 Returntype : Array | |
| 678 Exceptions : Warns if cannot find chromosome CoordSystem | |
| 679 Caller : HealthChecker & regulatory build code | |
| 680 Status : At risk | |
| 681 | |
| 682 =cut | |
| 683 | |
| 684 | |
sub get_regbuild_set_states{
  # Returns three array refs of status names, for data sets, result sets and
  # feature sets respectively. All three are empty (with a warning) when the
  # database has no 'chromosome' coordinate system.
  #
  # Args    : $db - DB adaptor providing get_CoordSystemAdaptor and dbc
  # Returns : (\@dset_states, \@rset_states, \@fset_states)
  my ($self, $db) = @_;

  my $cs_a = $db->get_CoordSystemAdaptor;

  #These states need to be mirrored in RegulatorySets.java

  my $chrom_cs = $cs_a->fetch_by_name('chromosome');
  my (@dset_states, @rset_states, @fset_states);

  if(! defined $chrom_cs){
    #This species most likely does not have a regbuild
    #really just need to get the 'highest' level here
    warn "Could not find Chromosome CoordSystem. ".$db->dbc->dbname.". most likely does not contain a RegulatoryBuild";
  }
  else{
    # Reuse the coordinate system fetched above instead of looking it up a
    # second time.
    my $imp_cs_status = 'IMPORTED_'.$chrom_cs->version;

    #What about non-chromosome assemblies?
    #top level will not return version...why not?
    @dset_states = ('DISPLAYABLE');
    @rset_states = (@dset_states, 'DAS_DISPLAYABLE', $imp_cs_status);
    @fset_states = (@rset_states, 'MART_DISPLAYABLE');
  }

  return (\@dset_states, \@rset_states, \@fset_states);
}
| 712 | |
| 713 | |
| 714 | |
| 715 =head2 define_and_validate_sets | |
| 716 | |
| 717 Arg [1] : hash - set constructor parameters: | |
| 718 -dbadaptor Bio::EnsEMBL::Funcgen::DBAdaptor | |
| 719 -name Data/FeatureSet/ResultSet name to create | |
| 720 -feature_type Bio::EnsEMBL::Funcgen::FeatureType | |
| 721 -cell_type Bio::EnsEMBL::Funcgen::CellType | |
| 722 -analysis FeatureSet Bio::EnsEMBL::Analysis | |
| 723 -feature_class e.g. annotated or regulatory | |
| 724 -description FeatureSet description | |
| 725 -recovery Allows definition of extant sets so long as they match | |
| 726 -append Boolean - Forces import on top of previously imported data | |
| 727 -rollback Rolls back product feature set. | |
| 728 -supporting_sets Complete set of pre-stored supporting or input sets for this DataSet | |
| 729 -slices ARRAYREF of Slices to rollback | |
| 730 Example : my $dset = $self->define_and_validate_sets(%params); | |
| 731 Description: Checks whether set is already in DB based on set name, rolls back features | |
| 732 if roll back flag set. Or creates new DataSet and Feature|ResultSet if not present. | |
| 733 Returntype : Bio::EnsEMBL::Funcgen::DataSet | |
| 734 Exceptions : Throws if DBAdaptor param not valid | |
| 735 Caller : Importers and Parsers | |
| 736 Status : At risk | |
| 737 | |
| 738 =cut | |
| 739 | |
| 740 sub define_and_validate_sets{ | |
| 741 my $self = shift; | |
| 742 | |
| 743 #change slice to slices to support multi slice import from InputSet::define_sets | |
| 744 #Can't do full rollback in slice mode | |
| 745 #This may not be safe in slice mode as we will then have mixed inputs/outputs | |
| 746 | |
| 747 my ($name, $anal, $ftype, $ctype, $type, $append, $db, $ssets, $description, $rollback, $recovery, $slices, $display_label) = rearrange(['NAME', 'ANALYSIS', 'FEATURE_TYPE', 'CELL_TYPE', 'FEATURE_CLASS', 'APPEND', | |
| 748 'DBADAPTOR', 'SUPPORTING_SETS', 'DESCRIPTION', 'ROLLBACK', 'RECOVERY', 'SLICES', 'DISPLAY_LABEL'], @_); | |
| 749 | |
| 750 | |
| 751 #VALIDATE CONFIG HASH | |
| 752 #$config_hash ||= {};#default so exists will work without testing | |
| 753 #if(keys %{$config_hash}){ | |
| 754 # #There is a module to handle config hashes somewhere! | |
| 755 # throw('config_hash not yet implemented for define_and_validate_sets'); | |
| 756 #my @known_config = ('full_delete');#We never want full delete here as this is a create method! | |
| 757 #Can we set vars from has by refs like getopts? | |
| 758 #map { | |
| 759 # throw("Found unsupported config hash parameter:\t$_") if ! grep(/^${_}$/, @known_config); | |
| 760 #} keys %{$config_hash}; | |
| 761 # } | |
| 762 | |
| 763 #define rollback level | |
| 764 #extract this to _set_rollback_level($rollback_mode, $feature_class) | |
| 765 my $rollback_level = 0; | |
| 766 | |
| 767 #These should be globally defined so all rollback methods can use them | |
| 768 my %valid_rollback_modes = | |
| 769 ( | |
| 770 product_features => 1, | |
| 771 #Just product features and FeatureSet status, what about DataSet status? | |
| 772 #full delete does nothing here? | |
| 773 | |
| 774 sets => 2, | |
| 775 #Includes product_features and | |
| 776 #deletes supporting_sets entries unless we specify append | |
| 777 #revoke all states on Feature/Data/InputSets | |
| 778 #Full delete removes Feature/Data/InputSet entries | |
| 779 #Never includes ResultSets! | |
| 780 | |
| 781 supporting_features => 3, | |
| 782 #Includes product_feature and sets | |
| 783 #Removes all states and supporting features | |
| 784 #inc. ResultSet results/ResultFeatures | |
| 785 #Full_delete remove supporting set entries | |
| 786 #Otherwise just rollback states for affected sets | |
| 787 ); | |
| 788 | |
| 789 if($rollback){ | |
| 790 if(! exists $valid_rollback_modes{$rollback}){ | |
| 791 #Default to some sensible values | |
| 792 $rollback = 'product_features';#default for FeatureSets | |
| 793 | |
| 794 #Always want overwrite supporting sets if there is a difference | |
| 795 $rollback = 'sets' if ($type eq 'regulatory'); | |
| 796 $rollback = 'supporting_sets' if ($type eq 'result'); | |
| 797 | |
| 798 warn ("You have not set a valid rollback mode(product_features|sets|supporting_features), defaulting to $rollback for feature class $type\n"); | |
| 799 } | |
| 800 | |
| 801 $rollback_level = $valid_rollback_modes{$rollback}; | |
| 802 } | |
| 803 | |
| 804 | |
| 805 if($slices && (ref($slices) ne 'ARRAY')){ | |
| 806 throw('-slices param must be an ARRAYREF of Bio::EnsEMBL::Slice objects'); | |
| 807 #Rest of validation done in other methods | |
| 808 } | |
| 809 | |
| 810 | |
| 811 | |
| 812 #But how are we going to resolve the append behaviour when we also want to validate the ssets? | |
| 813 #Can't, so append also functions to enable addition in the absence of some or all previous data/esets? | |
| 814 #No this is not true, we want to be able to fetch an extant set for import, | |
| 815 #we just need to be aware of sset IMPORTED status? | |
| 816 #This should be a recovery thing, allow fetch, but validate sets? | |
| 817 | |
| 818 | |
| 819 #Check mandatory params | |
| 820 if(! (ref($db) && $db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'))){ | |
| 821 throw('Must provide a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'); | |
| 822 } | |
| 823 | |
| 824 throw('Must provide a -name ') if(! defined $name); | |
| 825 | |
| 826 #Not necessarily, just do rollback then append? | |
| 827 #But then we'd potentially have a supporting set associated which has had it's data removed from the feature set. | |
| 828 #Generating sets for an ExpSet will always have append set | |
| 829 #This could be valid for generically grabing/creating sets for adding new supporting sets e.g. reg build | |
| 830 throw('-append and -rollback are mutually exclusive') if $rollback_level && $append; | |
| 831 | |
| 832 #This will never happen due to previous test? append will always fail? | |
| 833 #warn('You are defining a pre-existing FeatureSet without rolling back'. | |
| 834 # ' previous data, this could result in data duplication') if $append && ! $rollback_level; | |
| 835 #Is this really possible, surely the supporting set will fail to store due to unique key? | |
| 836 | |
| 837 | |
| 838 #Should we warn here about append && recovery? | |
| 839 #Aren't these mutually exclusive? | |
| 840 #Do we know if we have new data? append should override recovery, or just specifiy append | |
| 841 #This will stop the import and highlight the issue to the user | |
| 842 #We need to be able to run with both otherwise the import will not work | |
| 843 | |
| 844 | |
| 845 throw('Must provide a -feature_class e.g. annotated, external, result or regulatory') if(! defined $type); | |
| 846 #Check for annotated, external, regulatory etc here? | |
| 847 #Should never be external as we don't have DataSets for external sets? | |
| 848 | |
| 849 $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureType', $ftype); | |
| 850 if (defined $ctype){ | |
| 851 $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::CellType', $ctype); | |
| 852 } | |
| 853 elsif($type ne 'regulatory'){ | |
| 854 throw('Only Data/FeatureSets with type \'regulatory\' can have an undefined CellType'); | |
| 855 #Coudl extend this to core set by name eq 'RegulatoryFeatures'? | |
| 856 } | |
| 857 | |
| 858 $db->is_stored_and_valid('Bio::EnsEMBL::Analysis', $anal); | |
| 859 | |
| 860 my $dset_adaptor = $db->get_DataSetAdaptor; | |
| 861 my $fset_adaptor = $db->get_FeatureSetAdaptor; | |
| 862 my $rset_adaptor = $db->get_ResultSetAdaptor; | |
| 863 | |
| 864 #DataSet centric definition to enable multiple DataSets | |
| 865 #to be generated from the same supporting sets | |
| 866 my $dset = $dset_adaptor->fetch_by_name($name); | |
| 867 my ($fset, $rset, @input_sets); | |
| 868 | |
| 869 #Validate stored vs passed set data | |
| 870 | |
| 871 if(defined $dset){ | |
| 872 $self->log('Found Stored DataSet '.$dset->name); | |
| 873 | |
| 874 if($type ne 'result'){#i.e. annotated | |
| 875 | |
| 876 #Does this account for regulatory? | |
| 877 | |
| 878 $fset = $dset->product_FeatureSet; | |
| 879 #Here we have the possiblity that a feature_set with a different name may have | |
| 880 #been associated with the DataSet | |
| 881 | |
| 882 if(defined $fset){ | |
| 883 $self->log("Found associated product FeatureSet:\t".$fset->name); | |
| 884 | |
| 885 #if(! $clobber && | |
| 886 if($fset->name ne $name){ | |
| 887 throw('Invalid product FeatureSet name ('.$fset->name.') for DataSet ('.$name.'). Rollback will overwrite the FeatureSet and mismatched name will be retained.'); | |
| 888 #Need to clobber both or give explicit name for datasets or rename dataset??? | |
| 889 #Force this throw for now, make this fix manual as we may end up automatically overwriting data | |
| 890 } | |
| 891 } | |
| 892 | |
| 893 #This needs to be modified to support InputSets in ResultSets? | |
| 894 #Would never have mixed Input/ResultSets so no need | |
| 895 #Could potential need to do it for mixed Result/FeatureSets | |
| 896 #if we ever use an analysis which uses both set types | |
| 897 | |
| 898 #check supporting_sets here if defined | |
| 899 #We have the problem here of wanting to add ssets to a previously existing dset | |
| 900 #we may not know the original sset, or which of the ssets are new | |
| 901 #Hence there is a likelihood of a mismatch. | |
| 902 | |
| 903 #Much of this is replicated in store_udpated sets | |
| 904 | |
| 905 | |
| 906 if(defined $ssets){ | |
| 907 my @sorted_ssets = sort {$a->dbID <=> $b->dbID} @{$ssets}; | |
| 908 my @stored_ssets = sort {$a->dbID <=> $b->dbID} @{$dset->get_supporting_sets}; | |
| 909 my $mismatch = 0; | |
| 910 | |
| 911 $mismatch = 1 if(scalar(@sorted_ssets) != scalar(@stored_ssets)); | |
| 912 | |
| 913 if(! $mismatch){ | |
| 914 | |
| 915 for my $i(0..$#stored_ssets){ | |
| 916 | |
| 917 if($stored_ssets[$i]->dbID != $sorted_ssets[$i]->dbID){ | |
| 918 $mismatch=1; | |
| 919 last; | |
| 920 } | |
| 921 } | |
| 922 } | |
| 923 | |
| 924 | |
| 925 | |
| 926 | |
| 927 if($mismatch){ | |
| 928 #We're really print this names here which may hide the true cell/feature/anal type differences. | |
| 929 my $mismatch = 'There is a (name/type/analysis) mismatch between the supplied supporting_sets and the'. | |
| 930 ' supporting_sets in the DB for DataSet '.$dset->name."\n\nStored:\n" | |
| 931 .join(', ', (map { $_->name } @stored_ssets))."\n\nSupplied supporting_sets:\n" | |
| 932 .join(', ', (map { $_->name } @sorted_ssets)); | |
| 933 | |
| 934 | |
| 935 if($append){ | |
| 936 warn($mismatch."\n\nAppending supporting set data to unvalidated supporting sets"); | |
| 937 } | |
| 938 elsif($rollback_level > 1){#supporting set rollback | |
| 939 warn($mismatch."\n\nReplacing previously stored supporting sets with newly defined sets\n"); | |
| 940 | |
| 941 if($slices){ | |
| 942 warn("WARNING:\tPerforming supporting_set rollback in slice mode. This may corrupt the supporting_set definition for other slices in this DataSet if they are not re-generated using the same supporting_sets\n"); | |
| 943 } | |
| 944 | |
| 945 #Remove supporting_set entries | |
| 946 #This should be in a rollback_DataSet method | |
| 947 #This has moved to DataSetAdaptor::store_update_sets | |
| 948 | |
| 949 #Reset supporting sets | |
| 950 $dset->{'supporting_sets'} = undef; | |
| 951 $dset->add_supporting_sets(\@sorted_ssets); | |
| 952 #Move this to last block? | |
| 953 #This will currently fail as it test for product_FeatureSet | |
| 954 #How do we get around this? Remove IMPORTED status and only throw if fset has IMPORTED status? | |
| 955 | |
| 956 #warn "pre store sset ".@{$dset->get_supporting_sets}; | |
| 957 | |
| 958 #($dset) = @{$dset_adaptor->store_updated_sets([$dset], $rollback_level)}; | |
| 959 #$dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory'; | |
| 960 } | |
| 961 else{ | |
| 962 throw($mismatch); | |
| 963 } | |
| 964 } | |
| 965 } | |
| 966 else{ | |
| 967 warn("No supporting sets defined, skipping supporting set validation for definition of DataSet:\t".$name); | |
| 968 } | |
| 969 } | |
| 970 else{#result_features from InputSet | |
| 971 #Do we ever pass supporting sets here? | |
| 972 #Do we need to test vs stored_sets? | |
| 973 | |
| 974 | |
| 975 #There is the potential for more than one ResultSet to be associated with DataSet | |
| 976 #But as we are using the same name, this restricts the number wrt the cardinality | |
| 977 #of the name field. i.e. 1 name per analysis/cell_type/feature_type. | |
| 978 #This now works slightly differently to the rest of this method as we | |
| 979 #need to treat the ResultSet as we are currently treating the FeatureSet below. | |
| 980 | |
| 981 #However, the use case of this method is for one InputSet giving rise to one ResultSet | |
| 982 #Hence just throw if we find more than one or have a name mismatch??? | |
| 983 my @stored_sets = @{$dset->get_supporting_sets}; | |
| 984 | |
| 985 | |
| 986 | |
| 987 #THis assumes we will always have supporting sets | |
| 988 #and is failing as we have removed this test in DataSet::new | |
| 989 #But where are we storing it without the supporting set? | |
| 990 | |
| 991 if(scalar(@stored_sets) > 1){ | |
| 992 throw('define_and_validate_sets does not yet support DataSets with multiple supporting ResultSets for result_features'); | |
| 993 } | |
| 994 elsif(! @stored_sets){ | |
| 995 throw("DataSet($name) does not have any stored supporting sets. These should have been defined when storing the DataSet"); | |
| 996 #Or should we handle this? | |
| 997 } | |
| 998 | |
| 999 $rset = $stored_sets[0]; | |
| 1000 | |
| 1001 if($rset->set_type ne 'result'){ | |
| 1002 throw("DataSet already contains a supporting set which is not a ResultSet:\t".$rset->set_type."\t".$stored_sets[0]->name); | |
| 1003 } | |
| 1004 elsif($ssets){ | |
| 1005 #Do we ever pass supporting sets, test for completeness | |
| 1006 | |
| 1007 #Just test we have the same supplied ssets if it is defined | |
| 1008 if(scalar(@$ssets) != 1){ | |
| 1009 throw("ResultFeature data sets currently only support one supporting ResultSet.\nSupproting sets:\t". | |
| 1010 join(', ', (map { $_->name.'('.$_->set_type } @$ssets))); | |
| 1011 } | |
| 1012 elsif(! ($rset->dbID == $ssets->[0]->dbID) && | |
| 1013 ($ssets->[0]->set_type eq 'result')){ | |
| 1014 throw('Supplied supporting set('.$ssets->[0]->name.') does not match stored supporting set('.$rset->name.')'); | |
| 1015 } | |
| 1016 } | |
| 1017 | |
| 1018 @input_sets = @{$rset->get_InputSets}; | |
| 1019 } | |
| 1020 } | |
| 1021 | |
| 1022 | |
| 1023 | |
| 1024 if($type eq 'result'){ | |
| 1025 | |
| 1026 #Validate the defined InputSets | |
| 1027 if (scalar(@$ssets) > 1) { | |
| 1028 throw("define_and_validate_sets does not yet support multiple InputSets for defining a ResultSet:\t".$name); | |
| 1029 | |
| 1030 } | |
| 1031 | |
| 1032 if ($ssets->[0]->set_type ne 'input') { | |
| 1033 throw("To define a ResultSet($name) containing result_features, you must provide and InputSet as a supporting set\nArray based ResultSets(i.e. experimental_chip/channel) are not defined using this method, see specific Import Parsers."); | |
| 1034 } | |
| 1035 | |
| 1036 | |
| 1037 #Try and grab the rset just in case it has been orphaned somehow | |
| 1038 if (! defined $rset) { | |
| 1039 $rset = $rset_adaptor->fetch_all_by_name($name, $ftype, $ctype, $anal)->[0]; | |
| 1040 #Should only ever be one given all parts of unique key | |
| 1041 @input_sets = @{$rset->get_InputSets} if $rset; | |
| 1042 | |
| 1043 } | |
| 1044 | |
| 1045 | |
| 1046 if (defined $rset) { #Validate stored InputSets | |
| 1047 | |
| 1048 if (scalar(@input_sets) != scalar(@$ssets)) { | |
| 1049 throw('Found mismatch between number of previously stored InputSets('.scalar(@input_sets).') and defined InputSets('.scalar(@$ssets).'). You must provide a complete list of InputSets to define your ResultSet.'); | |
| 1050 } | |
| 1051 | |
| 1052 if ($input_sets[0]->dbID != $ssets->[0]->dbID) { | |
| 1053 throw('Found dbID mismatch between previously stored InputSet('.$input_sets[0]->name.') and define InputSet('.$ssets->[0]->name.')'); | |
| 1054 } | |
| 1055 | |
| 1056 #rollback ResultSet/InputSet here? | |
| 1057 if($rollback_level > 2){ | |
| 1058 warn "rollback not yet fully implemented for Result/InputSets"; | |
| 1059 | |
| 1060 #Does this need to be by slice? | |
| 1061 #What about states if we are running in parallel? | |
| 1062 | |
| 1063 if($slices){ | |
| 1064 map {$self->rollback_ResultSet($rset, $rollback, $_)} @$slices; | |
| 1065 } | |
| 1066 else{ | |
| 1067 $self->rollback_ResultSet($rset, $rollback); | |
| 1068 } | |
| 1069 | |
| 1070 } | |
| 1071 | |
| 1072 } | |
| 1073 else{#define ResultSet | |
| 1074 ($rset) = @{$rset_adaptor->store(Bio::EnsEMBL::Funcgen::ResultSet->new | |
| 1075 ( | |
| 1076 -name => $name, | |
| 1077 -feature_type => $ftype, | |
| 1078 -cell_type => $ctype, | |
| 1079 -table_name => 'input_set', | |
| 1080 -table_id => $ssets->[0]->dbID, | |
| 1081 -analysis => $anal | |
| 1082 ) | |
| 1083 )}; | |
| 1084 | |
| 1085 } | |
| 1086 } | |
| 1087 else{#annotated/regulatory/external i.e. FeatureSet | |
| 1088 | |
| 1089 #Try and grab the fset just in case it has been orphaned somehow | |
| 1090 if(! defined $fset){ | |
| 1091 $fset = $fset_adaptor->fetch_by_name($name); | |
| 1092 | |
| 1093 if(defined $fset){ | |
| 1094 #Now we need to test whether it is attached to a dset | |
| 1095 #Will be incorrect dset if it is as we couldn't get it before | |
| 1096 #else we test the types and rollback | |
| 1097 $self->log("Found stored orphan FeatureSet:\t".$fset->name); | |
| 1098 | |
| 1099 my $stored_dset = $dset_adaptor->fetch_by_product_FeatureSet($fset); | |
| 1100 | |
| 1101 if(defined $stored_dset){ | |
| 1102 throw('Found FeatureSet('.$name.') associated with incorrect DataSet('.$stored_dset->name. | |
| 1103 ").\nTry using another -name in the set parameters hash"); | |
| 1104 | |
| 1105 } | |
| 1106 } | |
| 1107 } | |
| 1108 | |
| 1109 #Rollback or create FeatureSet | |
| 1110 if(defined $fset){ | |
| 1111 | |
| 1112 if($rollback_level){ | |
| 1113 #Don't check for IMPORTED here as we want to rollback anyway | |
| 1114 #Not forcing delete here as this may be used as a supporting set itself. | |
| 1115 | |
| 1116 $self->rollback_FeatureSet($fset, undef, $slices); | |
| 1117 } | |
| 1118 elsif ($append || $recovery) { | |
| 1119 #This is only true if we have an sset mismatch | |
| 1120 | |
| 1121 #Do we need to revoke IMPORTED here too? | |
| 1122 #This behaves differently dependant on the supporting set. | |
| 1123 #InputSet status refers to loading in FeatureSet, where as ResultSet status refers to loading into result table | |
| 1124 | |
| 1125 #So we really want to revoke it | |
| 1126 #But this leaves us vulnerable to losing data if the import crashes after this point | |
| 1127 #because we have no way of assesing which is complete data and which is incomplete data | |
| 1128 #within a feature set. | |
| 1129 #This means we need a status on supporting_set, not InputSet or ResultSet | |
| 1130 #as this has to be in the context of a dataset. | |
| 1131 #Grrr, this means we need a SupportingSet class which simply wraps the InputSet/ResultSet | |
| 1132 #We also need a single dbID for the supporting_set table | |
| 1133 #Which means we will have to do some wierdity with the normal dbID implementation | |
| 1134 #i.e. Have supporting_set_id, so we can still access all the normal dbID method for the given Set class | |
| 1135 #This will have to be hardcoded into the state methods | |
| 1136 #Also will need to specify when we want to store as supporting_status or normal set status. | |
| 1137 | |
| 1138 #This is an awful lot to protect against vulnerability | |
| 1139 #Also as there easy way to track what features came from which supporting set | |
| 1140 #There isn't currently a viable way to rollback, hence will have to redo the whole set. | |
| 1141 | |
| 1142 #Maybe we can enforce this by procedure? | |
| 1143 #By simply not associating the supporting set until it has been loaded into the feature set? | |
| 1144 #This may cause even more tracking problems | |
| 1145 | |
| 1146 #Right then, simply warn and do not revoke feature_set IMPORTED to protect old data? | |
| 1147 #Parsers should identify supporting_sets(InputSets) which exist but do not have IMPORTED | |
| 1148 #status and fail, specifying -recover which will rollback_FeatureSet which will revoke the IMPORTED status | |
| 1149 | |
| 1150 #This can mean a failed import can leave a partially imported feature set with the IMPORTED status!!! | |
| 1151 | |
| 1152 #We just need to handle InputSets and ResultSets differently. | |
| 1153 #In parsers or here? | |
| 1154 #Probably best in the parsers as this is where the states are set. | |
| 1155 | |
| 1156 | |
| 1157 #Should we throw here for ResultSet? | |
| 1158 #Force rollback of FeatureSet first or create new one? | |
| 1159 #And throw for InputSet? | |
| 1160 #This again comes back to whether we will ever have more than one file | |
| 1161 #for a give InputSet, currently not. | |
| 1162 | |
| 1163 $self->log("WARNING\t::\tAdding data to a extant FeatureSet:\t".$fset->name); | |
| 1164 } else { | |
| 1165 throw('Found extant FeatureSet '.$fset->name.'. Maybe you want to specify the rollback, append or recovery parameter or roll back the FeatureSet separately?'); | |
| 1166 } | |
| 1167 } else { | |
| 1168 #create a new one | |
| 1169 $self->log("Creating new FeatureSet:\t".$name); | |
| 1170 | |
| 1171 $fset = Bio::EnsEMBL::Funcgen::FeatureSet->new( | |
| 1172 -name => $name, | |
| 1173 -feature_type => $ftype, | |
| 1174 -cell_type => $ctype, | |
| 1175 -analysis => $anal, | |
| 1176 -feature_class => $type, | |
| 1177 -description => $description, | |
| 1178 -display_label => $display_label, | |
| 1179 ); | |
| 1180 ($fset) = @{$fset_adaptor->store($fset)}; | |
| 1181 } | |
| 1182 } | |
| 1183 | |
| 1184 #Create/Update the DataSet | |
| 1185 if(defined $dset){ | |
| 1186 #Could do these updates above? | |
| 1187 #But delayed to reduce redundancy | |
| 1188 | |
| 1189 if($type ne 'result'){ | |
| 1190 | |
| 1191 if(! defined $dset->product_FeatureSet){ | |
| 1192 $self->log("Updating DataSet with new product FeatureSet:\t".$fset->name); | |
| 1193 $dset->product_FeatureSet($fset); | |
| 1194 } | |
| 1195 | |
| 1196 $dset = $dset_adaptor->store_updated_sets([$dset], $rollback_level)->[0]; | |
| 1197 #This cannot store the focus sets as we don't know which are which yet | |
| 1198 #Only the script knows this | |
| 1199 # $dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory'; | |
| 1200 } | |
| 1201 else{ | |
| 1202 #We may have the case where we have a DataSet(with a FeatureSet) but no ResultSet | |
| 1203 #i.e. Load result_features after peak calls | |
| 1204 #So update dset with ResultSet | |
| 1205 | |
| 1206 if(! @{$dset->get_supporting_sets}){ | |
| 1207 $self->log("Updating DataSet with new ResultSet:\t".$rset->name); | |
| 1208 $dset->add_supporting_sets([$rset]); | |
| 1209 $dset = $dset_adaptor->store_updated_sets([$dset], $rollback_level)->[0]; | |
| 1210 } | |
| 1211 } | |
| 1212 } | |
| 1213 else{ | |
| 1214 $self->log("Creating new ${type}_feature DataSet:\t".$name); | |
| 1215 | |
| 1216 if($type ne 'result'){ | |
| 1217 ($dset) = @{$dset_adaptor->store(Bio::EnsEMBL::Funcgen::DataSet->new | |
| 1218 ( | |
| 1219 -name => $name, | |
| 1220 -feature_set => $fset, | |
| 1221 -supporting_sets => $ssets, | |
| 1222 ))}; | |
| 1223 #$dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory'; | |
| 1224 } | |
| 1225 else{ | |
| 1226 warn "creating dataset $name with supporting set $rset"; | |
| 1227 ($dset) = @{$dset_adaptor->store(Bio::EnsEMBL::Funcgen::DataSet->new | |
| 1228 ( | |
| 1229 -name => $name, | |
| 1230 -supporting_sets => [$rset], | |
| 1231 ))}; | |
| 1232 } | |
| 1233 } | |
| 1234 | |
| 1235 return $dset; | |
| 1236 } | |
| 1237 | |
| 1238 | |
| 1239 #Rollback/load methods migrated from DBAdaptor | |
| 1240 #Move to SetAdaptors, better located and will remove cyclical dependency | |
| 1241 | |
| 1242 =head2 rollback_FeatureSet | |
| 1243 | |
| 1244 Arg [0] : Bio::EnsEMBL::Funcgen::FeatureSet | |
| 1245 Arg [1] : optional - boolean force delete flag, if this FeatureSet is used as a supporting set | |
| 1246 for another DataSet. | |
| 1247 Arg [2] : optional - arrayref of Bio::EnsEMBL::Slice objects to rollback | |
| 1248 Arg [3] : optional - boolean flag to perform full rollback i.e. default will just remove features, | |
| 1249 specifying this will also delete the feature_set record | |
| 1250 Example : $self->rollback_FeatureSet($fset); | |
| 1251 Description: Deletes all status and feature entries for this FeatureSet. | |
| 1252 Checks whether FeatureSet is a supporting set in any other DataSet. | |
| 1253 Returntype : none | |
| 1254 Exceptions : Throws if any deletes fails or if db method unavailable | |
| 1255 Caller : Importers and Parsers | |
| 1256 Status : At risk | |
| 1257 | |
| 1258 =cut | |
| 1259 | |
| 1260 | |
# rollback_FeatureSet
#
# Deletes the feature table rows of a stored FeatureSet together with the
# dependent regulatory_attribute (regulatory sets only), object_xref and
# associated_feature_type rows, optionally restricted to a list of Slices.
#
#   $fset         - FeatureSet to roll back; must have an adaptor and pass
#                   is_stored_and_valid
#   $force_delete - if true, only log a warning (rather than throw) when the
#                   FeatureSet is a supporting set of other DataSets
#   $slices       - optional arrayref of Bio::EnsEMBL::Slice objects; several
#                   slices must each span a complete seq_region, a single
#                   slice may be a sub slice
#   $full_delete  - if true also delete the feature_set and data_set rows (and,
#                   for regulatory sets, the cell type specific regbuild meta
#                   rows); cannot be combined with $slices
#
# Returns nothing. Throws on invalid arguments, or when the FeatureSet supports
# another DataSet and $force_delete is not set.
sub rollback_FeatureSet{
  my ($self, $fset, $force_delete, $slices, $full_delete) = @_;

  #Remove force delete and just throw?
  #Currently only used in project_feature_set.
  #May want to keep an old RegBuild for mapping/comparison?
  #Could get around this by simply deleting the data_set? Unknown impact.
  #Move to config hash?
  #No need for rollback_level here as we always want to do the same thing

  my $sql;
  my $slice_join = '';
  my $table      = $fset->feature_class.'_feature';
  my $adaptor    = $fset->adaptor || throw('FeatureSet must have an adaptor');
  my $db         = $adaptor->db;
  #Cyclical dependency here, so not strictly necessary.
  $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureSet', $fset);

  $self->log_header('Rolling back '.$fset->feature_class." FeatureSet:\t".$fset->name);

  if($slices){

    if($full_delete){
      throw("Cannot specify a full_delete for a Slice based rollback:\t".$fset->name);
    }

    #Written as 'ne': '! ref($slices) eq ...' negates ref() first and can never be true
    if(ref($slices) ne 'ARRAY'){
      throw('Slices must be an ARRAYREF of Slice objects');
    }

    require Scalar::Util;

    foreach my $slice(@$slices){
      #blessed() guards against calling isa on an unblessed reference
      if(! (Scalar::Util::blessed($slice) && $slice->isa('Bio::EnsEMBL::Slice'))){
        throw("Must pass a valid Bio::EnsEMBL::Slice");
      }
    }

    $self->log("Restricting to slices:\n\t\t".join("\n\t\t", (map { $_->name } @$slices)) );

    #Allow subslice rollback only for one slice at a time
    my $subslice = (scalar(@$slices) == 1) ? 1 : 0;
    my (@sr_ids, @stored_slices); #Parallel arrays: seq_region_id and the Slice it came from

    foreach my $slice(@$slices){
      my $efg_sr_id = $fset->get_FeatureAdaptor->get_seq_region_id_by_Slice($slice);

      if(! $efg_sr_id){
        $self->log("Slice is not present in eFG DB:\t".$slice->name);
        next;
      }

      if(! $subslice){ #Test is not subslice
        my $full_slice = $slice->adaptor->fetch_by_region(undef, $slice->seq_region_name);

        if(($slice->start != 1) ||
           ($full_slice->end != $slice->end)){
          throw("Can only rollback subslices one at a time:\nRollback slice:\t"
                .$slice->name."\nFull slice:\t".$full_slice->name);
        }
      }

      push @sr_ids,        $efg_sr_id;
      push @stored_slices, $slice;
    }

    if(! @sr_ids){
      #An empty 'in ()' clause is not valid SQL and there is nothing to delete
      $self->log("None of the specified slices are present in the eFG DB, nothing to roll back for:\t".$fset->name);
      return;
    }
    elsif(scalar(@sr_ids) == 1){
      #Allow sub slice rollback
      #add range here from meta coord?
      #Use the slice which yielded the seq_region_id, which is not necessarily
      #$slices->[0] if preceding slices were absent from the DB
      $slice_join = " and f.seq_region_id=$sr_ids[0] and f.seq_region_start<=".$stored_slices[0]->end.' and f.seq_region_end>='.$stored_slices[0]->start;
    }
    else{
      $slice_join = ' and f.seq_region_id in ('.join(', ', @sr_ids).')';
    }
  }

  my $dset_adaptor = $db->get_DataSetAdaptor;

  #Check whether this is a supporting set for another data_set
  my @dsets = @{$dset_adaptor->fetch_all_by_supporting_set($fset)};

  if(@dsets){
    my $txt = $fset->name." is a supporting set of the following DataSets:\t".join(', ', (map {$_->name} @dsets));

    if($force_delete){
      $self->log("WARNING:\t$txt\n");
    }
    else{
      throw($txt."\nPlease resolve or specify the force_delete argument");
    }
  }

  #Remove states
  if(! $slices){
    $adaptor->revoke_states($fset);

    #Revoke InputSet states here as this refers to whether
    #they are imported in the FeatureSet
    #Do this in FeatureSet->revoke_states?
    my $dset = $dset_adaptor->fetch_by_product_FeatureSet($fset);

    #Account for absent dset if we have an external_feature set
    if((! defined $dset) &&
       $fset->feature_class ne 'external'){
      warn "WARNING:\tFeatureSet ".$fset->name." does not have an associated DataSet. Rollback may be incomplete";
    }

    if($dset){

      foreach my $sset(@{$dset->get_supporting_sets}){
        #This is dependent on the feature_class of the InputSet
        #result InputSets may have been imported as ResultFeatureCollections
        #So we want to leave those in place
        #annotated feature_class InputSets are direct imports, so the status of these refers
        #to the FeatureSet import status
        #Where is the imported status set for SWEmbl?

        #Test the class first so feature_class is only called on InputSets
        if($sset->isa('Bio::EnsEMBL::Funcgen::InputSet') &&
           ($sset->feature_class eq 'annotated')){
          #Roll back exactly once per InputSet
          $self->rollback_InputSet($sset); #add full delete here?
          #Do not want to rollback here for other type of sset
        }
      }
    }
  }
  else{
    $self->log('Skipping '.$fset->name.' revoke_states for partial Slice rollback, maybe revoke IMPORTED? ');
  }

  #should add some log statements here?

  #Rollback reg attributes
  if($fset->feature_class eq 'regulatory'){
    $sql = "DELETE ra from regulatory_attribute ra, $table f where f.${table}_id=ra.${table}_id and f.feature_set_id=".$fset->dbID.$slice_join;
    $self->rollback_table($sql, 'regulatory_attribute', undef, $db);

    if($full_delete){
      #Now delete meta entries
      #This is messy as we use the following meta_key nomenclature
      #which do not match the fset names
      #regbuild.feature_set_ids_v5
      #regbuild.feature_type_ids_v5
      #regbuild.focus_feature_set_ids
      #regbuild.initial_release_date_v6
      #regbuild.last_annotation_update_v6
      #regbuild.version NEED TO ADD THIS
      #Also need to revise how these are generated by build_reg_feats.
      #What about new cell_type level feature sets?
      #How will we model these in the meta table?

      warn "Need to revise meta table entries before we add a delete here, remove manually for now for:\t".$fset->name;

      #We would only remove meta entries if we are performing a full rollback
      #Derive the '_vN' meta_key suffix from a trailing _vN in the set name, if any
      my $version;
      ($version = $fset->name) =~ s/.*_v([0-9]+)$/$1/;
      $version = ($version eq $fset->name) ? '' : "_v${version}";

      #These are versionless meta_keys and apply to all sets
      #handle these in reg build script
      #'regbuild.initial_release_date',
      #'regbuild.last_annotation_update'
      #'regbuild.version'

      #NOTE(review): assumes a regulatory FeatureSet always has a cell_type here - confirm
      foreach my $mkey('regbuild.%s.feature_set_ids',
                       'regbuild.%s.feature_type_ids',
                       'regbuild.%s.focus_feature_set_ids'){

        my $meta_key = sprintf($mkey, $fset->cell_type->name).$version;
        $sql = "DELETE from meta where meta_key='${meta_key}'";
        $self->rollback_table($sql, 'meta', undef, $db);
      }
    }
  }

  #Need to remove object xrefs here
  #Do not remove xrefs as these may be used by something else!
  $sql = "DELETE ox from object_xref ox, $table f where ox.ensembl_object_type='".ucfirst($fset->feature_class)."Feature' and ox.ensembl_id=f.${table}_id and f.feature_set_id=".$fset->dbID.$slice_join;
  $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db);

  #Remove associated_feature_type records
  #Do not remove actual feature_type records as they may be used by something else.
  $sql ="DELETE aft from associated_feature_type aft, $table f where f.feature_set_id=".$fset->dbID." and f.${table}_id=aft.table_id and aft.table_name='".$fset->feature_class."_feature'".$slice_join;
  $self->rollback_table($sql, 'associated_feature_type', undef, $db);

  #Remove features
  $sql = "DELETE f from $table f where f.feature_set_id=".$fset->dbID.$slice_join;
  $self->rollback_table($sql, $table, "${table}_id", $db);

  if($full_delete){ #Also delete feature/data_set records

    $sql = "DELETE from feature_set where feature_set_id=".$fset->dbID;
    $self->rollback_table($sql, 'feature_set', 'feature_set_id', $db);
    $self->log("Deleted feature_set entry for:\t".$fset->name);

    $sql = "DELETE from data_set where feature_set_id=".$fset->dbID;
    $self->rollback_table($sql, 'data_set', 'data_set_id', $db);
    $self->log("Deleted associated data_set entry for:\t".$fset->name);
  }

  return;
}
| 1472 | |
| 1473 | |
| 1474 =head2 rollback_ResultSet | |
| 1475 | |
| 1476 Arg[1] : Bio::EnsEMBL::Funcgen::ResultSet | |
| 1477 Arg[2] : Boolean - optional flag to roll back array results | |
| 1478 Example : $self->rollback_ResultSet($rset); | |
| 1479 Description: Deletes all status, chip_channel and result_set entries for this ResultSet. | |
| 1480 Will also roll back the results if rollback_results is specified. This will also | |
| 1481 update or delete associated ResultSets where appropriate. | |
| 1482 Returntype : Arrayref containing the ResultSet and associated DataSet which have not been rolled back | |
| 1483 Exceptions : Throws if ResultSet not valid | |
| 1484 Throws if result_rollback flag specified but associated product FeatureSet found. | |
| 1485 Caller : General | |
| 1486 Status : At risk | |
| 1487 | |
| 1488 =cut | |
| 1489 | |
| 1490 #Need to change slice to slices ref here | |
| 1491 #Need to add full rollback, which will specify to remove all sets | |
| 1492 #as well as results and | |
| 1493 #These params need clarifying as their nature changes between input_set and array rsets | |
| 1494 #Don't we always want to rollback_results? | |
| 1495 #force should only really be used to rollback InputSet ResultFeature sets | |
| 1496 #i.e. Read collections which are not used as direct input for the linked product FeatureSet | |
| 1497 #This should fail with array data associated with a product feature set | |
| 1498 | |
| 1499 #Do we want to separate ResultFeature rollback from result rollback? | |
| 1500 #Currently the array based collection rollback is done by hand | |
| 1501 #Could be done via the ResultFeature Collector, but should probably use this method. | |
| 1502 | |
| 1503 | |
| 1504 #rollback_results is only used in the MAGE parser to identify sets which have an | |
| 1505 #associated product fset. | |
| 1506 #Can't really separate due to integrated functionality | |
| 1507 | |
| 1508 sub rollback_ResultSet{ | |
| 1509 my ($self, $rset, $rollback_results, $slice, $force, $full_delete) = @_; | |
| 1510 | |
| 1511 if(! (ref($rset) && $rset->can('adaptor') && defined $rset->adaptor)){ | |
| 1512 throw('Must provide a valid stored Bio::EnsEMBL::ResultSet'); | |
| 1513 } | |
| 1514 | |
| 1515 if($slice && $rset->table_name ne 'input_set'){ | |
| 1516 throw('Can only rollback_ResultSet by Slice if the ResultSet contains InputSets'); | |
| 1517 } | |
| 1518 | |
| 1519 #We're still validating against itself?? | |
| 1520 #And reciprocating part of the test :| | |
| 1521 my $sql; | |
| 1522 my $db = $rset->adaptor->db;#This needs to be tested | |
| 1523 $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ResultSet', $rset); | |
| 1524 $self->log("Rolling back ResultSet:\t".$rset->name); | |
| 1525 my $dset_adaptor = $self->db->get_DataSetAdaptor; | |
| 1526 my $rset_adaptor = $self->db->get_ResultSetAdaptor; | |
| 1527 my @skipped_sets; | |
| 1528 | |
| 1529 ### Check if this ResultSet is part of a DataSet with a product feature set | |
| 1530 | |
| 1531 foreach my $dset(@{$dset_adaptor->fetch_all_by_supporting_set($rset)}){ | |
| 1532 | |
| 1533 if (defined $dset){ | |
| 1534 $self->log('Found linked DataSet('.$dset->name.") for ResultSet:\t".$rset->log_label); | |
| 1535 | |
| 1536 if(my $fset = $dset->product_FeatureSet){ | |
| 1537 @skipped_sets = ($rset,$dset); | |
| 1538 | |
| 1539 #What impact does this have on result_rollback? | |
| 1540 #None as we never get there | |
| 1541 #But what if we have specified rollback results? | |
| 1542 #We should throw here as we can't perform the rollback | |
| 1543 | |
| 1544 if($rollback_results){ | |
| 1545 | |
| 1546 if($rset->table_name ne 'input_set' || | |
| 1547 (! $force)){#is an input_set/reads collection | |
| 1548 #This will always throws for non-input_set ResultSets | |
| 1549 | |
| 1550 throw("Could not rollback supporting ResultSet and results for:\t".$rset->log_label. | |
| 1551 "\nEither manually resolve the supporting/feature set relationship or set the 'force' flag.\n"); | |
| 1552 # ."Alternatively omit the rollback_results argument if you simply want to redefine the ResultSet without loading any new data"); | |
| 1553 #This last bit is no longer true | |
| 1554 #Remove rollback_results? | |
| 1555 } | |
| 1556 else{ | |
| 1557 @skipped_sets = (); | |
| 1558 $self->log("Forcing results rollback for InputSet based ResultSet:\t".$rset->log_label); | |
| 1559 } | |
| 1560 } | |
| 1561 | |
| 1562 if(@skipped_sets){ | |
| 1563 $self->log('Skipping rollback. Found product FeatureSet('.$fset->name.") for supporting ResultSet:\t".$rset->log_label); | |
| 1564 } | |
| 1565 | |
| 1566 } | |
| 1567 elsif((! defined $slice) && | |
| 1568 $full_delete){ | |
| 1569 #Found rset in dset, but not yet processed so can remove safely. | |
| 1570 $self->unlink_ResultSet_DataSet($rset, $dset); | |
| 1571 } | |
| 1572 } | |
| 1573 } | |
| 1574 | |
| 1575 | |
| 1576 #Now do similar for all associated ResultSets | |
| 1577 if(! @skipped_sets){ | |
| 1578 | |
| 1579 | |
| 1580 #Rollback results if required | |
| 1581 if($rollback_results){ | |
| 1582 | |
| 1583 $self->log("Rolling back results for ResultSet:\t".$rset->log_label); | |
| 1584 #Check result_set_input_ids are present in other result sets. | |
| 1585 my @assoc_rsets = @{$rset_adaptor->fetch_all_linked_by_ResultSet($rset)}; | |
| 1586 my $feature_supporting = 0; | |
| 1587 | |
| 1588 foreach my $assoc_rset(@assoc_rsets){ | |
| 1589 | |
| 1590 foreach my $dset(@{$dset_adaptor->fetch_all_by_supporting_set($assoc_rset)}){ | |
| 1591 | |
| 1592 #Check for other product_FeatureSets | |
| 1593 if(my $fset = $dset->product_FeatureSet){ | |
| 1594 $feature_supporting++; | |
| 1595 $self->log('Found product FeatureSet('.$fset->name. | |
| 1596 ") for associated supporting ResultSet:\t".$rset->log_label); | |
| 1597 | |
| 1598 if($rset->table_name ne 'input_set' || | |
| 1599 (! $force)){#is an input_set/reads collection | |
| 1600 $feature_supporting++; | |
| 1601 } | |
| 1602 } | |
| 1603 } | |
| 1604 } | |
| 1605 | |
| 1606 | |
| 1607 if(! $feature_supporting){ | |
| 1608 | |
| 1609 #RollBack result_feature table first | |
| 1610 $self->rollback_ResultFeatures($rset, $slice); | |
| 1611 | |
| 1612 #Now rollback other states | |
| 1613 $rset->adaptor->revoke_states($rset); | |
| 1614 | |
| 1615 | |
| 1616 #This also handles Echip status rollback | |
| 1617 if ($rset->table_name ne 'input_set'){ | |
| 1618 $self->log("Rolling back result table for ResultSet:\t".$rset->log_label); | |
| 1619 $self->rollback_results($rset->result_set_input_ids); | |
| 1620 } | |
| 1621 | |
| 1622 $self->log('Removing result_set_input entries from associated ResultSets') if @assoc_rsets; | |
| 1623 | |
| 1624 if((! $slice) && | |
| 1625 $full_delete){ | |
| 1626 | |
| 1627 #Now remove result_set_input_ids from associated rsets. | |
| 1628 foreach my $assoc_rset(@assoc_rsets){ | |
| 1629 $sql = 'DELETE from result_set_input where result_set_id='.$assoc_rset->dbID. | |
| 1630 ' and result_set_input_id in('.join', ', @{$assoc_rset->result_set_input_ids}.')'; | |
| 1631 $db->dbc->do($sql); | |
| 1632 | |
| 1633 # we need to delete complete subsets from the result_set table. | |
| 1634 my $subset = 1; | |
| 1635 | |
| 1636 foreach my $cc_id(@{$assoc_rset->result_set_input_ids}){ | |
| 1637 | |
| 1638 if(! grep { /$cc_id/ } @{$rset->result_set_input_ids}){ | |
| 1639 $subset = 0; | |
| 1640 last; | |
| 1641 } | |
| 1642 } | |
| 1643 | |
| 1644 #$assoc_rset is complete subset of $rset so can delete | |
| 1645 #We know this does not have an assoicated product feature set | |
| 1646 #Only if it is not derived from an input_set | |
| 1647 if($subset){ | |
| 1648 $self->log("Deleting associated subset ResultSet:\t".$assoc_rset->log_label); | |
| 1649 | |
| 1650 #Delete status entries first | |
| 1651 $assoc_rset->adaptor->revoke_states($assoc_rset); | |
| 1652 | |
| 1653 #All cc records will have already been deleted | |
| 1654 $sql = 'DELETE from result_set where result_set_id='.$assoc_rset->dbID; | |
| 1655 $db->dbc->do($sql); | |
| 1656 } | |
| 1657 } | |
| 1658 } | |
| 1659 | |
| 1660 | |
| 1661 #Now warn about Echips in Experiments which may need removing. | |
| 1662 if($rset->table_name ne 'input_set'){ | |
| 1663 my %experiment_chips; | |
| 1664 | |
| 1665 foreach my $echip(@{$rset->get_ExperimentalChips}){ | |
| 1666 $experiment_chips{$echip->experiment->name}{$echip->unique_id} = undef; | |
| 1667 } | |
| 1668 | |
| 1669 foreach my $exp(keys %experiment_chips){ | |
| 1670 $self->log("Experiment $exp has had ".scalar(values %{$experiment_chips{$exp}}). | |
| 1671 " ExperimentalChips rolled back:\t".join('; ', values %{$experiment_chips{$exp}}). | |
| 1672 ".\nTo fully remove these, use the rollback_experiment.pl (with -chip_ids) script"); | |
| 1673 } | |
| 1674 } | |
| 1675 else{ | |
| 1676 #Should only be one to rollback | |
| 1677 foreach my $iset(@{$rset->get_InputSets}){ | |
| 1678 $self->rollback_InputSet($iset); | |
| 1679 } | |
| 1680 } | |
| 1681 } | |
| 1682 else{ | |
| 1683 #$self->log("Skipping result rollback, found $feature_supporting associated supporting ResultSets for:\t".$rset->log_label); | |
| 1684 #warn("Skipping result rollback, found $feature_supporting associated supporting ResultSets for:\t".$rset->log_label); | |
| 1685 #do we need to return this info in skipped_rsets? | |
| 1686 #This is just to allow importer to know which ones | |
| 1687 #weren't rolled back to avoid naming clashes. | |
| 1688 #so no. | |
| 1689 | |
| 1690 #But the results persist on the same chip_channel_ids | |
| 1691 #So not returning this rset may result in loading of more data | |
| 1692 #This should fail as status entries will not have been removed | |
| 1693 #Still we should throw here as we'll most likely want to manually resolve this | |
| 1694 #Besides this would be obfuscating the function | |
| 1695 | |
| 1696 throw("Could not rollback ResultSet and results, found $feature_supporting associated supporting ". | |
| 1697 "ResultSets for:\t".$rset->log_label."\nManually resolve the supporting/feature set relationship or omit the ". | |
| 1698 "rollback_results argument if you simply want to redefine the ResultSet without loading any new data"); | |
| 1699 } | |
| 1700 } | |
| 1701 else{ | |
| 1702 $self->log('Skipping results rollback'); | |
| 1703 | |
| 1704 if($rset->name =~ /_IMPORT$/){ | |
| 1705 throw("Rolling back an IMPORT set without rolling back the result can result in ophaning result records for a whole experiment. Specify the result_rollback flag if you want to rollback the results for:\t".$rset->log_label); | |
| 1706 } | |
| 1707 } | |
| 1708 | |
| 1709 #Delete chip_channel and result_set records | |
| 1710 #This should only be done with full delete | |
| 1711 if((! $slice) && | |
| 1712 $full_delete){ | |
| 1713 $sql = 'DELETE from result_set_input where result_set_id='.$rset->dbID; | |
| 1714 $self->rollback_table($sql, 'result_set_input', 'result_set_input_id', $db); | |
| 1715 | |
| 1716 $sql = 'DELETE from result_set where result_set_id='.$rset->dbID; | |
| 1717 $db->dbc->do($sql); | |
| 1718 $self->rollback_table($sql, 'result_set', 'result_set_id', $db); | |
| 1719 } | |
| 1720 } | |
| 1721 | |
| 1722 return \@skipped_sets; | |
| 1723 } | |
| 1724 | |
| 1725 | |
| 1726 | |
#Detaches a ResultSet from a DataSet by deleting the matching supporting_set row.
#
#  Arg[1]     : ResultSet with an adaptor; the DB connection is taken from it
#  Arg[2]     : DataSet which currently lists the ResultSet as a supporting set
#  Arg[3]     : Optional - Boolean. If true the ResultSet is renamed to OLD_<name>.
#               Only the truth of this value is used, not its content.
#  Returntype : None
#  Exceptions : Throws if the ResultSet has no adaptor or no DataSet is passed

sub unlink_ResultSet_DataSet{
  my ($self, $rset, $dset, $new_name) = @_;

  if(! (ref($rset) && $rset->can('adaptor') && defined $rset->adaptor)){
    throw('Must provide a ResultSet with an adaptor');
  }

  if(! ref($dset)){
    throw('Must provide a DataSet');
  }

  #Both statements below run on the connection the ResultSet was fetched from,
  #so the delete and the rename can never hit different databases.
  my $dbc = $rset->adaptor->db->dbc;

  $self->log("Removing supporting ResultSet from DataSet:\t".$dset->name."\tResultSet:".$rset->log_label);
  my $sql = 'DELETE from supporting_set where data_set_id='.$dset->dbID.
    ' and type="result" and supporting_set_id='.$rset->dbID;

  warn "Removing ".$rset->log_label." as a supporting set to DataSet:\t".$dset->name.
    "\nThis may result in a DataSet with no supporting sets";
  $dbc->do($sql);

  if($new_name){
    #We risk overwriting any previously renamed result sets.
    #Should use datestamp?
    #quote() escapes the name, so quote characters in it cannot break the statement
    $sql = 'UPDATE result_set set name='.$dbc->db_handle->quote('OLD_'.$rset->name).
      ' where result_set_id='.$rset->dbID;
    $dbc->do($sql);

    if($dset->product_FeatureSet){
      my $msg = 'Associated DataSet('.$dset->name.') has already been processed. It is not wise to replace a supporting set without first rolling back the FeatureSet, as there may be additional supporting data';
      $self->log($msg);
      warn $msg;
    }
  }

  return;
}
| 1756 | |
=head2 rollback_InputSet

  Arg[1]     : Bio::EnsEMBL::Funcgen::InputSet
  Arg[2]     : Optional - force_delete, accepted but currently ignored
  Arg[3]     : Optional - full_delete, accepted but currently ignored
  Example    : $self->rollback_InputSet($iset);
  Description: Revokes all states of this InputSet and of each of its InputSubsets
  Returntype : None
  Exceptions : Throws if the InputSet has no adaptor
  Caller     : Importers and Parsers
  Status     : At risk

=cut


sub rollback_InputSet{
  my ($self, $iset, $force_delete, $full_delete) = @_;

  #TODO: force_delete is not implemented, and nothing here checks whether
  #the InputSet is still used in a DataSet/ResultSet.

  my $iset_adaptor = $iset->adaptor || throw('InputSet must have an adaptor');
  my $db           = $iset_adaptor->db;
  $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::InputSet', $iset);

  $self->log("Rolling back InputSet:\t".$iset->name);

  #Subsets first, then the parent set itself
  for my $subset (@{$iset->get_InputSubsets}){
    $subset->adaptor->revoke_states($subset);
  }

  $iset_adaptor->revoke_states($iset);

  return;
}
| 1795 | |
| 1796 | |
=head2 rollback_results

  Arg[1]     : Arrayref of result_set_input_ids
  Example    : $self->rollback_results($rset->result_set_input_ids);
  Description: Deletes all result records for the given result_set_input_ids.
               Also deletes the status records which share table_id and table_name
               with those result_set_input records.
  Returntype : None
  Exceptions : Throws if a non-empty arrayref of ids is not provided
               Throws if the status delete returns false
  Caller     : General
  Status     : At risk

=cut

#changed implementation to take arrayref

sub rollback_results{
  my ($self, $cc_ids) = @_;

  #Validate before dereferencing, so undef or a non-array argument gives
  #the intended error rather than a dereference failure.
  if(! (ref($cc_ids) eq 'ARRAY' && @{$cc_ids})){
    throw('Must pass an array ref of result_set_input_ids to rollback');
  }

  #Need to test for $self->db here?
  my $dbc     = $self->db->dbc;
  my $id_list = join(',', @{$cc_ids});

  #Rollback status entries
  #Cannot use revoke_states here?
  #We can if we retrieve the Chip or Channel first
  #Add to ResultSet adaptor
  my $sql = 'DELETE s from status s, result_set_input rsi WHERE rsi.result_set_input_id IN ('.$id_list.
    ') AND rsi.table_id=s.table_id AND rsi.table_name=s.table_name';

  if(! $dbc->do($sql)){
    throw("Status rollback failed for result_set_input_ids:\t@{$cc_ids}\n".$dbc->db_handle->errstr());
  }

  #Rollback result entries
  $sql = 'DELETE from result where result_set_input_id in ('.$id_list.');';
  $self->rollback_table($sql, 'result', 'result_id', $self->db);
  return;
}
| 1841 | |
| 1842 | |
=head2 rollback_ResultFeatures

  Arg[1]     : Bio::EnsEMBL::Funcgen::ResultSet
  Arg[2]     : Optional - Bio::EnsEMBL::Slice, must span the full seq_region
  Arg[3]     : Optional - no_revoke Boolean. This is only used when generating new windows
               from a 0 window size which has been projected from a previous assembly.
  Example    : $self->rollback_ResultFeatures($rset);
  Description: Deletes all result_feature records for the given ResultSet,
               optionally restricted to the seq_region of the given Slice.
               Also revokes the 'RESULT_FEATURE_SET' status unless no_revoke is set.
               Returns without deleting if the Slice seq_region is not yet in the DB.
  Returntype : None
  Exceptions : Throws if ResultSet not provided or has no adaptor
               Throws if no_revoke is specified without a Slice
               Throws if the Slice is not a Bio::EnsEMBL::Slice or is not full length
  Caller     : General
  Status     : At risk

=cut


sub rollback_ResultFeatures{
  my ($self, $rset, $slice, $no_revoke) = @_;

  if(! (ref($rset) && $rset->can('adaptor') && defined $rset->adaptor)){
    throw('Must provide a valid stored Bio::EnsEMBL::Funcgen::ResultSet');
  }

  if(! $slice && $no_revoke){
    throw("Cannot rollback_ResultFeatures with no_revoke unless you specify a Slice");
  }
  #else warn if slice and no_revoke?

  my $db = $rset->adaptor->db;

  #Default to empty strings so the log message and the delete statement
  #can be built without undef concatenation when no Slice is passed.
  my $slice_name       = '';
  my $slice_constraint = '';

  if($slice){

    if(! (ref($slice) && $slice->isa('Bio::EnsEMBL::Slice'))){
      throw('slice argument must be a valid Bio::EnsEMBL::Slice');
    }

    my $sr_id = $db->get_ResultFeatureAdaptor->get_seq_region_id_by_Slice($slice);

    #seq_region is not yet present in DB, so there is nothing to roll back
    return if ! $sr_id;

    #Need to test for full slice here
    my $full_slice = $slice->adaptor->fetch_by_region(undef, $slice->seq_region_name);
    $slice_name       = "\t".$slice->name;
    $slice_constraint = ' and seq_region_id='.$sr_id;

    if(($slice->start != 1) ||
       ($slice->end != $full_slice->end)){

      throw("rollback_ResultFeatures does not yet support non-full length Slices:\t".$slice_name);

      #Need to test whether we have non-0 wsize collections without the exact seq_region values
      #$sql='SELECT window_size from result_feature where result_feature_id='.$rset->dbID.
      #  ' and window_size!=0 and seq_region_start!='.$slice->start.' and seq_region_end!='.$slice->end.$slice_constraint;
    }
  }

  #We're still validating against itself??
  #And reciprocating part of the test :|
  $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ResultSet', $rset);

  #Do this conditionally on whether it is a result_feature_set?
  #This may break if we have removed the status but not finished the rollback so no!
  $self->log("Rolling back result_feature table for ResultSet:\t".$rset->name.$slice_name);

  #Rollback status entry
  if($rset->has_status('RESULT_FEATURE_SET') && ! $no_revoke){
    $rset->adaptor->revoke_status('RESULT_FEATURE_SET', $rset);
  }

  #Cannot use revoke_states here?
  #We can if we retrieve the Chip or Channel first
  #Add to ResultSet adaptor
  my $sql = 'DELETE from result_feature where result_set_id='.$rset->dbID.$slice_constraint;
  $self->rollback_table($sql, 'result_feature', 'result_feature_id', $db);

  return;
}
| 1927 | |
| 1928 | |
| 1929 | |
=head2 rollback_ArrayChips

  Arg[1]     : ARRAYREF: Bio::EnsEMBL::Funcgen::ArrayChip objects
  Arg[2]     : Optional - String, rollback mode: 'probe' (default), 'probe_feature',
               'ProbeAlign', 'ProbeTranscriptAlign' or 'probe2transcript'
  Arg[3]     : Optional - String 'force'. Deletes the probe2transcript xrefs and
               unmapped objects (and, in 'probe' mode, the probe features) instead
               of throwing when they are found
  Arg[4]     : Optional - String 'keep_xrefs'. Skips the probe2transcript xref check.
               Only valid for the feature level modes and not together with 'force'
  Arg[5]     : Optional - Boolean no_clean_up, passed on to rollback_table
  Arg[6]     : Optional - Boolean force_clean_up, accepted but not used by this method
  Example    : $self->rollback_ArrayChips([$achip1, $achip2]);
  Description: Deletes the Probes, ProbeSets, ProbeFeatures, xrefs, unmapped objects
               and states associated with the given ArrayChips, as far as the mode
               and force arguments allow.
  Returntype : None
  Exceptions : Throws if no ArrayChips are passed
               Throws if the mode, force or keep_xrefs arguments are invalid or incompatible
               Throws if an ArrayChip has no adaptor or its Array has no class
               Throws if the DBAdaptor has no species
               Throws if dependent xrefs or features are found and neither 'force'
               nor an appropriate mode was specified
  Caller     : General
  Status     : At risk

=cut

#This should be tied to a CS id!!!
#And analysis dependant?
#We may not want to delete alignment by different analyses?
#In practise the slice methods ignore analysis_id for this table
#So we currently never use this!
#So IMPORTED status should be tied to CS id and Analysis id?

sub rollback_ArrayChips{
  my ($self, $acs, $mode, $force, $keep_xrefs, $no_clean_up, $force_clean_up) = @_;

  #no_clean_up and force_clean_up allow analyze/optimize to be skipped until the last rollback
  #We could get around this by specifying all ArrayChips for all formats at the same time?
  #Need to implement in RollbackArrays
  #NOTE: force_clean_up is not passed on to rollback_table here

  $mode ||= 'probe';
  my %valid_modes = map { $_ => 1 } qw(probe probe_feature ProbeAlign ProbeTranscriptAlign probe2transcript);

  if(! exists $valid_modes{$mode}){
    throw("You have passed an invalid mode argument($mode), you must omit or specify either 'probe2transcript', 'probe', 'ProbeAlign', 'ProbeTranscriptAlign' or 'probe_feature' for all of the Align output");
  }

  if($force && ($force ne 'force')){
    throw("You have not specified a valid force argument($force), you must specify 'force' or omit");
  }

  if($keep_xrefs && ($keep_xrefs ne 'keep_xrefs')){
    throw("You have not specified a valid keep_xrefs argument($keep_xrefs), you must specify 'keep_xrefs' or omit");
  }

  if($keep_xrefs){

    if($mode eq 'probe' || $mode eq 'probe2transcript'){
      throw("You cannot specify 'keep_xrefs' with mode $mode, you can only rollback features e.g. probe_feature, ProbeAlign or ProbeTranscriptAlign");
    }

    if($force){
      throw("You cannot 'force' delete the probe2transcript xrefs and 'keep_xrefs' at the same time. Please specify just one.");
    }
  }

  #Without at least one ArrayChip there is no DBAdaptor to work with
  if(! (ref($acs) eq 'ARRAY' && @$acs)){
    throw('Must pass an arrayref of ArrayChips to rollback');
  }

  my ($adaptor, $db, %classes);

  foreach my $ac(@$acs){
    $adaptor ||= $ac->adaptor || throw('ArrayChip must have an adaptor');
    $db      ||= $adaptor->db;
    $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ArrayChip', $ac);

    if(! $ac->get_Array->class){
      throw('The ArrayChip you are trying to rollback does not have a class attribute');
    }

    #Classes are collected rather than restricted to one,
    #they define the analysis logic names used below
    $classes{$ac->get_Array->class} = undef;

    #if($class && ($class ne $ac->get_Array->class)){
    #  throw('You can only rollback_ArrayChips for ArrayChips with the same class');
    #}
  }


  #This is always the case as we register the association before we set the Import status
  #Hence the 2nd stage of the import fails as we have an associated ExperimentalChip
  #We need to make sure the ExperimentalChip and Channel have not been imported!!!
  warn "NOTE: rollback_ArrayChips. Need to implement ExperimentlChip check, is the problem that ExperimentalChips are registered before ArrayChips imported?";
  #Check for dependent ExperimentalChips
  #if(my @echips = @{$db->get_ExperimentalChipAdaptor->fetch_all_by_ArrayChip($ac)}){
  #  my %exps;
  #  my $txt = "Experiment\t\t\t\tExperimentalChip Unique IDs\n";

  #  foreach my $ec(@echips){
  #    $exps{$ec->get_Experiment->name} ||= '';

  #    $exps{$ec->get_Experiment->name} .= "\t".$ec->unique_id;
  #  }

  #  map {$txt.= "\t".$_.":".$exps{$_}."\n"} keys %exps;

  #  throw("Cannot rollback ArrayChip:\t".$ac->name.
  #        "\nFound Dependent Experimental Data:\n".$txt);
  #  }


  my $ac_names = join(', ', (map { $_->name } @$acs));
  my $ac_ids   = join(', ', (map { $_->dbID } @$acs));


  $self->log("Rolling back ArrayChips $mode entries:\t$ac_names");
  my ($row_cnt, $probe_join, $sql);
  #$ac->adaptor->revoke_states($ac);#This need to be more specific to the type of rollback
  my $species = $db->species;

  if(!$species){
    throw('Cannot rollback probe2transcript level xrefs without specifying a species for the DBAdaptor');
  }
  #Will from registry? this return Homo sapiens?
  #Or homo_sapiens
  #Replace every space, species names can have more than two words
  ($species = lc($species)) =~ s/ /_/g;

  my $transc_edb_name = "${species}_core_Transcript";
  my $genome_edb_name = "${species}_core_Genome";

  #Maybe we want to rollback ProbeAlign and ProbeTranscriptAlign output separately so we
  #can re-run just one part of the alignment step.


  #We want this Probe(Transcript)Align rollback available in the environment
  #So we can do it natively and before we get to the RunnableDB stage,
  #where we would be trying multiple rollbacks in parallel
  #Wrapper script?
  #Or do we keep it simple here and maintain probe_feature wide rollback
  #And just the ProbeAlign/ProbeTranscriptAlign roll back in the environment?


  #We can restrict the probe deletes using the ac_id
  #We should test for other ac_ids using the same probe_id
  #Then fail unless we have specified force delete

  #These should be deleted for all other modes but only if force is set?
  #This may delete xrefs for other ArrayChips

  #The issues is if we need to specify force for one delete but don't want to delete something else?
  #force should only be used to delete upto and including the mode specified
  #no mode equates to probe mode
  #if no force then we fail if previous levels/modes have xrefs etc...


  #Let's grab the edb ids first and use them directly, this will avoid table locks on edb
  #and should also speed query up?


  if($mode eq 'probe2transcript' ||
     $force){

    #Delete ProbeFeature UnmappedObjects
    $self->log("Deleting probe2transcript ProbeFeature UnmappedObjects");
    $sql = "DELETE uo FROM analysis a, unmapped_object uo, probe p, probe_feature pf, external_db e WHERE a.logic_name ='probe2transcript' AND a.analysis_id=uo.analysis_id AND p.probe_id=pf.probe_id and pf.probe_feature_id=uo.ensembl_id and uo.ensembl_object_type='ProbeFeature' and uo.external_db_id=e.external_db_id AND e.db_name ='${transc_edb_name}' AND p.array_chip_id IN($ac_ids)";
    $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);


    #Delete ProbeFeature Xrefs/DBEntries
    $self->log("Deleting probe2transcript ProbeFeature Xrefs");
    $sql = "DELETE ox FROM xref x, object_xref ox, probe p, probe_feature pf, external_db e WHERE x.external_db_id=e.external_db_id AND e.db_name ='${transc_edb_name}' AND x.xref_id=ox.xref_id AND ox.ensembl_object_type='ProbeFeature' AND ox.ensembl_id=pf.probe_feature_id AND pf.probe_id=p.probe_id AND ox.linkage_annotation!='ProbeTranscriptAlign' AND p.array_chip_id IN($ac_ids)";
    $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up);


    #Probe/Set specific entries
    for my $xref_object('Probe', 'ProbeSet'){
      $probe_join = ($xref_object eq 'ProbeSet') ? 'p.probe_set_id' : 'p.probe_id';

      #Delete Probe/Set UnmappedObjects
      $self->log("Deleting probe2transcript $xref_object UnmappedObjects");

      $sql = "DELETE uo FROM analysis a, unmapped_object uo, probe p, external_db e WHERE a.logic_name='probe2transcript' AND a.analysis_id=uo.analysis_id AND uo.ensembl_object_type='${xref_object}' AND $probe_join=uo.ensembl_id AND uo.external_db_id=e.external_db_id AND e.db_name='${transc_edb_name}' AND p.array_chip_id IN($ac_ids)";
      #.' and edb.db_release="'.$schema_build.'"';
      $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);

      #Delete Probe/Set Xrefs/DBEntries
      $sql = "DELETE ox FROM xref x, object_xref ox, external_db e, probe p WHERE x.xref_id=ox.xref_id AND e.external_db_id=x.external_db_id AND e.db_name ='${transc_edb_name}' AND ox.ensembl_object_type='${xref_object}' AND ox.ensembl_id=${probe_join} AND p.array_chip_id IN($ac_ids)";
      $self->log("Deleting probe2transcript $xref_object xref records");
      $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up);
    }
  }
  elsif(! $keep_xrefs){#Need to check for existing xrefs if not force
    #we don't know whether this is on probe or probeset level
    #This is a little hacky as there's not way we can guarantee this xref will be from probe2transcript
    #until we get the analysis_id moved from identity_xref to xref
    #We are also using the Probe/Set Xrefs as a proxy for all other Xrefs and UnmappedObjects
    #Do we need to set a status here? Would have problem rolling back the states of associated ArrayChips

    for my $xref_object('Probe', 'ProbeSet'){

      $probe_join = ($xref_object eq 'ProbeSet') ? 'p.probe_set_id' : 'p.probe_id';

      #List assignment, so we always get the first column of the row
      ($row_cnt) = $db->dbc->db_handle->selectrow_array("SELECT COUNT(*) FROM xref x, object_xref ox, external_db e, probe p WHERE x.xref_id=ox.xref_id AND e.external_db_id=x.external_db_id AND e.db_name ='${transc_edb_name}' and ox.ensembl_object_type='${xref_object}' and ox.ensembl_id=${probe_join} AND p.array_chip_id IN($ac_ids)");

      if($row_cnt){
        throw("Cannot rollback ArrayChips($ac_names), found $row_cnt $xref_object Xrefs. Pass 'force' argument or 'probe2transcript' mode to delete");
      }
    }
  }


  #ProbeFeatures inc ProbeTranscriptAlign xrefs

  if($mode ne 'probe2transcript'){

    if(($mode eq 'probe' && $force) ||
       $mode eq 'probe_feature' ||
       $mode eq 'ProbeAlign' ||
       $mode eq 'ProbeTranscriptAlign'){


      #Should really revoke some state here but we only have IMPORTED

      #ProbeTranscriptAlign Xref/DBEntries

      #my (@anal_ids) = @{$db->get_AnalysisAdaptor->generic_fetch("a.module='ProbeAlign'")};
      #Grrrr! AnalysisAdaptor is not a standard BaseAdaptor implementation
      #my @anal_ids = @{$db->dbc->db_handle->selectall_arrayref('select analysis_id from analysis where module like "%ProbeAlign"')};
      #@anal_ids = map {$_= "@$_"} @anal_ids;

      if($mode ne 'ProbeAlign'){
        my $lnames = join(', ', (map { "'${_}_ProbeTranscriptAlign'" } keys(%classes)));

        $sql = "DELETE ox from object_xref ox, xref x, probe p, probe_feature pf, external_db e WHERE ox.ensembl_object_type='ProbeFeature' AND ox.linkage_annotation='ProbeTranscriptAlign' AND ox.xref_id=x.xref_id AND e.external_db_id=x.external_db_id and e.db_name='${transc_edb_name}' AND ox.ensembl_id=pf.probe_feature_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
        $self->log("Deleting ProbeFeature Xref/DBEntry records for:\t$lnames");
        $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up);


        #Can't include uo.type='ProbeTranscriptAlign' in these deletes yet as uo.type is enum'd to xref or probe2transcript
        #will have to join to analysis and do a like "%ProbeTranscriptAlign" on the the logic name?
        #or/and ur.summary_description='Promiscuous probe'?

        $sql = "DELETE uo from unmapped_object uo, probe p, external_db e, analysis a WHERE uo.ensembl_object_type='Probe' AND uo.analysis_id=a.analysis_id AND a.logic_name in (${lnames}) AND e.external_db_id=uo.external_db_id and e.db_name='${transc_edb_name}' AND uo.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)";

        $self->log("Deleting UnmappedObjects for:\t${lnames}");
        $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);


        #Now the actual ProbeFeatures
        $sql = "DELETE pf from probe_feature pf, probe p, analysis a WHERE a.logic_name in(${lnames}) AND a.analysis_id=pf.analysis_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
        $self->log("Deleting ProbeFeatures for:\t${lnames}");
        $self->rollback_table($sql, 'probe_feature', 'probe_feature_id', $db, $no_clean_up);
      }

      if($mode ne 'ProbeTranscriptAlign'){
        my $lnames = join(', ', (map { "'${_}_ProbeAlign'" } keys(%classes)));

        #logic_name must use IN, there is one logic name per array class
        $sql = "DELETE uo from unmapped_object uo, probe p, external_db e, analysis a WHERE uo.ensembl_object_type='Probe' AND uo.analysis_id=a.analysis_id AND a.logic_name in (${lnames}) AND e.external_db_id=uo.external_db_id and e.db_name='${genome_edb_name}' AND uo.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
        $self->log("Deleting UnmappedObjects for:\t${lnames}");
        $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);


        $sql = "DELETE pf from probe_feature pf, probe p, analysis a WHERE a.logic_name in(${lnames}) AND a.analysis_id=pf.analysis_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
        $self->log("Deleting ProbeFeatures for:\t${lnames}");
        $self->rollback_table($sql, 'probe_feature', 'probe_feature_id', $db, $no_clean_up);
      }
    }
    else{
      #Need to count to see if we can carry on with a unforced probe rollback?
      #Do we need this level of control here
      #Can't we assume that if you want probe you also want probe_feature?
      #Leave for safety, at least until we get the dependant ExperimetnalChip test sorted
      #What about if we only want to delete one array from an associated set?
      #This would delete all the features from the rest?

      #ProbeFeature xrefs hold probe_feature_ids, so join to probe via probe_feature
      #as the corresponding delete above does
      $sql = "select count(*) from object_xref ox, xref x, probe p, probe_feature pf, external_db e WHERE ox.ensembl_object_type='ProbeFeature' AND ox.linkage_annotation='ProbeTranscriptAlign' AND ox.xref_id=x.xref_id AND e.external_db_id=x.external_db_id and e.db_name='${transc_edb_name}' AND ox.ensembl_id=pf.probe_feature_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
      ($row_cnt) = $db->dbc->db_handle->selectrow_array($sql);

      if($row_cnt){
        throw("Cannot rollback ArrayChips($ac_names), found $row_cnt ProbeFeatures. Pass 'force' argument or 'probe_feature' mode to delete");
      }
      else{
        $self->log("Found $row_cnt ProbeFeatures");
      }
    }

    if($mode eq 'probe'){
      #Don't need to rollback on a CS as we have no dependant EChips?
      #Is this true? Should we enforce a 3rd CoordSystem argument, 'all' string we delete all?

      foreach my $ac(@$acs){
        $ac->adaptor->revoke_states($ac);#Do we need to change this to revoke specific states?
        #Current states are only IMPORTED, so not just yet, but we could change this for safety?
      }

      #ProbeSets, these have to go first as they are found via the probe table
      $sql = "DELETE ps from probe p, probe_set ps where p.array_chip_id IN($ac_ids) and p.probe_set_id=ps.probe_set_id";
      $self->rollback_table($sql, 'probe_set', 'probe_set_id', $db, $no_clean_up);

      #Probes
      $sql = "DELETE from probe where array_chip_id IN($ac_ids)";
      $self->rollback_table($sql, 'probe', 'probe_id', $db, $no_clean_up);
    }
  }

  $self->log("Finished $mode roll back for ArrayChip:\t$ac_names");
  return;
}
| 2234 | |
| 2235 | |
| 2236 #This will just fail silently if the reset value | |
| 2237 #Is less than the true autoinc value | |
| 2238 #i.e. if there are parallel inserts going on | |
| 2239 #So we can never assume that the $new_auto_inc will be used | |
| 2240 | |
| 2241 | |
#Runs a delete statement and logs how many records were removed from the table.
#
#  Arg[1]     : String - the delete statement to execute
#  Arg[2]     : String - table name, used in log/error messages and for the refresh
#  Arg[3]     : String - id field of the table, passed on to refresh_table
#  Arg[4]     : DBAdaptor the statement is executed on
#  Arg[5]     : Optional - Boolean no_clean_up, skips refresh_table after a delete
#  Arg[6]     : Optional - Boolean force_clean_up, always calls refresh_table
#  Returntype : None
#  Exceptions : Throws if the statement dies or returns no row count

sub rollback_table{
  my ($self, $sql, $table, $id_field, $db, $no_clean_up, $force_clean_up) = @_;

  #Test the return value of the eval rather than $@ alone,
  #as $@ can be reset before it is inspected
  my $row_cnt;
  my $succeeded = eval { $row_cnt = $db->dbc->do($sql); 1 };

  #DBI returns undef on error, so a missing row count is a failure too
  if(! ($succeeded && defined $row_cnt)){
    my $error = $@ || 'No row count returned';
    throw("Failed to rollback table $table using sql:\t$sql\n$error");
  }

  #'0E0' is the true but numerically zero value returned when no rows were affected
  $row_cnt = 0 if $row_cnt eq '0E0';
  $self->log("Deleted $row_cnt $table records");

  if($force_clean_up ||
     ($row_cnt && ! $no_clean_up)){
    $self->refresh_table($table, $id_field, $db);
  }

  return;
}
| 2262 | |
| 2263 #Now separated so that we can do this once at the end of a rollback of many Sets | |
| 2264 | |
#Resets the autoinc value (when an id field is given), then optimizes and analyzes the table.
#
#  Arg[1]     : String - table name
#  Arg[2]     : Optional - String, id field; the autoinc reset is skipped if false
#  Arg[3]     : DBAdaptor the statements are executed on
#  Returntype : None

sub refresh_table{
  my ($self, $table, $id_field, $db) = @_;

  #This only works if the new value is available
  #i.e. do not need lock for this to be safe
  if($id_field){
    $self->reset_table_autoinc($table, $id_field, $db);
  }

  $self->log("Optimizing and Analyzing $table");

  #optimize: defrag data, sorts indices, updates table stats
  #analyze:  analyses key distribution
  for my $statement ("optimize table $table", "analyze table $table"){
    $db->dbc->do($statement);
  }

  return;
}
| 2279 | |
| 2280 | |
| 2281 | |
#Sets the AUTO_INCREMENT value of a table to one more than the highest
#value currently stored in the given field, or to 1 if no value is found.
#
#  Args [1] : table name
#  Args [2] : autoinc field name
#  Args [3] : Bio::EnsEMBL::DBSQL::DBAdaptor
#  Throws if any argument is missing or the db is not a DBAdaptor.

sub reset_table_autoinc{
  my($self, $table_name, $autoinc_field, $db) = @_;

  if(! $table_name || ! $autoinc_field || ! $db){
    throw('You must pass a table_name and an autoinc_field to reset the autoinc value');
  }

  if(! ref($db) || ! $db->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')){
    throw('Must pass a valid Bio::EnsEMBL::DBSQL::DBAdaptor');
  }

  #Fetch the highest value in use directly from the table
  my $max_id_sql = "select $autoinc_field from $table_name order by $autoinc_field desc limit 1";
  my ($max_id)   = $db->dbc->db_handle->selectrow_array($max_id_sql);

  #Start from 1 when there is no (true) current value
  my $next_id = 1;
  $next_id    = $max_id + 1 if $max_id;

  $db->dbc->do("ALTER TABLE $table_name AUTO_INCREMENT=$next_id");

  return;
}
| 2308 | |
| 2309 | |
| 2310 | |
| 2311 | |
=head2 get_core_display_name_by_stable_id

  Args [1]   : Bio::EnsEMBL::DBSQL::DBAdaptor
  Args [2]   : stable ID from core DB.
  Args [3]   : stable feature type e.g. gene, transcript, translation
  Example    : my $name = $self->get_core_display_name_by_stable_id($cdb, $stable_id, 'gene');
  Description: Returns the display name (xref display_label) for a core
               stable ID. The result of each lookup, including a failed
               one, is cached by stable ID.
  Returntype : string - display name
  Exceptions : Throws if type is not valid.
  Caller     : General
  Status     : At risk

=cut

# --------------------------------------------------------------------------------
# Maintains a cache of ensembl stable ID -> display_name
# The cache is keyed on the stable ID only, not on the type

sub get_core_display_name_by_stable_id{
  my ($self, $cdb, $stable_id, $type) = @_;

  #Avoid an uninitialized warning from lc, an undef type is rejected below
  $type = defined($type) ? lc($type) : '';

  #The type is interpolated as a table name below, so only an exact
  #match is acceptable, not just a string containing a valid type
  if($type !~ /\A(?:gene|transcript|translation)\z/){
    throw("Cannot get display_name for stable_id $stable_id with type $type");
  }

  if(! exists $self->{'display_name_cache'}->{$stable_id}){
    #Pass the stable ID as a bind value rather than quoting it into the statement
    my $sql = "SELECT x.display_label FROM $type t, xref x where t.display_xref_id=x.xref_id and t.stable_id=?";

    ($self->{'display_name_cache'}->{$stable_id}) = $cdb->dbc->db_handle->selectrow_array($sql, undef, $stable_id);
  }

  return $self->{'display_name_cache'}->{$stable_id};
}
| 2347 | |
| 2348 | |
=head2 get_core_stable_id_by_display_name

  Args [1]   : Bio::EnsEMBL::DBSQL::DBAdaptor
  Args [2]   : display name (e.g. from core DB or GNC name)
  Example    : my $stable_id = $self->get_core_stable_id_by_display_name($cdb, $display_name);
  Description: Returns the stable ID of the gene whose display xref has the
               given display label. The result of each lookup, including
               a failed one, is cached by display name.
  Returntype : string - gene stable ID
  Exceptions : None
  Caller     : General
  Status     : At risk

=cut

# --------------------------------------------------------------------------------
# Maintains a cache of display_name -> ensembl gene stable ID
# Only the gene table is queried, so the cache is keyed on display name alone

sub get_core_stable_id_by_display_name{
  my ($self, $cdb, $display_name) = @_;

  if(! exists $self->{'stable_id_cache'}->{$display_name}){
    #Pass the display name as a bind value so names containing quotes are handled
    my $sql = 'SELECT g.stable_id FROM gene g, xref x where g.display_xref_id=x.xref_id and x.display_label=?';

    ($self->{'stable_id_cache'}->{$display_name}) = $cdb->dbc->db_handle->selectrow_array($sql, undef, $display_name);
  }

  return $self->{'stable_id_cache'}->{$display_name};
}
| 2381 | |
| 2382 | |
| 2383 | |
| 2384 | |
| 2385 | |
| 2386 | |
| 2387 1; | |
| 2388 |
