Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Funcgen/Utils/Helper.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Funcgen/Utils/Helper.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,2388 @@ +=head1 LICENSE + + Copyright (c) 1999-2011 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <ensembl-dev@ebi.ac.uk>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + + +=head1 NAME + +Bio::EnsEMBL::Funcgen::Utils::Helper + +=head1 SYNOPSIS + + + e.g. + + + my $object = Bio::EnsEMBL::Object->new + ( + logging => 1, + log_file => "/tmp/Misc.log", + debug_level => 2, + debug_file => "/tmp/Misc.dbg", + ); + + $object->log("This is a log message."); + $object->debug(1,"This is a debug message."); + $object->system("rmdir /tmp/test"); + + + ---------------------------------------------------------------------------- + + +=head1 OPTIONS + +=over 8 + + +=item B<-debug> + +Turns on and defines the verbosity of debugging output, 1-3, default = 0 = off + +=over 8 + +=item B<-log_file|l> + +Defines the log file, default = "${instance}.log" + +=item B<-help> + +Print a brief help message and exits. + +=item B<-man> + +Prints the manual page and exits. + +=back + +=head1 DESCRIPTION + +B<This program> performs several debugging and logging functions, aswell as providing several inheritable EFGUtils methods. + +=cut + +################################################################################ + +package Bio::EnsEMBL::Funcgen::Utils::Helper; + +use Bio::Root::Root; +use Data::Dumper; +use Bio::EnsEMBL::Utils::Exception qw (throw stack_trace); +use Bio::EnsEMBL::Utils::Argument qw( rearrange ); +use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw (get_date); +#use Devel::Timer; +use Carp;#? Can't use unless we can get it to redirect +use File::Basename; + + +use strict; +use vars qw(@ISA); +@ISA = qw(Bio::Root::Root); + +#List of valid rollback levels +#To be used in conjunction with -full_delete +my @rollback_tables = ('data_set', 'feature_set', 'result_set', 'input_set', 'experiment', 'array', 'array_chip', 'experimental_chip'); + +#Some local filevars to avoid assigning to package typeglobs +my ($DBGFILE, $LOGFILE); + +################################################################################ + +=head2 new + + Description : Constructor method to create a new object with passed or + default attributes. + + Arg [1] : hash containing optional attributes :- + log_file - name of log file (default = undef -> STDOUT) + debug_level - level of detail of debug message [1-3] (default = 0 = off) + debug_file - name of debug file (default = undef -> STDERR) + + ReturnType : Helper + + Example : my $Helper = new Bio::EnsEMBL::Helper( + debug_level => 3, + debug_file => "/tmp/efg.debug", + log_file => "/tmp/efg.log", + ); + + Exceptions : throws exception if failed to open debug file + : throws exception if failed to open log file + +=cut + +################################################################################ + +#To do , change to rearrange + +sub new{ + my ($caller, %args) = @_; + + my ($self, %attrdata, $argname); + my $class = ref($caller) || $caller; + + #Create object from parent class + $self = $class->SUPER::new(%args); + + #we need to mirror ensembl behaviour here + #use rearrange and set default afterwards if not defined + + # objects private data and default values + #Not all of these need to be in main + + %attrdata = ( + _tee => $main::_tee, + _debug_level => $main::_debug_level, + _debug_file => $main::_debug_file, + _log_file => $main::_log_file,#default should be set in caller + _no_log => $main::_no_log,#suppresses log file generation if log file not defined + _default_log_dir => $main::_default_log_dir, + ); + + # set each class attribute using passed value or default value + foreach my $attrname (keys %attrdata){ + ($argname = $attrname) =~ s/^_//; # remove leading underscore + $self->{$attrname} = (exists $args{$argname}) ? $args{$argname} : $attrdata{$attrname}; + } + + + $self->{'_tee'} = 1 if $self->{'_no_log'}; + #should we undef log_file here too? + #This currently only turns off default logging + + $self->{_default_log_dir} ||= $ENV{'HOME'}.'/logs'; + $self->{'_report'} = []; + + + # DEBUG OUTPUT & STDERR + + #should default to lowest or highest debug level here! + + if(defined $self->{_debug_level} && $self->{_debug_level}){ + $main::_debug_level = $self->{_debug_level}; + + if(defined $self->{_debug_file}){ + $main::_debug_file = $self->{_debug_file}; + + open($DBGFILE, '>>', $self->{_debug_file}) + or throw("Failed to open debug file : $!"); + + #open (DBGFILE, "<STDERR | tee -a ".$self->{_debug_file});#Mirrors STDERR to debug file + } + else{ + open($DBGFILE, '>&STDERR'); + } + + select $DBGFILE; $| = 1; # make debug file unbuffered + + $self->debug(1,"Debugging started ".localtime()." on $0 at level ".$self->{_debug_level}." ..."); + } + + my $log_file = $self->{_log_file}; + + + # LOG OUTPUT + if (defined $self->{_log_file}){ + + #This causes print on unopened file as we try and log in the DESTROY + throw('You have specified mutually exclusive parameters log_file and no_log') if($self->{'_no_log'}); + $main::_log_file = $self->{_log_file}; + + #we need to implment tee here + if($self->{'_tee'}){ + open($LOGFILE, ' | tee -a '.$log_file); + } + else{ + open($LOGFILE, '>>', $log_file) + or throw("Failed to open log file : $log_file\nError: $!"); + } + } + else{ + #Change this to get the name of the control script and append with PID.out + #This is to ensure that we always capture output + #We need to also log params + #We will have to call this from the child class. + + + #Only do this if we don't have supress default logs set + #To avoid loads of loags during testing + if(! $self->{'_no_log'}){ + + my @stack = stack_trace(); + my $top_level = $stack[$#stack]; + my (undef, $file) = @{$top_level}; + $file =~ s/.*\///; + + $self->run_system_cmd('mkdir '.$self->{_default_log_dir}) if(! -e $self->{_default_log_dir}); + $self->{'_log_file'} = $self->{_default_log_dir}.'/'.$file.'.'.$$.'.log'; + warn "No log file defined, defaulting to:\t".$self->{'_log_file'}."\n"; + + #we should still tee here + if($self->{'_tee'}){ + open($LOGFILE, '| tee -a '.$self->{'_log_file'}); + } + else{ + open($LOGFILE, '>', $self->{'_log_file'}) + or throw('Failed to open log file : '.$self->{'_log_file'}."\nError: $!"); + } + + } + else{ + #Have to include STD filehandles in operator + open($LOGFILE, '>&STDOUT'); + } + } + + select $LOGFILE; $| = 1; # make log file unbuffered + $self->log("\n\nLogging started at ".localtime()."..."); + + # RESET STDOUT TO DEFAULT + select STDOUT; $| = 1; + + $self->debug(2,"Helper class instance created."); + + return $self; +} + + +################################################################################ + +=head2 DESTROY + + Description : Called by gargbage collection to enable tidy up before object deleted + + ReturnType : none + + Example : none - should not be called directly + + Exceptions : none + +=cut + +################################################################################ + +sub DESTROY{ + my ($self) = @_; + + + $self->report; + + if($self->{_log_file}){ + $self->log("Logging complete ".localtime()."."); + $self->log('Virtual Memory '.`ps -p $$ -o vsz |tail -1`); + $self->log('Resident Memory '.`ps -p $$ -o rss |tail -1`); + + + + + # close LOGFILE; # if inherited object then cannot close filehandle !!! + } + + if($self->{_debug_level}){ + $self->debug(1,"Debugging complete ".localtime()."."); + # close DBGFILE; # if inherited object then cannot close filehandle !!! + } + + if(defined $self->{'_timer'}){ + $self->{'_timer'}->report(); + } + + $self->debug(2,"Bio::EnsEMBL::Helper class instance destroyed."); + + return; +} + + + + +##Need generic method in here to get stack and line info +###Use Root.pm stack methods! +# and replace this with caller line method for logging +sub _get_stack{ + my ($self) = shift; + + + #need to resolve this method with that in debug, pass log or debug arg for different format + + my @prog = (caller(2)) ? caller(2) : (caller(1)) ? caller(1) : (undef,"undef",0); + + return "[".localtime()." - ".basename($prog[1]).":$prog[2]]"; +} + + +################################################################################ + +=head2 log + + Arg[0] : string - log message. + Arg[1] : boolean - memory usage, appends current process memory stats + Description : Method to write messages to a previously set up log file. + Return type : none + Example : $root->log("Processing file $filename ...", 1); + Exceptions : none + +=cut + +################################################################################ + +sub log{ + my ($self, $message, $mem, $date, $no_return) = @_; + + if($mem){ + $message.= " :: ".`ps -p $$ -o vsz |tail -1`; + chomp $message; + $message .= " KB"; + } + + if($date){ + my $time = localtime(); + chomp($time); + $message .= ' - '.localtime(); + } + + $message .= "\n" if ! $no_return; + + print $LOGFILE "::\t$message"; + + # Add to debug file if not printing to STDERR? + # only if verbose? + # this would double print everything to STDOUT if tee and debug has not redefined STDERR + + $self->debug(1,$message); +} + +################################################################################ + + +=head2 report + + Arg[0] : optional string - log message. + Arg[1] : optional boolean - memory usage, appends current process memory stats + Description : Wrapper method for log, which also stores message for summary reporting + Return type : none + Example : $root->report("WARNING: You have not done this or that and want it reported at the end of a script"); + Exceptions : none + +=cut + +################################################################################ + +sub report{ + my ($self, $message, $mem) = @_; + + if(defined $message){ + + $self->log($message, $mem); + + push @{$self->{'_report'}}, $message; + } + elsif(scalar(@{$self->{'_report'}})){ + print $LOGFILE "\n::\tSUMMARY REPORT\t::\n"; + print $LOGFILE join("\n", @{$self->{'_report'}})."\n"; + + $self->{'_report'} = []; + } + + return; +} + + + + + + +################################################################################ + +=head2 log_header + + Arg[0] : string - log message. + Arg[1] : boolean - memory usage, appends current process memory stats + Description : Wrapper method to format a log as a header line + Return type : none + Example : $root->log("Processing file $filename ...", 1); + Exceptions : none + +=cut + +################################################################################ + +sub log_header{ + my ($self, $message, $mem, $date) = @_; + + print $LOGFILE "\n\n"; + $self->log("::\t$message\t::\t::", $mem, $date); + print $LOGFILE "\n"; +} + + + + + +################################################################################ + +=head2 debug + + Description : Method to write debug info to a previously set up debug file. + Over-rides Root.pm on/off style debugging + + Args : int: debug level and string: log message. + + ReturnType : none + + Example : $root->debug(2,"dir=$dir file=$file"); + + Exceptions : none + +=cut + +################################################################################ + +sub debug{ + my ($self,$level,$message) = @_; + + + + #Can we not detect whther message is a scalar, array or hash and Dump or print accordingly? + + my (@call,$cnt,$prog_name,$prog_line,$call_name,$call_line); + + $prog_name = $call_name = "undef"; + $prog_line = $call_line = $cnt = 0; + + # if debug on at the requested level then output the passed message + if (defined $self->{_debug_level} && $level <= $self->{_debug_level}){ + + ######Replace this with Carp method? + while (@call = caller($cnt++)){ + + if ($cnt == 2){ + $call_name = basename($call[1]); + $call_line = $call[2] + } + + $prog_name = basename($call[1]); + $prog_line = $call[2]; + } + + #This still attempts to print if file not opened + print $DBGFILE "debug $message\t: [$$ - $prog_name:$prog_line $call_name:$call_line]\n"; + + #carp("carping $message"); + } +} + + +################################################################################ + +=head2 debug_hash + + Description : Method to write the contents of passed hash to debug output. + + Args : int: debug level and hashref. + + ReturnType : none + + Example : $Helper->debug_hash(3,\%hash); + + Exceptions : none + +=cut + +################################################################################ + +sub debug_hash{ + my ($self,$level,$hashref) = @_; + + my ($attr); + + # if debug on at the requested level then output the passed hash + if (defined $self->{_debug_level} && $level <= $self->{_debug_level}){ + print $DBGFILE Data::Dumper::Dumper(\$hashref)."\n"; + } +} + + + +################################################################################ + +=head2 run_system_cmd + + Description : Method to control the execution of the standard system() command + + ReturnType : none + + Example : $Helper->debug(2,"dir=$dir file=$file"); + + Exceptions : throws exception if system command returns none zero + +=cut + +################################################################################ + + +#Move most of this to EFGUtils.pm +#Maintain wrapper here with throws, only warn in EFGUtils + +sub run_system_cmd{ + my ($self, $command, $no_exit) = @_; + + my $redirect = ''; + + $self->debug(3, "system($command)"); + + # decide where the command line output should be redirected + + #This should account for redirects + #This just sends everything to 1 no? + + if (defined $self->{_debug_level} && $self->{_debug_level} >= 3){ + + if (defined $self->{_debug_file}){ + $redirect = " >>".$self->{_debug_file}." 2>&1"; + } + else{ + $redirect = ""; + } + } + else{ + #$redirect = " > /dev/null 2>&1"; + } + + # execute the passed system command + my $status = system("$command $redirect"); + my $exit_code = $status >> 8; + + if ($status == -1) { + warn "Failed to execute: $!\n"; + } + elsif ($status & 127) { + warn sprintf("Child died with signal %d, %s coredump\nError:\t$!",($status & 127),($status & 128) ? 'with' : 'without'); + } + elsif($status != 0) { + warn sprintf("Child exited with value %d\nError:\t$!\n", $exit_code); #get the true exit code + } + + if ($exit_code != 0){ + + if (! $no_exit){ + throw("System command failed:\t$command\nExit code:\t$exit_code\n$!"); + } + else{ + warn("System command returned non-zero exit code:\t$command\nExit code:\t$exit_code\n$!"); + } + } + + #reverse boolean logic for perl...can't do this anymore due to tab2mage successful non-zero exit codes :/ + + return $exit_code; +} + + +#add sys_get method ehre to handle system calls which retrieve data? +#i.e.backtick commands `find . -name *fasta` +#or use want or flag with above method? +#should open pipe instead to capture error? + +sub get_data{ + my ($self, $data_type, $data_name) = @_; + + #This method is just to provide standard checking for specific get_data/config methods + + if(defined $data_name){ + throw("Defs data name $data_name for type '$data_type' does not exist\n") if (! exists $self->{"${data_type}"}{$data_name}); + }else{ + throw("Defs data type $data_type does not exist\n") if (! exists $self->{"${data_type}"}); + } + + return (defined $data_name) ? $self->{"${data_type}"}{$data_name} : $self->{"${data_type}"}; +} + + +#sub Timer{ +# my ($self) = shift; + +# $self->{'_timer'} = new Devel::Timer() if(! defined $self->{'_timer'}); + +# return $self->{'_timer'}; + +#} + + +sub set_header_hash{ + my ($self, $header_ref, $fields) = @_; + + my %hpos; + + for my $x(0..$#{$header_ref}){ + $hpos{$header_ref->[$x]} = $x; + } + + + if($fields){ + + foreach my $field(@$fields){ + + if(! exists $hpos{$field}){ + throw("Header does not contain mandatory field:\t${field}"); + } + } + } + + return \%hpos; +} + +#Move this to EFGUtils? + +sub backup_file{ + my ($self, $file_path) = @_; + + throw("Must define a file path to backup") if(! $file_path); + + if (-f $file_path) { + $self->log("Backing up:\t$file_path"); + system ("mv ${file_path} ${file_path}.".`date '+%T'`); + } + + return; + +} + +#This should move to Utils +#as it is a simple string manipulation + +sub get_schema_and_build{ + my ($self, $dbname) = @_; + my @dbname = split/_/, $dbname; + return [$dbname[($#dbname -1)], $dbname[($#dbname )]]; +} + +=head2 get_regbuild_set_states + + Arg [1] : Bio::EnsEMBL::DBAdaptor + Example : my ($dset_states, $rset_states, $fset_states) = $helper->get_regbuild_set_states($db); + Description: Returns Array refs of appropriate states for sets use din the regulatory build + Returntype : Array + Exceptions : Warns if cannot find chromosome CoordSystem + Caller : HealthChecker & regulatory build code + Status : At risk + +=cut + + +sub get_regbuild_set_states{ + my ($self, $db) = @_; + + my $cs_a = $db->get_CoordSystemAdaptor; + + #These states need to be mirrored in RegulatorySets.java + + my $chrom_cs = $cs_a->fetch_by_name('chromosome'); + my (@dset_states, @rset_states, @fset_states); + + if(! defined $chrom_cs){ + #This species most likely does not have a regbuild + #really just need to get the 'highest' level here + warn "Could not find Chromosome CoordSystem. ".$db->dbc->dbname.". most likely does not contain a RegulatoryBuild"; + } + else{ + my $imp_cs_status = 'IMPORTED_'.$cs_a->fetch_by_name('chromosome')->version; + + #What about non-chromosome assemblies? + #top level will not return version...why not? + @dset_states = ('DISPLAYABLE'); + @rset_states = (@dset_states, 'DAS_DISPLAYABLE', $imp_cs_status); + @fset_states = (@rset_states, 'MART_DISPLAYABLE'); + } + + return (\@dset_states, \@rset_states, \@fset_states); +} + + + +=head2 define_and_validate_sets + + Arg [1] : hash - set constructor parameters: + -dbadaptor Bio::EnsEMBL::Funcgen::DBAdaptor + -name Data/FeatureSet/ResultSet name to create + -feature_type Bio::EnsEMBL::Funcgen::FeatureType + -cell_type Bio::EnsEMBL::Funcgen::CellType + -analysis FeatureSet Bio::EnsEMBL::Analysis + -feature_class e.g. annotated or regulatory + -description FeatureSet description + -recovery Allows definition of extant sets so long as they match + -append Boolean - Forces import on top of previously imported data + -rollback Rolls back product feature set. + -supporting_sets Complete set of pre-stored supporting or input sets for this DataSet + -slices ARRAYREF of Slices to rollback + Example : my $dset = $self->define_and_validate_Set(%params); + Description: Checks whether set is already in DB based on set name, rolls back features + if roll back flag set. Or creates new DataSet and Feature|ResultSet if not present. + Returntype : Bio::EnsEMBL::Funcgen::DataSet + Exceptions : Throws if DBAdaptor param not valid + Caller : Importers and Parsers + Status : At risk + +=cut + +sub define_and_validate_sets{ + my $self = shift; + + #change slice to slices to support multi slice import from InputSet::define_sets + #Can't do full rollback in slice mode + #This may not be safe in slice mode as we will then have mixed inputs/outputs + + my ($name, $anal, $ftype, $ctype, $type, $append, $db, $ssets, $description, $rollback, $recovery, $slices, $display_label) = rearrange(['NAME', 'ANALYSIS', 'FEATURE_TYPE', 'CELL_TYPE', 'FEATURE_CLASS', 'APPEND', + 'DBADAPTOR', 'SUPPORTING_SETS', 'DESCRIPTION', 'ROLLBACK', 'RECOVERY', 'SLICES', 'DISPLAY_LABEL'], @_); + + + #VALIDATE CONFIG HASH + #$config_hash ||= {};#default so exists will work without testing + #if(keys %{$config_hash}){ + # #There is a module to handle config hashes somewhere! + # throw('config_hash not yet implemented for define_and_validate_sets'); + #my @known_config = ('full_delete');#We never want full delete here as this is a create method! + #Can we set vars from has by refs like getopts? + #map { + # throw("Found unsupported config hash parameter:\t$_") if ! grep(/^${_}$/, @known_config); + #} keys %{$config_hash}; + # } + + #define rollback level + #extract this to _set_rollback_level($rollback_mode, $feature_class) + my $rollback_level = 0; + + #These should be globally defined so all rollback methods can use them + my %valid_rollback_modes = + ( + product_features => 1, + #Just product features and FeatureSet status, what about DataSet status? + #full delete does nothing here? + + sets => 2, + #Includes product_features and + #deletes supporting_sets entries unless we specify append + #revoke all states on Feature/Data/InputSets + #Full delete removes Feature/Data/InputSet entries + #Never includes ResultSets! + + supporting_features => 3, + #Includes product_feature and sets + #Removes all states and supporting features + #inc. ResultSet results/ResultFeatures + #Full_delete remove supporting set entries + #Otherwise just rollback states for affected sets + ); + + if($rollback){ + if(! exists $valid_rollback_modes{$rollback}){ + #Default to some sensible values + $rollback = 'product_features';#default for FeatureSets + + #Always want overwrite supporting sets if there is a difference + $rollback = 'sets' if ($type eq 'regulatory'); + $rollback = 'supporting_sets' if ($type eq 'result'); + + warn ("You have not set a valid rollback mode(product_features|sets|supporting_features), defaulting to $rollback for feature class $type\n"); + } + + $rollback_level = $valid_rollback_modes{$rollback}; + } + + + if($slices && (ref($slices) ne 'ARRAY')){ + throw('-slices param must be an ARRAYREF of Bio::EnsEMBL::Slice objects'); + #Rest of validation done in other methods + } + + + + #But how are we going to resolve the append behaviour when we also want to validate the ssets? + #Can't, so append also functions to enable addition in the absence of some or all previous data/esets? + #No this is not true, we want to be able to fetch an extant set for import, + #we just need to be aware of sset IMPORTED status? + #This should be a recovery thing, allow fetch, but validate sets? + + + #Check mandatory params + if(! (ref($db) && $db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'))){ + throw('Must provide a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'); + } + + throw('Must provide a -name ') if(! defined $name); + + #Not necessarily, just do rollback then append? + #But then we'd potentially have a supporting set associated which has had it's data removed from the feature set. + #Generating sets for an ExpSet will always have append set + #This could be valid for generically grabing/creating sets for adding new supporting sets e.g. reg build + throw('-append and -rollback are mutually exclusive') if $rollback_level && $append; + + #This will never happen due to previous test? append will always fail? + #warn('You are defining a pre-existing FeatureSet without rolling back'. + # ' previous data, this could result in data duplication') if $append && ! $rollback_level; + #Is this really possible, surely the supporting set will fail to store due to unique key? + + + #Should we warn here about append && recovery? + #Aren't these mutually exclusive? + #Do we know if we have new data? append should override recovery, or just specifiy append + #This will stop the import and highlight the issue to the user + #We need to be able to run with both otherwise the import will not work + + + throw('Must provide a -feature_class e.g. annotated, external, result or regulatory') if(! defined $type); + #Check for annotated, external, regulatory etc here? + #Should never be external as we don't have DataSets for external sets? + + $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureType', $ftype); + if (defined $ctype){ + $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::CellType', $ctype); + } + elsif($type ne 'regulatory'){ + throw('Only Data/FeatureSets with type \'regulatory\' can have an undefined CellType'); + #Coudl extend this to core set by name eq 'RegulatoryFeatures'? + } + + $db->is_stored_and_valid('Bio::EnsEMBL::Analysis', $anal); + + my $dset_adaptor = $db->get_DataSetAdaptor; + my $fset_adaptor = $db->get_FeatureSetAdaptor; + my $rset_adaptor = $db->get_ResultSetAdaptor; + + #DataSet centric definition to enable multiple DataSets + #to be generated from the same supporting sets + my $dset = $dset_adaptor->fetch_by_name($name); + my ($fset, $rset, @input_sets); + + #Validate stored vs passed set data + + if(defined $dset){ + $self->log('Found Stored DataSet '.$dset->name); + + if($type ne 'result'){#i.e. annotated + + #Does this account for regulatory? + + $fset = $dset->product_FeatureSet; + #Here we have the possiblity that a feature_set with a different name may have + #been associated with the DataSet + + if(defined $fset){ + $self->log("Found associated product FeatureSet:\t".$fset->name); + + #if(! $clobber && + if($fset->name ne $name){ + throw('Invalid product FeatureSet name ('.$fset->name.') for DataSet ('.$name.'). Rollback will overwrite the FeatureSet and mismatched name will be retained.'); + #Need to clobber both or give explicit name for datasets or rename dataset??? + #Force this throw for now, make this fix manual as we may end up automatically overwriting data + } + } + + #This needs to be modified to support InputSets in ResultSets? + #Would never have mixed Input/ResultSets so no need + #Could potential need to do it for mixed Result/FeatureSets + #if we ever use an analysis which uses both set types + + #check supporting_sets here if defined + #We have the problem here of wanting to add ssets to a previously existing dset + #we may not know the original sset, or which of the ssets are new + #Hence there is a likelihood of a mismatch. + + #Much of this is replicated in store_udpated sets + + + if(defined $ssets){ + my @sorted_ssets = sort {$a->dbID <=> $b->dbID} @{$ssets}; + my @stored_ssets = sort {$a->dbID <=> $b->dbID} @{$dset->get_supporting_sets}; + my $mismatch = 0; + + $mismatch = 1 if(scalar(@sorted_ssets) != scalar(@stored_ssets)); + + if(! $mismatch){ + + for my $i(0..$#stored_ssets){ + + if($stored_ssets[$i]->dbID != $sorted_ssets[$i]->dbID){ + $mismatch=1; + last; + } + } + } + + + + + if($mismatch){ + #We're really print this names here which may hide the true cell/feature/anal type differences. + my $mismatch = 'There is a (name/type/analysis) mismatch between the supplied supporting_sets and the'. + ' supporting_sets in the DB for DataSet '.$dset->name."\n\nStored:\n" + .join(', ', (map { $_->name } @stored_ssets))."\n\nSupplied supporting_sets:\n" + .join(', ', (map { $_->name } @sorted_ssets)); + + + if($append){ + warn($mismatch."\n\nAppending supporting set data to unvalidated supporting sets"); + } + elsif($rollback_level > 1){#supporting set rollback + warn($mismatch."\n\nReplacing previously stored supporting sets with newly defined sets\n"); + + if($slices){ + warn("WARNING:\tPerforming supporting_set rollback in slice mode. This may corrupt the supporting_set definition for other slices in this DataSet if they are not re-generated using the same supporting_sets\n"); + } + + #Remove supporting_set entries + #This should be in a rollback_DataSet method + #This has moved to DataSetAdaptor::store_update_sets + + #Reset supporting sets + $dset->{'supporting_sets'} = undef; + $dset->add_supporting_sets(\@sorted_ssets); + #Move this to last block? + #This will currently fail as it test for product_FeatureSet + #How do we get around this? Remove IMPORTED status and only throw if fset has IMPORTED status? + + #warn "pre store sset ".@{$dset->get_supporting_sets}; + + #($dset) = @{$dset_adaptor->store_updated_sets([$dset], $rollback_level)}; + #$dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory'; + } + else{ + throw($mismatch); + } + } + } + else{ + warn("No supporting sets defined, skipping supporting set validation for definition of DataSet:\t".$name); + } + } + else{#result_features from InputSet + #Do we ever pass supporting sets here? + #Do we need to test vs stored_sets? + + + #There is the potential for more than one ResultSet to be associated with DataSet + #But as we are using the same name, this restricts the number wrt the cardinality + #of the name field. i.e. 1 name per analysis/cell_type/feature_type. + #This now works slightly differently to the rest of this method as we + #need to treat the ResultSet as we are currently treating the FeatureSet below. + + #However, the use case of this method is for one InputSet giving rise to one ResultSet + #Hence just throw if we find more than one or have a name mismatch??? + my @stored_sets = @{$dset->get_supporting_sets}; + + + + #THis assumes we will always have supporting sets + #and is failing as we have removed this test in DataSet::new + #But where are we storing it without the supporting set? + + if(scalar(@stored_sets) > 1){ + throw('define_and_validate_sets does not yet support DataSets with multiple supporting ResultSets for result_features'); + } + elsif(! @stored_sets){ + throw("DataSet($name) does not have any stored supporting sets. These should have been defined when storing the DataSet"); + #Or should we handle this? + } + + $rset = $stored_sets[0]; + + if($rset->set_type ne 'result'){ + throw("DataSet already contains a supporting set which is not a ResultSet:\t".$rset->set_type."\t".$stored_sets[0]->name); + } + elsif($ssets){ + #Do we ever pass supporting sets, test for completeness + + #Just test we have the same supplied ssets if it is defined + if(scalar(@$ssets) != 1){ + throw("ResultFeature data sets currently only support one supporting ResultSet.\nSupproting sets:\t". + join(', ', (map { $_->name.'('.$_->set_type } @$ssets))); + } + elsif(! ($rset->dbID == $ssets->[0]->dbID) && + ($ssets->[0]->set_type eq 'result')){ + throw('Supplied supporting set('.$ssets->[0]->name.') does not match stored supporting set('.$rset->name.')'); + } + } + + @input_sets = @{$rset->get_InputSets}; + } + } + + + + if($type eq 'result'){ + + #Validate the defined InputSets + if (scalar(@$ssets) > 1) { + throw("define_and_validate_sets does not yet support multiple InputSets for defining a ResultSet:\t".$name); + + } + + if ($ssets->[0]->set_type ne 'input') { + throw("To define a ResultSet($name) containing result_features, you must provide and InputSet as a supporting set\nArray based ResultSets(i.e. experimental_chip/channel) are not defined using this method, see specific Import Parsers."); + } + + + #Try and grab the rset just in case it has been orphaned somehow + if (! defined $rset) { + $rset = $rset_adaptor->fetch_all_by_name($name, $ftype, $ctype, $anal)->[0]; + #Should only ever be one given all parts of unique key + @input_sets = @{$rset->get_InputSets} if $rset; + + } + + + if (defined $rset) { #Validate stored InputSets + + if (scalar(@input_sets) != scalar(@$ssets)) { + throw('Found mismatch between number of previously stored InputSets('.scalar(@input_sets).') and defined InputSets('.scalar(@$ssets).'). You must provide a complete list of InputSets to define your ResultSet.'); + } + + if ($input_sets[0]->dbID != $ssets->[0]->dbID) { + throw('Found dbID mismatch between previously stored InputSet('.$input_sets[0]->name.') and define InputSet('.$ssets->[0]->name.')'); + } + + #rollback ResultSet/InputSet here? + if($rollback_level > 2){ + warn "rollback not yet fully implemented for Result/InputSets"; + + #Does this need to be by slice? + #What about states if we are running in parallel? + + if($slices){ + map {$self->rollback_ResultSet($rset, $rollback, $_)} @$slices; + } + else{ + $self->rollback_ResultSet($rset, $rollback); + } + + } + + } + else{#define ResultSet + ($rset) = @{$rset_adaptor->store(Bio::EnsEMBL::Funcgen::ResultSet->new + ( + -name => $name, + -feature_type => $ftype, + -cell_type => $ctype, + -table_name => 'input_set', + -table_id => $ssets->[0]->dbID, + -analysis => $anal + ) + )}; + + } + } + else{#annotated/regulatory/external i.e. FeatureSet + + #Try and grab the fset just in case it has been orphaned somehow + if(! defined $fset){ + $fset = $fset_adaptor->fetch_by_name($name); + + if(defined $fset){ + #Now we need to test whether it is attached to a dset + #Will be incorrect dset if it is as we couldn't get it before + #else we test the types and rollback + $self->log("Found stored orphan FeatureSet:\t".$fset->name); + + my $stored_dset = $dset_adaptor->fetch_by_product_FeatureSet($fset); + + if(defined $stored_dset){ + throw('Found FeatureSet('.$name.') associated with incorrect DataSet('.$stored_dset->name. + ").\nTry using another -name in the set parameters hash"); + + } + } + } + + #Rollback or create FeatureSet + if(defined $fset){ + + if($rollback_level){ + #Don't check for IMPORTED here as we want to rollback anyway + #Not forcing delete here as this may be used as a supporting set itself. + + $self->rollback_FeatureSet($fset, undef, $slices); + } + elsif ($append || $recovery) { + #This is only true if we have an sset mismatch + + #Do we need to revoke IMPORTED here too? + #This behaves differently dependant on the supporting set. + #InputSet status refers to loading in FeatureSet, where as ResultSet status refers to loading into result table + + #So we really want to revoke it + #But this leaves us vulnerable to losing data if the import crashes after this point + #because we have no way of assesing which is complete data and which is incomplete data + #within a feature set. + #This means we need a status on supporting_set, not InputSet or ResultSet + #as this has to be in the context of a dataset. + #Grrr, this means we need a SupportingSet class which simply wraps the InputSet/ResultSet + #We also need a single dbID for the supporting_set table + #Which means we will have to do some wierdity with the normal dbID implementation + #i.e. Have supporting_set_id, so we can still access all the normal dbID method for the given Set class + #This will have to be hardcoded into the state methods + #Also will need to specify when we want to store as supporting_status or normal set status. + + #This is an awful lot to protect against vulnerability + #Also as there easy way to track what features came from which supporting set + #There isn't currently a viable way to rollback, hence will have to redo the whole set. + + #Maybe we can enforce this by procedure? + #By simply not associating the supporting set until it has been loaded into the feature set? + #This may cause even more tracking problems + + #Right then, simply warn and do not revoke feature_set IMPORTED to protect old data? + #Parsers should identify supporting_sets(InputSets) which exist but do not have IMPORTED + #status and fail, specifying -recover which will rollback_FeatureSet which will revoke the IMPORTED status + + #This can mean a failed import can leave a partially imported feature set with the IMPORTED status!!! + + #We just need to handle InputSets and ResultSets differently. + #In parsers or here? + #Probably best in the parsers as this is where the states are set. + + + #Should we throw here for ResultSet? + #Force rollback of FeatureSet first or create new one? + #And throw for InputSet? + #This again comes back to whether we will ever have more than one file + #for a give InputSet, currently not. + + $self->log("WARNING\t::\tAdding data to a extant FeatureSet:\t".$fset->name); + } else { + throw('Found extant FeatureSet '.$fset->name.'. Maybe you want to specify the rollback, append or recovery parameter or roll back the FeatureSet separately?'); + } + } else { + #create a new one + $self->log("Creating new FeatureSet:\t".$name); + + $fset = Bio::EnsEMBL::Funcgen::FeatureSet->new( + -name => $name, + -feature_type => $ftype, + -cell_type => $ctype, + -analysis => $anal, + -feature_class => $type, + -description => $description, + -display_label => $display_label, + ); + ($fset) = @{$fset_adaptor->store($fset)}; + } + } + + #Create/Update the DataSet + if(defined $dset){ + #Could do these updates above? + #But delayed to reduce redundancy + + if($type ne 'result'){ + + if(! defined $dset->product_FeatureSet){ + $self->log("Updating DataSet with new product FeatureSet:\t".$fset->name); + $dset->product_FeatureSet($fset); + } + + $dset = $dset_adaptor->store_updated_sets([$dset], $rollback_level)->[0]; + #This cannot store the focus sets as we don't know which are which yet + #Only the script knows this + # $dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory'; + } + else{ + #We may have the case where we have a DataSet(with a FeatureSet) but no ResultSet + #i.e. Load result_features after peak calls + #So update dset with ResultSet + + if(! @{$dset->get_supporting_sets}){ + $self->log("Updating DataSet with new ResultSet:\t".$rset->name); + $dset->add_supporting_sets([$rset]); + $dset = $dset_adaptor->store_updated_sets([$dset], $rollback_level)->[0]; + } + } + } + else{ + $self->log("Creating new ${type}_feature DataSet:\t".$name); + + if($type ne 'result'){ + ($dset) = @{$dset_adaptor->store(Bio::EnsEMBL::Funcgen::DataSet->new + ( + -name => $name, + -feature_set => $fset, + -supporting_sets => $ssets, + ))}; + #$dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory'; + } + else{ + warn "creating dataset $name with supporting set $rset"; + ($dset) = @{$dset_adaptor->store(Bio::EnsEMBL::Funcgen::DataSet->new + ( + -name => $name, + -supporting_sets => [$rset], + ))}; + } + } + + return $dset; +} + + +#Rollback/load methods migrated from DBAdaptor +#Move to SetAdaptors, better located and will remove cyclical dependancy + +=head2 rollback_FeatureSet + + Arg [0] : Bio::EnsEMBL::Funcgen::FeatureSet + Arg [1] : optional - boolean force delete flag, if this FeatureSet is use as a support + for another DataSet. + Arg [2] : optional - arrayref of Bio::EnsEMBL::Slice objects to rollback + Arg [3] : optional - boolean flag to perform full rollback i.e. default will just remove feature + specifying this with also delete the feature_set record + Example : $self->rollback_FeatureSet($fset); + Description: Deletes all status and feature entries for this FeatureSet. + Checks whether FeatureSet is a supporting set in any other DataSet. + Returntype : none + Exceptions : Throws if any deletes fails or if db method unavailable + Caller : Importers and Parsers + Status : At risk + +=cut + + +sub rollback_FeatureSet{ + my ($self, $fset, $force_delete, $slices, $full_delete) = @_; + + #Remove force delete and just throw? + #Currently only used in project_feature_set. + #May want to keep an old RegBuild for mapping/comparison? + #Coudl get around this by simply deleting the data_set? Unknown impact. + #Move to config hash? + #No need for rollback_level here as we always want to do the same thing + + my ($sql, $slice_name); + my $slice_join = ''; + my $table = $fset->feature_class.'_feature'; + my $adaptor = $fset->adaptor || throw('FeatureSet must have an adaptor'); + my $db = $adaptor->db; + #Cyclical dpendancy here, so not strictly necessary. + $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureSet', $fset); + + + $self->log_header('Rolling back '.$fset->feature_class." FeatureSet:\t".$fset->name); + + if($slices){ + + if($full_delete){ + throw("Cannot specify a full_delete for a Slice based rollback:\t".$fset->name); + } + + + if(! ref($slices) eq 'ARRAY'){ + throw('Slices must be an ARRAYREF of Slice objects'); + } + + map { throw("Must pass a valid Bio::EnsEMBL::Slice") if (! (ref($_) && $_->isa('Bio::EnsEMBL::Slice'))) } @$slices; + $self->log("Restricting to slices:\n\t\t".join("\n\t\t", (map { $_->name } @$slices)) ); + #Allow subslice rollback only for one slice at a time + my $subslice = (scalar(@$slices) == 1) ? 1 : 0; + my @sr_ids; + + foreach my $slice(@$slices){ + my $efg_sr_id = $fset->get_FeatureAdaptor->get_seq_region_id_by_Slice($slice); + + if(! $efg_sr_id){ + $self->log("Slice is not present in eFG DB:\t".$slice->name); + }else{ + + if(! $subslice){#Test is not subslice + my $full_slice = $slice->adaptor->fetch_by_region(undef, $slice->seq_region_name); + + if(($slice->start != 1) || + ($full_slice->end != $slice->end)){ + throw("Can only rollback subslices one at a time:\nRollback slice:\t" + .$slice->name."\nFull slice:\t".$full_slice->name); + } + } + + push @sr_ids, $efg_sr_id; + } + } + + if(scalar(@sr_ids) == 1){ + #Allow sub slice rollback + #add range here from meta coord? + $slice_join = " and f.seq_region_id=$sr_ids[0] and f.seq_region_start<=".$slices->[0]->end.' and f.seq_region_end>='.$slices->[0]->start; + } + else{ + $slice_join = ' and f.seq_region_id in ('.join(', ', @sr_ids).')'; + } + } + + + + #Check whether this is a supporting set for another data_set + my @dsets = @{$db->get_DataSetAdaptor->fetch_all_by_supporting_set($fset)}; + + if(@dsets){ + my $txt = $fset->name." is a supporting set of the following DataSets:\t".join(', ', (map {$_->name} @dsets)); + + if($force_delete){ + $self->log("WARNING:\t$txt\n"); + } + else{ + throw($txt."\nPlease resolve or specify the force_delete argument") + } + } + + #Remove states + if(! $slices){ + $fset->adaptor->revoke_states($fset); + + #Revoke InputSet states here as this refers to whether + #they are imported in the FeatureSet + #Do this in FeatureSet->revoke_states? + + my $dset = $db->get_DataSetAdaptor->fetch_by_product_FeatureSet($fset); + + #Account for absent dset if we have an external_feature set + + if((! defined $dset) && + $fset->feature_class ne 'external'){ + warn "WARNING:\tFeatureSet ".$fset->name." does not have an associated DataSet. Rollback may be incomplete"; + } + + if($dset){ + + foreach my $sset(@{$dset->get_supporting_sets}){ + #Maybe skip this if we defined slice? + + #??? Do we want to do this? + #This is dependant on the feature_class of the InputSet + #result InputSets may have been imported as ResultFeatureCollections + #So we want to leave those in place + #annotated feature_class InputSets are directly imports, so the status of these refers + #to the FeatureSet import status + #Where is the imported status set for SWEmbl? + + if(($sset->feature_class eq 'annotated') && + $sset->isa('Bio::EnsEMBL::Funcgen::InputSet')){ + + $self->rollback_InputSet($sset) if $sset->isa('Bio::EnsEMBL::Funcgen::InputSet'); + $self->rollback_InputSet($sset);#add full delete here? + #Do not want to rollback here for other type of sset + } + } + } + } + else{ + $self->log('Skipping '.$fset->name.' revoke_states for partial Slice rollback, maybe revoke IMPORTED? '); + } + + #should add some log statements here? + + my $row_cnt; + + #Rollback reg attributes + if($fset->feature_class eq 'regulatory'){ + $sql = "DELETE ra from regulatory_attribute ra, $table f where f.${table}_id=ra.${table}_id and f.feature_set_id=".$fset->dbID.$slice_join; + $self->rollback_table($sql, 'regulatory_attribute', undef, $db); + + + + if($full_delete){ + #Now delete meta entries + #This is messy as we use the following meta_key nomencalture + #which do not match the fset names + #regbuild.feature_set_ids_v5 + #regbuild.feature_type_ids_v5 + #regbuild.focus_feature_set_ids + #regbuild.initial_release_date_v6 + #regbuild.last_annotation_update_v6 + #regbuild.version NEED TO ADD THIS + #Also need to revise how these are generated by build_reg_feats. + #WHat about new cell_type level feature sets? + #How will we model these in the meta table? + + warn "Need to revise meta table entries before we add a delete here, remove manually for now for:\t".$fset->name; + + #We would only remove meta entries if we are performing a full rollback + my $version; + ($version = $fset->name) =~ s/.*_v([0-9]+)$/$1/; + $version = ($version eq $fset->name) ? '' : "_v${version}"; + + #These are versionless meta_keys and apply to all sets + #handle these in reg build script + #'regbuild.initial_release_date', + #'regbuild.last_annotation_update' + #'regbuild.version' + + foreach my $mkey('regbuild.%s.feature_set_ids', + 'regbuild.%s.feature_type_ids', + 'regbuild.%s.focus_feature_set_ids'){ + + my $meta_key = sprintf($mkey, $fset->cell_type->name).$version; + $sql = "DELETE from meta where meta_key='${meta_key}'"; + $self->rollback_table($sql, 'meta', undef, $db); + } + } + } + + + #Need to remove object xrefs here + #Do not remove xrefs as these may be used by something else! + $sql = "DELETE ox from object_xref ox, $table f where ox.ensembl_object_type='".ucfirst($fset->feature_class)."Feature' and ox.ensembl_id=f.${table}_id and f.feature_set_id=".$fset->dbID.$slice_join; + $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db); + + + #Remove associated_feature_type records + #Do not remove actual feature_type records as they may be used by something else. + + $sql ="DELETE aft from associated_feature_type aft, $table f where f.feature_set_id=".$fset->dbID." and f.${table}_id=aft.table_id and aft.table_name='".$fset->feature_class."_feature'".$slice_join; + $self->rollback_table($sql, 'associated_feature_type', undef, $db); + + + + #Remove features + $sql = "DELETE f from $table f where f.feature_set_id=".$fset->dbID.$slice_join; + $self->rollback_table($sql, $table, "${table}_id", $db); + + if($full_delete){ #Also delete feature/data_set records + + $sql = "DELETE from feature_set where feature_set_id=".$fset->dbID; + $self->rollback_table($sql, 'feature_set', 'feature_set_id', $db); + $self->log("Deleted feature_set entry for:\t".$fset->name); + + + $sql = "DELETE from data_set where feature_set_id=".$fset->dbID; + $self->rollback_table($sql, 'data_set', 'data_set_id', $db); + $self->log("Deleted associated data_set entry for:\t".$fset->name); + } + + return; +} + + +=head2 rollback_ResultSet + + Arg[1] : Bio::EnsEMBL::Funcgen::ResultSet + Arg[2] : Boolean - optional flag to roll back array results + Example : $self->rollback_ResultSet($rset); + Description: Deletes all status. chip_channel and result_set entries for this ResultSet. + Will also rollback_results sets if rollback_results specified. This will also + update or delete associated ResultSets where appropriate. + Returntype : Arrayref containing the ResultSet and associated DataSet which have not been rolled back + Exceptions : Throws if ResultSet not valid + Throws is result_rollback flag specified but associated product FeatureSet found. + Caller : General + Status : At risk + +=cut + +#Need to change slice to slices ref here +#Need to add full rollback, which will specify to remove all sets +#as well as results and +#These params need clarifying as their nature changes between input_set and array rsets +#Don't we always want to rollback_results? +#force should only really be used to rollback InputSet ResultFeature sets +#i.e. Read collections which are not used as direct input for the linked product FeatureSet +#This should fail with array data associated with a product feature set + +#Do we want to separate ResultFeature rollback from result rollback? +#Currently the array based collection rollback is done by hand +#Could be done via the ResultFeature Collector, but should probably use this method. + + +#rollback_results is only used in the MAGE parser to identify sets which have an +#associated product fset. +#Can't really separate due to integrated functionality + +sub rollback_ResultSet{ + my ($self, $rset, $rollback_results, $slice, $force, $full_delete) = @_; + + if(! (ref($rset) && $rset->can('adaptor') && defined $rset->adaptor)){ + throw('Must provide a valid stored Bio::EnsEMBL::ResultSet'); + } + + if($slice && $rset->table_name ne 'input_set'){ + throw('Can only rollback_ResultSet by Slice if the ResultSet contains InputSets'); + } + + #We're still validating against itself?? + #And reciprocating part of the test :| + my $sql; + my $db = $rset->adaptor->db;#This needs to be tested + $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ResultSet', $rset); + $self->log("Rolling back ResultSet:\t".$rset->name); + my $dset_adaptor = $self->db->get_DataSetAdaptor; + my $rset_adaptor = $self->db->get_ResultSetAdaptor; + my @skipped_sets; + + ### Check if this ResultSet is part of a DataSet with a product feature set + + foreach my $dset(@{$dset_adaptor->fetch_all_by_supporting_set($rset)}){ + + if (defined $dset){ + $self->log('Found linked DataSet('.$dset->name.") for ResultSet:\t".$rset->log_label); + + if(my $fset = $dset->product_FeatureSet){ + @skipped_sets = ($rset,$dset); + + #What impact does this have on result_rollback? + #None as we never get there + #But what if we have specified rollback results? + #We should throw here as we can't perform the rollback + + if($rollback_results){ + + if($rset->table_name ne 'input_set' || + (! $force)){#is an input_set/reads collection + #This will always throws for non-input_set ResultSets + + throw("Could not rollback supporting ResultSet and results for:\t".$rset->log_label. + "\nEither manually resolve the supporting/feature set relationship or set the 'force' flag.\n"); + # ."Alternatively omit the rollback_results argument if you simply want to redefine the ResultSet without loading any new data"); + #This last bit is no longer true + #Remove rollback_results? + } + else{ + @skipped_sets = (); + $self->log("Forcing results rollback for InputSet based ResultSet:\t".$rset->log_label); + } + } + + if(@skipped_sets){ + $self->log('Skipping rollback. Found product FeatureSet('.$fset->name.") for supporting ResultSet:\t".$rset->log_label); + } + + } + elsif((! defined $slice) && + $full_delete){ + #Found rset in dset, but not yet processed so can remove safely. + $self->unlink_ResultSet_DataSet($rset, $dset); + } + } + } + + + #Now do similar for all associated ResultSets + if(! @skipped_sets){ + + + #Rollback results if required + if($rollback_results){ + + $self->log("Rolling back results for ResultSet:\t".$rset->log_label); + #Check result_set_input_ids are present in other result sets. + my @assoc_rsets = @{$rset_adaptor->fetch_all_linked_by_ResultSet($rset)}; + my $feature_supporting = 0; + + foreach my $assoc_rset(@assoc_rsets){ + + foreach my $dset(@{$dset_adaptor->fetch_all_by_supporting_set($assoc_rset)}){ + + #Check for other product_FeatureSets + if(my $fset = $dset->product_FeatureSet){ + $feature_supporting++; + $self->log('Found product FeatureSet('.$fset->name. + ") for associated supporting ResultSet:\t".$rset->log_label); + + if($rset->table_name ne 'input_set' || + (! $force)){#is an input_set/reads collection + $feature_supporting++; + } + } + } + } + + + if(! $feature_supporting){ + + #RollBack result_feature table first + $self->rollback_ResultFeatures($rset, $slice); + + #Now rollback other states + $rset->adaptor->revoke_states($rset); + + + #This also handles Echip status rollback + if ($rset->table_name ne 'input_set'){ + $self->log("Rolling back result table for ResultSet:\t".$rset->log_label); + $self->rollback_results($rset->result_set_input_ids); + } + + $self->log('Removing result_set_input entries from associated ResultSets') if @assoc_rsets; + + if((! $slice) && + $full_delete){ + + #Now remove result_set_input_ids from associated rsets. + foreach my $assoc_rset(@assoc_rsets){ + $sql = 'DELETE from result_set_input where result_set_id='.$assoc_rset->dbID. + ' and result_set_input_id in('.join', ', @{$assoc_rset->result_set_input_ids}.')'; + $db->dbc->do($sql); + + # we need to delete complete subsets from the result_set table. + my $subset = 1; + + foreach my $cc_id(@{$assoc_rset->result_set_input_ids}){ + + if(! grep { /$cc_id/ } @{$rset->result_set_input_ids}){ + $subset = 0; + last; + } + } + + #$assoc_rset is complete subset of $rset so can delete + #We know this does not have an assoicated product feature set + #Only if it is not derived from an input_set + if($subset){ + $self->log("Deleting associated subset ResultSet:\t".$assoc_rset->log_label); + + #Delete status entries first + $assoc_rset->adaptor->revoke_states($assoc_rset); + + #All cc records will have already been deleted + $sql = 'DELETE from result_set where result_set_id='.$assoc_rset->dbID; + $db->dbc->do($sql); + } + } + } + + + #Now warn about Echips in Experiments which may need removing. + if($rset->table_name ne 'input_set'){ + my %experiment_chips; + + foreach my $echip(@{$rset->get_ExperimentalChips}){ + $experiment_chips{$echip->experiment->name}{$echip->unique_id} = undef; + } + + foreach my $exp(keys %experiment_chips){ + $self->log("Experiment $exp has had ".scalar(values %{$experiment_chips{$exp}}). + " ExperimentalChips rolled back:\t".join('; ', values %{$experiment_chips{$exp}}). + ".\nTo fully remove these, use the rollback_experiment.pl (with -chip_ids) script"); + } + } + else{ + #Should only be one to rollback + foreach my $iset(@{$rset->get_InputSets}){ + $self->rollback_InputSet($iset); + } + } + } + else{ + #$self->log("Skipping result rollback, found $feature_supporting associated supporting ResultSets for:\t".$rset->log_label); + #warn("Skipping result rollback, found $feature_supporting associated supporting ResultSets for:\t".$rset->log_label); + #do we need to return this info in skipped_rsets? + #This is just to allow importer to know which ones + #weren't rolled back to avoid naming clashes. + #so no. + + #But the results persist on the same chip_channel_ids + #So not returning this rset may result in loading of more data + #This should fail as status entries will not have been removed + #Still we should throw here as we'll most likely want to manually resolve this + #Besides this would be obfuscating the function + + throw("Could not rollback ResultSet and results, found $feature_supporting associated supporting ". + "ResultSets for:\t".$rset->log_label."\nManually resolve the supporting/feature set relationship or omit the ". + "rollback_results argument if you simply want to redefine the ResultSet without loading any new data"); + } + } + else{ + $self->log('Skipping results rollback'); + + if($rset->name =~ /_IMPORT$/){ + throw("Rolling back an IMPORT set without rolling back the result can result in ophaning result records for a whole experiment. Specify the result_rollback flag if you want to rollback the results for:\t".$rset->log_label); + } + } + + #Delete chip_channel and result_set records + #This should only be done with full delete + if((! $slice) && + $full_delete){ + $sql = 'DELETE from result_set_input where result_set_id='.$rset->dbID; + $self->rollback_table($sql, 'result_set_input', 'result_set_input_id', $db); + + $sql = 'DELETE from result_set where result_set_id='.$rset->dbID; + $db->dbc->do($sql); + $self->rollback_table($sql, 'result_set', 'result_set_id', $db); + } + } + + return \@skipped_sets; +} + + + +sub unlink_ResultSet_DataSet{ + my ($self, $rset, $dset, $new_name) = @_; + + #validate set vars + + my $db = $rset->adaptor->db; + + $self->log("Removing supporting ResultSet from DataSet:\t".$dset->name."\tResultSet:".$rset->log_label); + my $sql = 'DELETE from supporting_set where data_set_id='.$dset->dbID. + ' and type="result" and supporting_set_id='.$rset->dbID; + + warn "Removing ".$rset->log_label." as a supporting set to DataSet:\t".$dset->name. + "\nThis may result in a DataSet with no supporting sets"; + $db->dbc->do($sql); + + if($new_name){ + #We risk overwriting any previously renamed result sets. + #Should use datestamp? + $sql = 'UPDATE result_set set name="OLD_'.$rset->name.'" where result_set_id='.$rset->dbID; + $self->db->dbc->do($sql); + + if($dset->product_FeatureSet){ + $self->log('Associated DataSet('.$dset->name.') has already been processed. It is not wise to replace a supporting set without first rolling back the FeatureSet, as there may be additional supporting data'); + warn 'Associated DataSet('.$dset->name.') has already been processed. It is not wise to replace a supporting set without first rolling back the FeatureSet, as there may be additional supporting data'; + } + } + + return; +} + +=head2 rollback_InputSet + + Arg[1] : Bio::EnsEMBL::Funcgen::InputSet + Example : $self->rollback_InputSet($eset); + Description: Deletes all status entries for this InputSet and it's Subsets + Returntype : none + Exceptions : Throws if any deletes fails or if db method unavailable + Caller : Importers and Parsers + Status : At risk + +=cut + + +sub rollback_InputSet{ + my ($self, $eset, $force_delete, $full_delete) = @_; + + + #Need to implement force_delete!!!!!!!!!!!!!!!!!!!!!! + #Need to check this is not used in a DataSet/ResultSet + + my $adaptor = $eset->adaptor || throw('InputSet must have an adaptor'); + my $db = $adaptor->db; + + + $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::InputSet', $eset); + + $self->log("Rolling back InputSet:\t".$eset->name); + + #SubSets + foreach my $esset(@{$eset->get_InputSubsets}){ + $esset->adaptor->revoke_states($esset); + } + + #InputSet + $eset->adaptor->revoke_states($eset); + + return; +} + + +=head2 rollback_results + + Arg[1] : Arrayref of chip_channel ids + Example : $self->rollback_results($rset->chip_channels_ids); + Description: Deletes all result records for the given chip_channel ids. + Also deletes all status records for associated experimental_chips or channels + Returntype : None + Exceptions : Throws if no chip_channel ids provided + Caller : General + Status : At risk + +=cut + +#changed implementation to take arrayref + +sub rollback_results{ + my ($self, $cc_ids) = @_; + + my @cc_ids = @{$cc_ids}; + + #Need to test for $self->db here? + + + if(! scalar(@cc_ids) >0){ + throw('Must pass an array ref of result_set_input_ids to rollback'); + } + + #Rollback status entries + #Cannot use revoke_states here? + #We can if we retrieve the Chip or Channel first + #Add to ResultSet adaptor + my $sql = 'DELETE s from status s, result_set_input rsi WHERE rsi.result_set_input_id IN ('.join(',', @cc_ids). + ') AND rsi.table_id=s.table_id AND rsi.table_name=s.table_name'; + + if(! $self->db->dbc->do($sql)){ + throw("Status rollback failed for result_set_input_ids:\t@cc_ids\n".$self->db->dbc->db_handle->errstr()); + } + + + #Rollback result entries + $sql = 'DELETE from result where result_set_input_id in ('.join(',', @cc_ids).');'; + $self->rollback_table($sql, 'result', 'result_id', $self->db); + return; +} + + +=head2 rollback_ResultFeatures + + Arg[0] : Bio::EnsEMBL::Funcgen::ResultSet + Arg[1] : Optional - Bio::EnsEMBL::Slice + Arg[2] : Optional - no_revoke Boolean. This is only used when generating new windows + from a 0 window size which has been projected from a previous assembly. + Example : $self->rollback_result_features($rset); + Description: Deletes all result_feature records for the given ResultSet. + Also deletes 'RESULT_FEATURE_SET' status. + Returntype : None + Exceptions : Throws if ResultSet not provided + Caller : General + Status : At risk + +=cut + + +sub rollback_ResultFeatures{ + my ($self, $rset, $slice, $no_revoke) = @_; + + if(! (ref($rset) && $rset->can('adaptor') && defined $rset->adaptor)){ + throw('Must provide a valid stored Bio::EnsEMBL::ResultSet'); + } + + if(! $slice && $no_revoke){ + throw("Cannot rollback_ResultFeatures with no_reovke unless you specify a Slice"); + } + #else warn if slice and no_revoke? + + my ($sql, $slice_name, $slice_constraint); + + if($slice){ + + if(ref($slice) && $slice->isa('Bio::EnsEMBL::Slice')){ + my $sr_id = $rset->adaptor->db->get_ResultFeatureAdaptor->get_seq_region_id_by_Slice($slice); + + if($sr_id){ + + #Need to test for full slice here + my $full_slice = $slice->adaptor->fetch_by_region(undef, $slice->seq_region_name); + $slice_name = "\t".$slice->name; + $slice_constraint = ' and seq_region_id='.$sr_id; + + if(($slice->start != 1) || + ($slice->end != $full_slice->end)){ + + throw("rollback_ResultFeatures does not yet support non-full length Slices:\t".$slice_name); + + #Need to test whether we have non-0 wsize collections without the exact seq_region values + #$sql='SELECT window_size from result_feature where result_feature_id='.$rset->dbID. + # ' and window_size!=0 and seq_region_start!='.$slice->start.' and seq_region_end!='.$slice->end.$slice_constraint; + } + } + else{#seq_region is not yet present in DB + return; + } + } + else{ + throw('slice argument must be a valid Bio::EnsEMBL::Slice'); + } + } + + #We're still validating against itself?? + #And reciprocating part of the test :| + my $db = $rset->adaptor->db; + $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ResultSet', $rset); + + #Do this conditionally on whether it is a result_feature_set? + #This may break if we have removed the status but not finished the rollback so no! + $self->log("Rolling back result_feature table for ResultSet:\t".$rset->name.$slice_name); + + #Rollback status entry + if($rset->has_status('RESULT_FEATURE_SET') && ! $no_revoke){ + $rset->adaptor->revoke_status('RESULT_FEATURE_SET', $rset); + } + + #Cannot use revoke_states here? + #We can if we retrieve the Chip or Channel first + #Add to ResultSet adaptor + $sql = 'DELETE from result_feature where result_set_id='.$rset->dbID.$slice_constraint; + $self->rollback_table($sql, 'result_feature', 'result_feature_id', $db); + + return; +} + + + +=head2 rollback_ArrayChips + + Arg[1] : ARRAYREF: Bio::EnsEMBL::Funcgen::ArrayChip objects + Example : $self->rollback_ArrayChips([$achip1, $achip2]); + Description: Deletes all Probes, ProbeSets, ProbeFeatures and + states associated with this ArrayChip + Returntype : None + Exceptions : Throws if ArrayChip not valid and stored + Throws if ArrayChips are not of same class + Caller : General + Status : At risk + +=cut + +#This should be tied to a CS id!!! +#And analysis dependant? +#We may not want to delete alignment by different analyses? +#In practise the slice methods ignore analysis_id for this table +#So we currently never use this! +#So IMPORTED status should be tied to CS id and Analysis id? + +sub rollback_ArrayChips{ + my ($self, $acs, $mode, $force, $keep_xrefs, $no_clean_up, $force_clean_up) = @_; + + #no_clean_up and force_clean_up allow analyze/optimize to be skipped until the last rollback + #We could get around this by specifying all ArrayChips for all formats at the same time? + #Need to implement in RollbackArrays + + $mode ||= 'probe'; + + if($mode && ($mode ne 'probe' && + $mode ne 'probe_feature' && + $mode ne 'ProbeAlign' && + $mode ne 'ProbeTranscriptAlign' && + $mode ne 'probe2transcript')){ + throw("You have passed an invalid mode argument($mode), you must omit or specify either 'probe2transcript', 'probe', 'ProbeAlign, 'ProbeTranscriptAlign' or 'probe_feature' for all of the Align output"); + } + + if($force && ($force ne 'force')){ + throw("You have not specified a valid force argument($force), you must specify 'force' or omit"); + } + + if($keep_xrefs && ($keep_xrefs ne 'keep_xrefs')){ + throw("You have not specified a valid keep_xrefs argument($keep_xrefs), you must specify 'keep_xrefs' or omit"); + } + + + if($keep_xrefs){ + + if($mode eq 'probe' || $mode eq 'probe2transcript'){ + throw("You cannot specify 'keep_xrefs' with mode $mode, you can only rollback features e.g. probe_feature, ProbeAlign or ProbeTranscriptAlign"); + } + + if($force){ + throw("You cannot 'force' delete the probe2transcript xrefs and 'keep_xrefs' at the same time. Please specify just one."); + } + } + + + + + my ($adaptor, $db, %classes); + + foreach my $ac(@$acs){ + $adaptor ||= $ac->adaptor || throw('ArrayChip must have an adaptor'); + $db ||= $adaptor->db; + $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ArrayChip', $ac); + + if(! $ac->get_Array->class){ + throw('The ArrayChip you are trying to rollback does not have a class attribute'); + } + + + $classes{$ac->get_Array->class} = undef; + + #if($class && ($class ne $ac->get_Array->class)){ + # throw('You can only rollback_ArrayChips for ArrayChips with the same class'); + #} + } + + + #This is always the case as we register the association before we set the Import status + #Hence the 2nd stage of the import fails as we have an associated ExperimentalChip + #We need to make sure the ExperimentalChip and Channel have not been imported!!! + warn "NOTE: rollback_ArrayChips. Need to implement ExperimentlChip check, is the problem that ExperimentalChips are registered before ArrayChips imported?"; + #Check for dependent ExperimentalChips + #if(my @echips = @{$db->get_ExperimentalChipAdaptor->fetch_all_by_ArrayChip($ac)}){ +# my %exps; +# my $txt = "Experiment\t\t\t\tExperimentalChip Unique IDs\n"; + +# foreach my $ec(@echips){ +# $exps{$ec->get_Experiment->name} ||= ''; + +# $exps{$ec->get_Experiment->name} .= "\t".$ec->unique_id; +# } + +# map {$txt.= "\t".$_.":".$exps{$_}."\n"} keys %exps; + +# throw("Cannot rollback ArrayChip:\t".$ac->name. +# "\nFound Dependent Experimental Data:\n".$txt); +# } + + + my $ac_names = join(', ', (map { $_->name } @$acs)); + my $ac_ids = join(', ', (map { $_->dbID } @$acs)); + + + $self->log("Rolling back ArrayChips $mode entries:\t$ac_names"); + my ($row_cnt, $probe_join, $sql); + #$ac->adaptor->revoke_states($ac);#This need to be more specific to the type of rollback + my $species = $db->species; + + if(!$species){ + throw('Cannot rollback probe2transcript level xrefs without specifying a species for the DBAdaptor'); + } + #Will from registry? this return Homo sapiens? + #Or homo_sapiens + ($species = lc($species)) =~ s/ /_/; + + my $transc_edb_name = "${species}_core_Transcript"; + my $genome_edb_name = "${species}_core_Genome"; + + #Maybe we want to rollback ProbeAlign and ProbeTranscriptAlign output separately so we + #can re-run just one part of the alignment step. + + + #We want this Probe(Transcript)Align rollback available in the environment + #So we can do it natively and before we get to the RunnableDB stage, + #where we would be trying multiple rollbacks in parallel + #Wrapper script? + #Or do we keep it simple here and maintain probe_feature wide rollback + #And just the ProbeAlign/ProbeTranscriptAlign roll back in the environment? + + + #We can restrict the probe deletes using the ac_id + #We should test for other ac_ids using the same probe_id + #Then fail unless we have specified force delete + + #These should be deleted for all other modes but only if force is set? + #This may delete xrefs for other ArrayChips + + #The issues is if we need to specify force for one delete but don't want to delete something else? + #force should only be used to delete upto and including the mode specified + #no mode equates to probe mode + #if no force then we fail if previous levels/modes have xrefs etc... + + + #Let's grab the edb ids first and use them directly, this will avoid table locks on edb + #and should also speed query up? + + + if($mode eq 'probe2transcript' || + $force){ + + #Delete ProbeFeature UnmappedObjects + $self->log("Deleting probe2transcript ProbeFeature UnmappedObjects"); + $sql = "DELETE uo FROM analysis a, unmapped_object uo, probe p, probe_feature pf, external_db e WHERE a.logic_name ='probe2transcript' AND a.analysis_id=uo.analysis_id AND p.probe_id=pf.probe_id and pf.probe_feature_id=uo.ensembl_id and uo.ensembl_object_type='ProbeFeature' and uo.external_db_id=e.external_db_id AND e.db_name ='${transc_edb_name}' AND p.array_chip_id IN($ac_ids)"; + $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up); + + + #Delete ProbeFeature Xrefs/DBEntries + $self->log("Deleting probe2transcript ProbeFeature Xrefs"); + $sql = "DELETE ox FROM xref x, object_xref ox, probe p, probe_feature pf, external_db e WHERE x.external_db_id=e.external_db_id AND e.db_name ='${transc_edb_name}' AND x.xref_id=ox.xref_id AND ox.ensembl_object_type='ProbeFeature' AND ox.ensembl_id=pf.probe_feature_id AND pf.probe_id=p.probe_id AND ox.linkage_annotation!='ProbeTranscriptAlign' AND p.array_chip_id IN($ac_ids)"; + $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up); + + + #Probe/Set specific entries + for my $xref_object('Probe', 'ProbeSet'){ + $probe_join = ($xref_object eq 'ProbeSet') ? 'p.probe_set_id' : 'p.probe_id'; + + #Delete Probe/Set UnmappedObjects + + $self->log("Deleting probe2transcript $xref_object UnmappedObjects"); + + $sql = "DELETE uo FROM analysis a, unmapped_object uo, probe p, external_db e WHERE a.logic_name='probe2transcript' AND a.analysis_id=uo.analysis_id AND uo.ensembl_object_type='${xref_object}' AND $probe_join=uo.ensembl_id AND uo.external_db_id=e.external_db_id AND e.db_name='${transc_edb_name}' AND p.array_chip_id IN($ac_ids)"; + #.' and edb.db_release="'.$schema_build.'"'; + $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up); + + #Delete Probe/Set Xrefs/DBEntries + $sql = "DELETE ox FROM xref x, object_xref ox, external_db e, probe p WHERE x.xref_id=ox.xref_id AND e.external_db_id=x.external_db_id AND e.db_name ='${transc_edb_name}' AND ox.ensembl_object_type='${xref_object}' AND ox.ensembl_id=${probe_join} AND p.array_chip_id IN($ac_ids)"; + $self->log("Deleting probe2transcript $xref_object xref records"); + $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up); + } + } + elsif(! $keep_xrefs){#Need to check for existing xrefs if not force + #we don't know whether this is on probe or probeset level + #This is a little hacky as there's not way we can guarantee this xref will be from probe2transcript + #until we get the analysis_id moved from identity_xref to xref + #We are also using the Probe/Set Xrefs as a proxy for all other Xrefs and UnmappedObjects + #Do we need to set a status here? Would have problem rolling back the states of associated ArrayChips + + for my $xref_object('Probe', 'ProbeSet'){ + + $probe_join = ($xref_object eq 'ProbeSet') ? 'p.probe_set_id' : 'p.probe_id'; + + $row_cnt = $db->dbc->db_handle->selectrow_array("SELECT COUNT(*) FROM xref x, object_xref ox, external_db e, probe p WHERE x.xref_id=ox.xref_id AND e.external_db_id=x.external_db_id AND e.db_name ='${transc_edb_name}' and ox.ensembl_object_type='${xref_object}' and ox.ensembl_id=${probe_join} AND p.array_chip_id IN($ac_ids)"); + + if($row_cnt){ + throw("Cannot rollback ArrayChips($ac_names), found $row_cnt $xref_object Xrefs. Pass 'force' argument or 'probe2transcript' mode to delete"); + } + else{ + #$self->log("Found $row_cnt $xref_object Xrefs"); + } + } + } + + + #ProbeFeatures inc ProbeTranscriptAlign xrefs + + if($mode ne 'probe2transcript'){ + + if(($mode eq 'probe' && $force) || + $mode eq 'probe_feature' || + $mode eq 'ProbeAlign' || + $mode eq 'ProbeTranscriptAlign'){ + + + #Should really revoke some state here but we only have IMPORTED + + #ProbeTranscriptAlign Xref/DBEntries + + #my (@anal_ids) = @{$db->get_AnalysisAdaptor->generic_fetch("a.module='ProbeAlign'")}; + #Grrrr! AnalysisAdaptor is not a standard BaseAdaptor implementation + #my @anal_ids = @{$db->dbc->db_handle->selectall_arrayref('select analysis_id from analysis where module like "%ProbeAlign"')}; + #@anal_ids = map {$_= "@$_"} @anal_ids; + + if($mode ne 'ProbeAlign'){ + my $lnames = join(', ', (map { "'${_}_ProbeTranscriptAlign'" } keys(%classes))); + + $sql = "DELETE ox from object_xref ox, xref x, probe p, probe_feature pf, external_db e WHERE ox.ensembl_object_type='ProbeFeature' AND ox.linkage_annotation='ProbeTranscriptAlign' AND ox.xref_id=x.xref_id AND e.external_db_id=x.external_db_id and e.db_name='${transc_edb_name}' AND ox.ensembl_id=pf.probe_feature_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; + $self->log("Deleting ProbeFeature Xref/DBEntry records for:\t$lnames"); + $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up); + + + #Can't include uo.type='ProbeTranscriptAlign' in these deletes yet as uo.type is enum'd to xref or probe2transcript + #will have to join to analysis and do a like "%ProbeTranscriptAlign" on the the logic name? + #or/and ur.summary_description='Promiscuous probe'? + + $sql = "DELETE uo from unmapped_object uo, probe p, external_db e, analysis a WHERE uo.ensembl_object_type='Probe' AND uo.analysis_id=a.analysis_id AND a.logic_name in (${lnames}) AND e.external_db_id=uo.external_db_id and e.db_name='${transc_edb_name}' AND uo.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; + + $self->log("Deleting UnmappedObjects for:\t${lnames}"); + $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up); + + + #Now the actual ProbeFeatures + $sql = "DELETE pf from probe_feature pf, probe p, analysis a WHERE a.logic_name in(${lnames}) AND a.analysis_id=pf.analysis_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; + $self->log("Deleting ProbeFeatures for:\t${lnames}"); + $self->rollback_table($sql, 'probe_feature', 'probe_feature_id', $db, $no_clean_up); + } + + if($mode ne 'ProbeTranscriptAlign'){ + my $lnames = join(', ', (map { "'${_}_ProbeAlign'" } keys(%classes))); + + $sql = "DELETE uo from unmapped_object uo, probe p, external_db e, analysis a WHERE uo.ensembl_object_type='Probe' AND uo.analysis_id=a.analysis_id AND a.logic_name=(${lnames}) AND e.external_db_id=uo.external_db_id and e.db_name='${genome_edb_name}' AND uo.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; + $self->log("Deleting UnmappedObjects for:\t${lnames}"); + $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up); + + + $sql = "DELETE pf from probe_feature pf, probe p, analysis a WHERE a.logic_name in(${lnames}) AND a.analysis_id=pf.analysis_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; + $self->log("Deleting ProbeFeatures for:\t${lnames}"); + $self->rollback_table($sql, 'probe_feature', 'probe_feature_id', $db, $no_clean_up); + } + } + else{ + #Need to count to see if we can carry on with a unforced probe rollback? + #Do we need this level of control here + #Can't we assume that if you want probe you also want probe_feature? + #Leave for safety, at least until we get the dependant ExperimetnalChip test sorted + #What about if we only want to delete one array from an associated set? + #This would delete all the features from the rest? + + $sql = "select count(*) from object_xref ox, xref x, probe p, external_db e WHERE ox.ensembl_object_type='ProbeFeature' AND ox.linkage_annotation='ProbeTranscriptAlign' AND ox.xref_id=x.xref_id AND e.external_db_id=x.external_db_id and e.db_name='${transc_edb_name}' AND ox.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; + $row_cnt = $db->dbc->db_handle->selectrow_array($sql); + + if($row_cnt){ + throw("Cannot rollback ArrayChips($ac_names), found $row_cnt ProbeFeatures. Pass 'force' argument or 'probe_feature' mode to delete"); + } + else{ + $self->log("Found $row_cnt ProbeFeatures"); + } + } + + if($mode eq 'probe'){ + #Don't need to rollback on a CS as we have no dependant EChips? + #Is this true? Should we enforce a 3rd CoordSystem argument, 'all' string we delete all? + + foreach my $ac(@$acs){ + $ac->adaptor->revoke_states($ac);#Do we need to change this to revoke specific states? + #Current states are only IMPORTED, so not just yet, but we could change this for safety? + } + + #ProbeSets + $sql = "DELETE ps from probe p, probe_set ps where p.array_chip_id IN($ac_ids) and p.probe_set_id=ps.probe_set_id"; + $self->rollback_table($sql, 'probe_set', 'probe_set_id', $db, $no_clean_up); + + #Probes + $sql = "DELETE from probe where array_chip_id IN($ac_ids)"; + $self->rollback_table($sql, 'probe', 'probe_id', $db, $no_clean_up); + } + } + + $self->log("Finished $mode roll back for ArrayChip:\t$ac_names"); + return; +} + + +#This will just fail silently if the reset value +#Is less than the true autoinc value +#i.e. if there are parallel inserts going on +#So we can never assume that the $new_auto_inc will be used + + +sub rollback_table{ + my ($self, $sql, $table, $id_field, $db, $no_clean_up, $force_clean_up) = @_; + + my $row_cnt; + eval { $row_cnt = $db->dbc->do($sql) }; + + if($@){ + throw("Failed to rollback table $table using sql:\t$sql\n$@"); + } + + $row_cnt = 0 if $row_cnt eq '0E0'; + $self->log("Deleted $row_cnt $table records"); + + if($force_clean_up || + ($row_cnt && ! $no_clean_up)){ + $self->refresh_table($table, $id_field, $db); + } + + return; +} + +#Now separated so that we can do this once at the end of a rollback of many Sets + +sub refresh_table{ + my ($self, $table, $id_field, $db) = @_; + + #This only works if the new calue is available + #i.e. do not need lock for this to be safe + $self->reset_table_autoinc($table, $id_field, $db) if $id_field; + + $self->log("Optimizing and Analyzing $table"); + + $db->dbc->do("optimize table $table");#defrag data, sorts indices, updates table stats + $db->dbc->do("analyze table $table");#analyses key distribution + + return; +} + + + +sub reset_table_autoinc{ + #Is this called elsewhere or can we merge with + my($self, $table_name, $autoinc_field, $db) = @_; + + if(! ($table_name && $autoinc_field && $db)){ + throw('You must pass a table_name and an autoinc_field to reset the autoinc value'); + } + + if(! (ref($db) && $db->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'))){ + throw('Must pass a valid Bio::EnsEMBL::DBSQL::DBAdaptor'); + } + + #my $sql = "show table status where name='$table_name'"; + #my ($autoinc) = ${$db->dbc->db_handle->selectrow_array($sql)}[11]; + #11 is the field in the show table status table + #We cannot select just the Auto_increment, so this will fail if the table format changes + + #Why do we need autoinc here? + + my $sql = "select $autoinc_field from $table_name order by $autoinc_field desc limit 1"; + my ($current_auto_inc) = $db->dbc->db_handle->selectrow_array($sql); + my $new_autoinc = ($current_auto_inc) ? ($current_auto_inc + 1) : 1; + $sql = "ALTER TABLE $table_name AUTO_INCREMENT=$new_autoinc"; + $db->dbc->do($sql); + return; +} + + + + +=head2 get_core_display_name_by_stable_id + + Args [1] : Bio::EnsEMBL::DBSQL::DBAdaptor + Args [2] : stable ID from core DB. + Args [3] : stable feature type e.g. gene, transcript, translation + Example : $self->validate_and_store_feature_types; + Description: Builds a cache of stable ID to display names. + Returntype : string - display name + Exceptions : Throws is type is not valid. + Caller : General + Status : At risk + +=cut + +# -------------------------------------------------------------------------------- +# Build a cache of ensembl stable ID -> display_name +# Return hashref keyed on {$type}{$stable_id} +#Need to update cache if we're doing more than one 'type' at a time +# as it will never get loaded for the new type! + +sub get_core_display_name_by_stable_id{ + my ($self, $cdb, $stable_id, $type) = @_; + + $type = lc($type); + + if($type !~ /(gene|transcript|translation)/){ + throw("Cannot get display_name for stable_id $stable_id with type $type"); + } + + if(! exists $self->{'display_name_cache'}->{$stable_id}){ + ($self->{'display_name_cache'}->{$stable_id}) = $cdb->dbc->db_handle->selectrow_array("SELECT x.display_label FROM $type t, xref x where t.display_xref_id=x.xref_id and t.stable_id='${stable_id}'"); + } + + return $self->{'display_name_cache'}->{$stable_id}; +} + + +=head2 get_core_stable_id_by_display_name + + Args [1] : Bio::EnsEMBL::DBSQL::DBAdaptor + Args [2] : display name (e.g. from core DB or GNC name) + Example : + Description: Builds a cache of stable ID to display names. + Returntype : string - gene stable ID + Exceptions : None + Caller : General + Status : At risk + +=cut + +# -------------------------------------------------------------------------------- +# Build a cache of ensembl stable ID -> display_name +# Return hashref keyed on {$type}{$stable_id} +#Need to update cache if we're doing more than one 'type' at a time +# as it will never get loaded for the new type! + +sub get_core_stable_id_by_display_name{ + my ($self, $cdb, $display_name) = @_; + + #if($type !~ /(gene|transcript|translation)/){ +# throw("Cannot get display_name for stable_id $stable_id with type $type"); +# } + + if(! exists $self->{'stable_id_cache'}->{$display_name}){ + ($self->{'stable_id_cache'}->{$display_name}) = $cdb->dbc->db_handle->selectrow_array("SELECT g.stable_id FROM gene g, xref x where g.display_xref_id=x.xref_id and and x.display_label='${display_name}'"); + } + + return $self->{'stable_id_cache'}->{$display_name}; +} + + + + + + +1; +