ensembl: variant_effect_predictor/Bio/EnsEMBL/Funcgen/Utils/Helper.pm comparison

comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/Utils/Helper.pm @ 0:1f6dce3d34e0

Uploaded

author	mahtabm
date	Thu, 11 Apr 2013 02:01:53 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:1f6dce3d34e0
+=head1 LICENSE
+Copyright (c) 1999-2011 The European Bioinformatics Institute and
+Genome Research Limited.  All rights reserved.
+This software is distributed under a modified Apache license.
+For license details, please see
+http://www.ensembl.org/info/about/code_licence.html
+=head1 CONTACT
+Please email comments or questions to the public Ensembl
+developers list at <ensembl-dev@ebi.ac.uk>.
+Questions may also be sent to the Ensembl help desk at
+<helpdesk@ensembl.org>.
+=head1 NAME
+Bio::EnsEMBL::Funcgen::Utils::Helper
+=head1 SYNOPSIS
+e.g.
+my $object = Bio::EnsEMBL::Object->new
+(
+logging     => 1,
+log_file    => "/tmp/Misc.log",
+debug_level => 2,
+debug_file  => "/tmp/Misc.dbg",
+);
+$object->log("This is a log message.");
+$object->debug(1,"This is a debug message.");
+$object->system("rmdir /tmp/test");
+----------------------------------------------------------------------------
+=head1 OPTIONS
+=over 8
+=item B<-debug>
+Turns on and defines the verbosity of debugging output, 1-3, default = 0 = off
+=over 8
+=item B<-log_file|l>
+Defines the log file, default = "${instance}.log"
+=item B<-help>
+Print a brief help message and exits.
+=item B<-man>
+Prints the manual page and exits.
+=back
+=head1 DESCRIPTION
+B<This program> performs several debugging and logging functions, as well as providing several inheritable EFGUtils methods.
+=cut
+################################################################################
+package Bio::EnsEMBL::Funcgen::Utils::Helper;
+use Bio::Root::Root;
+use Data::Dumper;
+use Bio::EnsEMBL::Utils::Exception qw (throw stack_trace);
+use Bio::EnsEMBL::Utils::Argument qw( rearrange );
+use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw (get_date);
+#use Devel::Timer;
+use Carp;#? Can't use unless we can get it to redirect
+use File::Basename;
+use strict;
+use vars qw(@ISA);
+@ISA = qw(Bio::Root::Root);
+# Table names listed as valid rollback levels.
+# To be used in conjunction with -full_delete.
+my @rollback_tables = ('data_set', 'feature_set', 'result_set', 'input_set', 'experiment', 'array', 'array_chip', 'experimental_chip');
+# File-scoped lexical debug/log handles (avoids assigning to package typeglobs); being file-scoped, they are shared by every object created from this file.
+my ($DBGFILE, $LOGFILE);
+################################################################################
+=head2 new
+Description : Constructor method to create a new object with passed or
+default attributes.
+Arg  [1]    : hash containing optional attributes :-
+log_file    - name of log file (default = undef -> STDOUT)
+debug_level - level of detail of debug message [1-3] (default = 0 = off)
+debug_file  - name of debug file (default = undef -> STDERR)
+ReturnType  : Helper
+Example     : my $Helper = Bio::EnsEMBL::Funcgen::Utils::Helper->new(
+debug_level => 3,
+debug_file  => "/tmp/efg.debug",
+log_file    => "/tmp/efg.log",
+);
+Exceptions  : throws exception if failed to open debug file
+: throws exception if failed to open log   file
+=cut
+################################################################################
+#To do , change to rearrange
+sub new{
+    # Constructor: builds the parent object, resolves the logging/debugging
+    # attributes and opens the shared debug and log handles.
+    # Args    : key/value list. Recognised keys are tee, debug_level,
+    #           debug_file, log_file, no_log and default_log_dir; any key not
+    #           passed falls back to the matching $main::_<key> variable.
+    # Returns : the new object
+    # Throws  : if log_file and no_log are both set, or if any debug/log
+    #           handle cannot be opened
+    my ($caller, %args) = @_;
+    my $class = ref($caller) || $caller;
+    #Create object from parent class
+    my $self = $class->SUPER::new(%args);
+    #we need to mirror ensembl behaviour here
+    #use rearrange and set default afterwards if not defined
+    # objects private data and default values
+    #Not all of these need to be in main
+    my %attrdata = (
+        _tee             => $main::_tee,
+        _debug_level     => $main::_debug_level,
+        _debug_file      => $main::_debug_file,
+        _log_file        => $main::_log_file,#default should be set in caller
+        _no_log          => $main::_no_log,#suppresses log file generation if log file not defined
+        _default_log_dir => $main::_default_log_dir,
+    );
+    # set each class attribute using passed value or default value
+    foreach my $attrname (keys %attrdata){
+        (my $argname = $attrname) =~ s/^_//; # remove leading underscore
+        $self->{$attrname} = (exists $args{$argname}) ? $args{$argname} : $attrdata{$attrname};
+    }
+    $self->{'_tee'} = 1 if $self->{'_no_log'};
+    #should we undef log_file here too?
+    #This currently only turns off default logging
+    $self->{_default_log_dir} ||= $ENV{'HOME'}.'/logs';
+    $self->{'_report'} = [];
+    # DEBUG OUTPUT & STDERR
+    #should default to lowest or highest debug level here!
+    if(defined $self->{_debug_level} && $self->{_debug_level}){
+        $main::_debug_level = $self->{_debug_level};
+        if(defined $self->{_debug_file}){
+            $main::_debug_file = $self->{_debug_file};
+            open($DBGFILE, '>>', $self->{_debug_file})
+              or throw("Failed to open debug file : $!");
+        }
+        else{
+            # No debug file given: debug output goes to a duplicate of STDERR
+            open($DBGFILE, '>&STDERR')
+              or throw("Failed to duplicate STDERR for debug output : $!");
+        }
+        select $DBGFILE; $| = 1;  # make debug file unbuffered
+        $self->debug(1,"Debugging started ".localtime()." on $0 at level ".$self->{_debug_level}." ...");
+    }
+    my $log_file =  $self->{_log_file};
+    # LOG OUTPUT
+    if (defined $self->{_log_file}){
+        #This causes print on unopened file as we try and log in the DESTROY
+        throw('You have specified mutually exclusive parameters log_file and no_log') if($self->{'_no_log'});
+        $main::_log_file = $self->{_log_file};
+        if($self->{'_tee'}){
+            # Pipe through tee so output is appended to the file and echoed.
+            # NOTE: the path is interpolated into a shell command line.
+            open($LOGFILE, '|-', 'tee -a '.$log_file)
+              or throw("Failed to open tee pipe to log file : $log_file\nError: $!");
+        }
+        else{
+            open($LOGFILE, '>>', $log_file)
+              or throw("Failed to open log file : $log_file\nError: $!");
+        }
+    }
+    else{
+        #Change this to get the name of the control script and append with PID.out
+        #This is to ensure that we always capture output
+        #We need to also log params
+        #We will have to call this from the child class.
+        #Only do this if we don't have supress default logs set
+        #To avoid loads of logs during testing
+        if(! $self->{'_no_log'}){
+            # Name the default log after the file of the outermost stack frame plus the PID
+            my @stack = stack_trace();
+            my $top_level = $stack[$#stack];
+            my (undef, $file) = @{$top_level};
+            $file =~ s/.*\///;
+            $self->run_system_cmd('mkdir '.$self->{_default_log_dir}) if(! -e $self->{_default_log_dir});
+            $self->{'_log_file'} = $self->{_default_log_dir}.'/'.$file.'.'.$$.'.log';
+            warn "No log file defined, defaulting to:\t".$self->{'_log_file'}."\n";
+            #we should still tee here
+            if($self->{'_tee'}){
+                open($LOGFILE, '|-', 'tee -a '.$self->{'_log_file'})
+                  or throw('Failed to open tee pipe to log file : '.$self->{'_log_file'}."\nError: $!");
+            }
+            else{
+                # Note: the default log is truncated ('>'), unlike an explicit log_file ('>>')
+                open($LOGFILE, '>', $self->{'_log_file'})
+                  or throw('Failed to open log file : '.$self->{'_log_file'}."\nError: $!");
+            }
+        }
+        else{
+            #Have to include STD filehandles in operator
+            open($LOGFILE, '>&STDOUT')
+              or throw("Failed to duplicate STDOUT for log output : $!");
+        }
+    }
+    select $LOGFILE; $| = 1;  # make log file unbuffered
+    $self->log("\n\nLogging started at ".localtime()."...");
+    # RESET STDOUT TO DEFAULT
+    select STDOUT; $| = 1;
+    $self->debug(2,"Helper class instance created.");
+    return $self;
+}
+################################################################################
+=head2 DESTROY
+Description : Called by garbage collection to enable tidy up before object deleted
+ReturnType  : none
+Example     : none - should not be called directly
+Exceptions  : none
+=cut
+################################################################################
+sub DESTROY{
+    # Destructor: flushes the summary report, writes closing log/debug
+    # messages and the timer report (if a timer was set). Handles are
+    # deliberately left open.
+    my $self = shift;
+    $self->report;
+    if($self->{_log_file}){
+        $self->log("Logging complete ".localtime().".");
+        # Log the process memory figures reported by ps, one column at a time
+        foreach my $stat (['Virtual Memory ', 'vsz'], ['Resident Memory ', 'rss']){
+            my ($label, $column) = @{$stat};
+            $self->log($label.`ps -p $$ -o $column |tail -1`);
+        }
+        #       close LOGFILE;  # if inherited object then cannot close filehandle !!!
+    }
+    if($self->{_debug_level}){
+        $self->debug(1,"Debugging complete ".localtime().".");
+        #       close DBGFILE;  # if inherited object then cannot close filehandle !!!
+    }
+    $self->{'_timer'}->report() if defined $self->{'_timer'};
+    $self->debug(2,"Bio::EnsEMBL::Helper class instance destroyed.");
+    return;
+}
+##Need generic method in here to get stack and line info
+###Use Root.pm stack methods!
+# and replace this with caller line method for logging
+sub _get_stack{
+    # Returns "[<local time> - <file basename>:<line>]" for the frame two
+    # levels up, falling back to one level up, then to "undef:0".
+    my $self = shift;
+    #need to resolve this method with that in debug, pass log or debug arg for different format
+    my @frame = caller(2);
+    @frame = caller(1) if ! @frame;
+    @frame = (undef,"undef",0) if ! @frame;
+    my ($file, $line) = @frame[1,2];
+    return "[".localtime()." - ".basename($file).":$line]";
+}
+################################################################################
+=head2 log
+Arg[0]      : string  - log message.
+Arg[1]      : boolean - memory usage, appends current process memory stats
+Description : Method to write messages to a previously set up log file.
+Return type : none
+Example     : $root->log("Processing file $filename ...", 1);
+Exceptions  : none
+=cut
+################################################################################
+sub log{
+    # Writes a message to the shared log handle, then echoes it at debug level 1.
+    # Args : $message   - text to log
+    #        $mem       - if true, append the process virtual memory size (from ps) and " KB"
+    #        $date      - if true, append the current local time
+    #        $no_return - if true, do not append a trailing newline
+    my ($self, $message, $mem, $date, $no_return) = @_;
+    if($mem){
+        $message.= " :: ".`ps -p $$ -o vsz |tail -1`;
+        chomp $message;
+        $message .= " KB";
+    }
+    # localtime() in scalar context is already newline-free, so no separate
+    # chomped copy of the time is needed.
+    $message .= ' - '.localtime() if $date;
+    $message .= "\n" if ! $no_return;
+    print $LOGFILE "::\t$message";
+    # Add to debug file if not printing to STDERR?
+    # only if verbose?
+    # this would double print everything to STDOUT if tee and debug has not redefined STDERR
+    $self->debug(1,$message);
+}
+################################################################################
+=head2 report
+Arg[0]      : optional string  - log message.
+Arg[1]      : optional boolean - memory usage, appends current process memory stats
+Description : Wrapper method for log, which also stores message for summary reporting
+Return type : none
+Example     : $root->report("WARNING: You have not done this or that and want it reported at the end of a script");
+Exceptions  : none
+=cut
+################################################################################
+sub report{
+    # With a message: logs it and queues it for the summary.
+    # Without a message: prints any queued messages as a summary and clears the queue.
+    my ($self, $message, $mem) = @_;
+    if(defined $message){
+        $self->log($message, $mem);
+        push @{$self->{'_report'}}, $message;
+        return;
+    }
+    my $pending = $self->{'_report'};
+    if(scalar(@{$pending})){
+        print $LOGFILE "\n::\tSUMMARY REPORT\t::\n";
+        print $LOGFILE join("\n", @{$pending})."\n";
+        $self->{'_report'} = [];
+    }
+    return;
+}
+################################################################################
+=head2 log_header
+Arg[0]      : string  - log message.
+Arg[1]      : boolean - memory usage, appends current process memory stats
+Description : Wrapper method to format a log as a header line
+Return type : none
+Example     : $root->log_header("Processing file $filename ...", 1);
+Exceptions  : none
+=cut
+################################################################################
+sub log_header{
+    # Logs $message as a header line: two blank lines before, the message
+    # wrapped in "::" markers via log(), and one blank line after.
+    my $self = shift;
+    my ($message, $mem, $date) = @_;
+    print {$LOGFILE} "\n\n";
+    my $banner = "::\t$message\t::\t::";
+    $self->log($banner, $mem, $date);
+    print {$LOGFILE} "\n";
+}
+################################################################################
+=head2 debug
+Description : Method to write debug info to a previously set up debug file.
+Over-rides Root.pm on/off style debugging
+Args        : int: debug level and string: log message.
+ReturnType  : none
+Example     : $root->debug(2,"dir=$dir file=$file");
+Exceptions  : none
+=cut
+################################################################################
+sub debug{
+    # Prints $message to the shared debug handle when $level is within the
+    # object's debug level, tagged with the PID, the outermost stack frame
+    # and the frame one level above this method's caller.
+    my ($self,$level,$message) = @_;
+    #Can we not detect whther message is a scalar, array or hash and Dump or print accordingly?
+    my ($outer_file, $caller_file) = ("undef", "undef");
+    my ($outer_line, $caller_line) = (0, 0);
+    # if debug on at the requested level then output the passed message
+    if (defined $self->{_debug_level} && $level <= $self->{_debug_level}){
+        ######Replace this with Carp method?
+        my $depth = 0;
+        while (my @frame = caller($depth)){
+            my ($file, $line) = (basename($frame[1]), $frame[2]);
+            ($caller_file, $caller_line) = ($file, $line) if $depth == 1;
+            # Overwritten on every pass, so this ends up as the outermost frame
+            ($outer_file, $outer_line) = ($file, $line);
+            $depth++;
+        }
+        #This still attempts to print if file not opened
+        print $DBGFILE "debug $message\t: [$$ - $outer_file:$outer_line  $caller_file:$caller_line]\n";
+    }
+}
+################################################################################
+=head2 debug_hash
+Description : Method to write the contents of passed hash to debug output.
+Args        : int: debug level and hashref.
+ReturnType  : none
+Example     : $Helper->debug_hash(3,\%hash);
+Exceptions  : none
+=cut
+################################################################################
+sub debug_hash{
+    # Dumps a reference to the passed hashref to the shared debug handle
+    # when $level is within the object's debug level.
+    my ($self,$level,$hashref) = @_;
+    my $threshold = $self->{_debug_level};
+    if (defined $threshold && $level <= $threshold){
+        print $DBGFILE Data::Dumper::Dumper(\$hashref)."\n";
+    }
+}
+################################################################################
+=head2 run_system_cmd
+Description : Method to control the execution of the standard system() command
+ReturnType  : int - exit code of the command
+Example     : $Helper->run_system_cmd("rmdir /tmp/test");
+Exceptions  : throws exception if system command returns non-zero
+=cut
+################################################################################
+#Move most of this to EFGUtils.pm
+#Maintain wrapper here with throws, only warn in EFGUtils
+sub run_system_cmd{
+    # Runs a command line through system(), warning about and optionally
+    # throwing on failure.
+    # Args    : $command - shell command line to run
+    #           $no_exit - if true, warn instead of throwing on failure
+    # Returns : 0 on success; otherwise the child's exit code, 128+signal if
+    #           the child was killed by a signal, or -1 if it could not be started
+    # Throws  : on any failure unless $no_exit is set
+    my ($self, $command, $no_exit) = @_;
+    $self->debug(3, "system($command)");
+    # At debug level 3 or above with a debug file set, append the command's
+    # stdout and stderr to that file; otherwise leave output alone.
+    my $redirect = '';
+    if (defined $self->{_debug_level} && $self->{_debug_level} >= 3
+        && defined $self->{_debug_file}){
+        $redirect = " >>".$self->{_debug_file}." 2>&1";
+    }
+    # execute the passed system command
+    my $status = system("$command $redirect");
+    my $exit_code;
+    if ($status == -1) {
+        # The command could not be started; shifting -1 gives a meaningless number
+        warn "Failed to execute: $!\n";
+        $exit_code = -1;
+    }
+    elsif ($status & 127) {
+        # Killed by a signal: the high byte is 0, so report 128+signal
+        # rather than letting this pass as success
+        my $signal = $status & 127;
+        warn sprintf("Child died with signal %d, %s coredump\nError:\t%s", $signal, ($status & 128) ? 'with' : 'without', $!);
+        $exit_code = 128 + $signal;
+    }
+    else {
+        $exit_code = $status >> 8; #get the true exit code
+        warn sprintf("Child exited with value %d\nError:\t%s\n", $exit_code, $!) if $exit_code != 0;
+    }
+    if ($exit_code != 0){
+        if (! $no_exit){
+            throw("System command failed:\t$command\nExit code:\t$exit_code\n$!");
+        }
+        else{
+            warn("System command returned non-zero exit code:\t$command\nExit code:\t$exit_code\n$!");
+        }
+    }
+    #reverse boolean logic for perl...can't do this anymore due to tab2mage successful non-zero exit codes :/
+    return $exit_code;
+}
+#add sys_get method ehre to handle system calls which retrieve data?
+#i.e.backtick commands `find . -name *fasta`
+#or use want or flag with above method?
+#should open pipe instead to capture error?
+sub get_data{
+    # Standard checked accessor for data stored on the object.
+    # Args    : $data_type - top-level key on the object
+    #           $data_name - optional key within that entry
+    # Returns : the named value, or the whole entry when no name is given
+    # Throws  : if the requested type or name does not exist
+    my ($self, $data_type, $data_name) = @_;
+    if(! defined $data_name){
+        throw("Defs data type $data_type does not exist\n") if (! exists $self->{$data_type});
+        return $self->{$data_type};
+    }
+    throw("Defs data name $data_name for type '$data_type' does not exist\n") if (! exists $self->{$data_type}{$data_name});
+    return $self->{$data_type}{$data_name};
+}
+#sub Timer{
+#	my ($self) = shift;
+#	$self->{'_timer'} = new Devel::Timer()  if(! defined $self->{'_timer'});
+#	return $self->{'_timer'};
+#}
+sub set_header_hash{
+    # Builds a lookup of header name => 0-based column index.
+    # Args    : $header_ref - arrayref of header names
+    #           $fields     - optional arrayref of mandatory header names
+    # Returns : hashref of name => index
+    # Throws  : if a mandatory field is missing from the header
+    my ($self, $header_ref, $fields) = @_;
+    my %hpos = map { ($header_ref->[$_], $_) } 0..$#{$header_ref};
+    foreach my $required (@{ $fields || [] }){
+        throw("Header does not contain mandatory field:\t${required}") if ! exists $hpos{$required};
+    }
+    return \%hpos;
+}
+#Move this to EFGUtils?
+sub backup_file{
+    # Moves an existing plain file aside to "<path>.<HH:MM:SS>".
+    # Arg     : $file_path - path of the file to back up
+    # Returns : nothing
+    # Throws  : if no path is given, or if the rename fails
+    my ($self, $file_path) = @_;
+    throw("Must define a file path to backup") if(! $file_path);
+    if (-f $file_path) {
+        $self->log("Backing up:\t$file_path");
+        # Build the same suffix as `date '+%T'` in-process, and rename without
+        # a shell so paths with spaces or metacharacters are handled literally.
+        my ($sec, $min, $hour) = localtime();
+        my $backup_path = sprintf('%s.%02d:%02d:%02d', $file_path, $hour, $min, $sec);
+        rename($file_path, $backup_path)
+          or throw("Failed to back up $file_path to $backup_path\nError: $!");
+    }
+    return;
+}
+#This should move to Utils
+#as it is a simple string manipulation
+sub get_schema_and_build{
+    # Splits a database name on underscores and returns an arrayref of its
+    # last two components.
+    my ($self, $dbname) = @_;
+    my @tokens = split /_/, $dbname;
+    my $last = $#tokens;
+    # Index arithmetic rather than a [-2,-1] slice, so a one-token name
+    # yields that token twice, as before.
+    return [ $tokens[$last - 1], $tokens[$last] ];
+}
+=head2 get_regbuild_set_states
+Arg [1]    : Bio::EnsEMBL::DBAdaptor
+Example    : my ($dset_states, $rset_states, $fset_states) = $helper->get_regbuild_set_states($db);
+Description: Returns Array refs of appropriate states for sets used in the regulatory build
+Returntype : Array
+Exceptions : Warns if cannot find chromosome CoordSystem
+Caller     : HealthChecker & regulatory build code
+Status     : At risk
+=cut
+sub get_regbuild_set_states{
+    # Returns the status names for Data/Result/FeatureSets, as three array
+    # refs in that order.
+    # Arg     : $db - object providing get_CoordSystemAdaptor (and dbc->dbname
+    #           for the warning)
+    # Returns : (\@dset_states, \@rset_states, \@fset_states); all three are
+    #           empty when no 'chromosome' CoordSystem is found
+    my ($self, $db) = @_;
+    my $cs_a = $db->get_CoordSystemAdaptor;
+    #These states need to be mirrored in RegulatorySets.java
+    my $chrom_cs = $cs_a->fetch_by_name('chromosome');
+    my (@dset_states, @rset_states, @fset_states);
+    if(! defined $chrom_cs){
+        #This species most likely does not have a regbuild
+        #really just need to get the 'highest' level here
+        warn "Could not find Chromosome CoordSystem. ".$db->dbc->dbname.". most likely does not contain a RegulatoryBuild";
+    }
+    else{
+        # Reuse the CoordSystem fetched above instead of querying the adaptor again
+        my $imp_cs_status = 'IMPORTED_'.$chrom_cs->version;
+        #What about non-chromosome assemblies?
+        #top level will not return version...why not?
+        @dset_states = ('DISPLAYABLE');
+        @rset_states = (@dset_states, 'DAS_DISPLAYABLE', $imp_cs_status);
+        @fset_states = (@rset_states, 'MART_DISPLAYABLE');
+    }
+    return (\@dset_states, \@rset_states, \@fset_states);
+}
+=head2 define_and_validate_sets
+Arg [1]    : hash - set constructor parameters:
+-dbadaptor    Bio::EnsEMBL::Funcgen::DBAdaptor
+-name         Data/FeatureSet/ResultSet name to create
+-feature_type Bio::EnsEMBL::Funcgen::FeatureType
+-cell_type    Bio::EnsEMBL::Funcgen::CellType
+-analysis     FeatureSet Bio::EnsEMBL::Analysis
+-feature_class e.g. annotated or regulatory
+-description  FeatureSet description
+-recovery     Allows definition of extant sets so long as they match
+-append       Boolean - Forces import on top of previously imported data
+-rollback     Rolls back product feature set.
+-supporting_sets Complete set of pre-stored supporting or input sets for this DataSet
+-slices       ARRAYREF of Slices to rollback
+Example    : my $dset = $self->define_and_validate_sets(%params);
+Description: Checks whether set is already in DB based on set name, rolls back features
+if roll back flag set. Or creates new DataSet and Feature|ResultSet if not present.
+Returntype : Bio::EnsEMBL::Funcgen::DataSet
+Exceptions : Throws if DBAdaptor param not valid
+Caller     : Importers and Parsers
+Status     : At risk
+=cut
+sub define_and_validate_sets{
+my $self = shift;
+#change slice to slices to support multi slice import from InputSet::define_sets
+#Can't do full rollback in slice mode
+#This may not be safe in slice mode as we will then have mixed inputs/outputs
+my ($name, $anal, $ftype, $ctype, $type, $append, $db, $ssets, $description, $rollback, $recovery, $slices, $display_label) = rearrange(['NAME', 'ANALYSIS', 'FEATURE_TYPE', 'CELL_TYPE', 'FEATURE_CLASS', 'APPEND',
+				 'DBADAPTOR', 'SUPPORTING_SETS', 'DESCRIPTION', 'ROLLBACK', 'RECOVERY', 'SLICES', 'DISPLAY_LABEL'], @_);
+#VALIDATE CONFIG HASH
+#$config_hash ||= {};#default so exists will work without testing
+#if(keys %{$config_hash}){
+#	#There is a module to handle config hashes somewhere!
+#	throw('config_hash not yet implemented for define_and_validate_sets');
+#my @known_config = ('full_delete');#We never want full delete here as this is a create method!
+#Can we set vars from has by refs like getopts?
+#map {
+#  throw("Found unsupported config hash parameter:\t$_") if ! grep(/^${_}$/, @known_config);
+#} keys %{$config_hash};
+#  }
+#define rollback level
+#extract this to _set_rollback_level($rollback_mode, $feature_class)
+my $rollback_level = 0;
+#These should be globally defined so all rollback methods can use them
+my %valid_rollback_modes =
+	(
+	 product_features => 1,
+	 #Just product features and FeatureSet status, what about DataSet status?
+	 #full delete does nothing here?
+	 sets => 2,
+	 #Includes product_features and
+	 #deletes supporting_sets entries unless we specify append
+	 #revoke all states on Feature/Data/InputSets
+	 #Full delete removes Feature/Data/InputSet entries
+	 #Never includes ResultSets!
+	 supporting_features => 3,
+	 #Includes product_feature and sets
+	 #Removes all states and supporting features
+	 #inc. ResultSet results/ResultFeatures
+	 #Full_delete remove supporting set entries
+	 #Otherwise just rollback states for affected sets
+	);
+if($rollback){
+	if(! exists $valid_rollback_modes{$rollback}){
+	  #Default to some sensible values
+	  $rollback = 'product_features';#default for FeatureSets
+	  #Always want overwrite supporting sets if there is a difference
+	  $rollback = 'sets' if ($type eq 'regulatory');
+	  $rollback = 'supporting_sets' if ($type eq 'result');
+	  warn ("You have not set a valid rollback mode(product_features|sets|supporting_features), defaulting to $rollback for feature class $type\n");
+	}
+	$rollback_level = $valid_rollback_modes{$rollback};
+}
+if($slices && (ref($slices) ne 'ARRAY')){
+	throw('-slices param must be an ARRAYREF of Bio::EnsEMBL::Slice objects');
+	#Rest of validation done in other methods
+}
+#But how are we going to resolve the append behaviour when we also want to validate the ssets?
+#Can't, so append also functions to enable addition in the absence of some or all previous data/esets?
+#No this is not true, we want to be able to fetch an extant set for import,
+#we just need to be aware of sset IMPORTED status?
+#This should be a recovery thing, allow fetch, but validate sets?
+#Check mandatory params
+if(! (ref($db) && $db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'))){
+	throw('Must provide a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor');
+}
+throw('Must provide a -name ') if(! defined $name);
+#Not necessarily, just do rollback then append?
+#But then we'd potentially have a supporting set associated which has had it's data removed from the feature set.
+#Generating sets for an ExpSet will always have append set
+#This could be valid for generically grabing/creating sets for adding new supporting sets e.g. reg build
+throw('-append and -rollback are mutually exclusive') if $rollback_level && $append;
+#This will never happen due to previous test? append will always fail?
+#warn('You are defining a pre-existing FeatureSet without rolling back'.
+#	   ' previous data, this could result in data duplication') if $append && ! $rollback_level;
+#Is this really possible, surely the supporting set will fail to store due to unique key?
+#Should we warn here about append && recovery?
+#Aren't these mutually exclusive?
+#Do we know if we have new data? append should override recovery, or just specifiy append
+#This will stop the import and highlight the issue to the user
+#We need to be able to run with both otherwise the import will not work
+throw('Must provide a -feature_class e.g. annotated, external, result or regulatory') if(! defined $type);
+#Check for annotated, external, regulatory etc here?
+#Should never be external as we don't have DataSets for external sets?
+$db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureType',  $ftype);
+if (defined $ctype){
+	$db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::CellType', $ctype);
+}
+elsif($type ne 'regulatory'){
+	throw('Only Data/FeatureSets with type \'regulatory\' can have an undefined CellType');
+	#Coudl extend this to core set by name eq 'RegulatoryFeatures'?
+}
+$db->is_stored_and_valid('Bio::EnsEMBL::Analysis',  $anal);
+my $dset_adaptor = $db->get_DataSetAdaptor;
+my $fset_adaptor = $db->get_FeatureSetAdaptor;
+my $rset_adaptor = $db->get_ResultSetAdaptor;
+#DataSet centric definition to enable multiple DataSets
+#to be generated from the same supporting sets
+my $dset = $dset_adaptor->fetch_by_name($name);
+my ($fset, $rset, @input_sets);
+#Validate stored vs passed set data
+if(defined $dset){
+	$self->log('Found Stored DataSet '.$dset->name);
+	if($type ne 'result'){#i.e. annotated
+	  #Does this account for regulatory?
+	  $fset = $dset->product_FeatureSet;
+	  #Here we have the possiblity that a feature_set with a different name may have
+	  #been associated with the DataSet
+	  if(defined $fset){
+		$self->log("Found associated product FeatureSet:\t".$fset->name);
+		#if(! $clobber &&
+		if($fset->name ne $name){
+		  throw('Invalid product FeatureSet name ('.$fset->name.') for DataSet ('.$name.'). Rollback will overwrite the FeatureSet and mismatched name will be retained.');
+		  #Need to clobber both or give explicit name for datasets or rename dataset???
+		  #Force this throw for now, make this fix manual as we may end up automatically overwriting data
+		}
+	  }
+	  #This needs to be modified to support InputSets in ResultSets?
+	  #Would never have mixed Input/ResultSets so no need
+	  #Could potential need to do it for mixed Result/FeatureSets
+	  #if we ever use an analysis which uses both set types
+	  #check supporting_sets here if defined
+	  #We have the problem here of wanting to add ssets to a previously existing dset
+	  #we may not know the original sset, or which of the ssets are new
+	  #Hence there is a likelihood of a mismatch.
+	  #Much of this is replicated in store_udpated sets
+	  if(defined $ssets){
+		my @sorted_ssets = sort {$a->dbID <=> $b->dbID} @{$ssets};
+		my @stored_ssets = sort {$a->dbID <=> $b->dbID} @{$dset->get_supporting_sets};
+		my $mismatch = 0;
+		$mismatch = 1 if(scalar(@sorted_ssets) != scalar(@stored_ssets));
+		if(! $mismatch){
+		  for my $i(0..$#stored_ssets){
+			if($stored_ssets[$i]->dbID != $sorted_ssets[$i]->dbID){
+			  $mismatch=1;
+			  last;
+			}
+		  }
+		}
+		if($mismatch){
+		  #We're really print this names here which may hide the true cell/feature/anal type differences.
+		  my $mismatch = 'There is a (name/type/analysis) mismatch between the supplied supporting_sets and the'.
+			' supporting_sets in the DB for DataSet '.$dset->name."\n\nStored:\n"
+			  .join(', ', (map { $_->name } @stored_ssets))."\n\nSupplied supporting_sets:\n"
+				.join(', ', (map { $_->name } @sorted_ssets));
+		  if($append){
+			warn($mismatch."\n\nAppending supporting set data to unvalidated supporting sets");
+		  }
+		  elsif($rollback_level > 1){#supporting set rollback
+			warn($mismatch."\n\nReplacing previously stored supporting sets with newly defined sets\n");
+			if($slices){
+			  warn("WARNING:\tPerforming supporting_set rollback in slice mode. This may corrupt the supporting_set definition for other slices in this DataSet if they are not re-generated using the same supporting_sets\n");
+			}
+			#Remove supporting_set entries
+			#This should be in a rollback_DataSet method
+			#This has moved to DataSetAdaptor::store_update_sets
+			#Reset supporting sets
+			$dset->{'supporting_sets'} = undef;
+			$dset->add_supporting_sets(\@sorted_ssets);
+			#Move this to last block?
+			#This will currently fail as it test for product_FeatureSet
+			#How do we get around this? Remove IMPORTED status and only throw if fset has IMPORTED status?
+			#warn "pre store sset ".@{$dset->get_supporting_sets};
+			#($dset) = @{$dset_adaptor->store_updated_sets([$dset], $rollback_level)};
+			#$dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory';
+		  }
+		  else{
+			throw($mismatch);
+		  }
+		}
+	  }
+	  else{
+		warn("No supporting sets defined, skipping supporting set validation for definition of DataSet:\t".$name);
+	  }
+	}
+	else{#result_features from InputSet
+	  #Do we ever pass supporting sets here?
+	  #Do we need to test vs stored_sets?
+	  #There is the potential for more than one ResultSet to be associated with DataSet
+	  #But as we are using the same name, this restricts the number wrt the cardinality
+	  #of the name field. i.e. 1 name per analysis/cell_type/feature_type.
+	  #This now works slightly differently to the rest of this method as we
+	  #need to treat the ResultSet as we are currently treating the FeatureSet below.
+	  #However, the use case of this method is for one InputSet giving rise to one ResultSet
+	  #Hence just throw if we find more than one or have a name mismatch???
+	  my @stored_sets = @{$dset->get_supporting_sets};
+	  #THis assumes we will always have supporting sets
+	  #and is failing as we have removed this test in DataSet::new
+	  #But where are we storing it without the supporting set?
+	  if(scalar(@stored_sets) > 1){
+		throw('define_and_validate_sets does not yet support DataSets with multiple supporting ResultSets for result_features');
+	  }
+	  elsif(! @stored_sets){
+		throw("DataSet($name) does not have any stored supporting sets. These should have been defined when storing the DataSet");
+		#Or should we handle this?
+	  }
+	  $rset = $stored_sets[0];
+	  if($rset->set_type ne 'result'){
+		throw("DataSet already contains a supporting set which is not a ResultSet:\t".$rset->set_type."\t".$stored_sets[0]->name);
+	  }
+	  elsif($ssets){
+		#Do we ever pass supporting sets, test for completeness
+		#Just test we have the same supplied ssets if it is defined
+		if(scalar(@$ssets) != 1){
+		  throw("ResultFeature data sets currently only support one supporting ResultSet.\nSupproting sets:\t".
+				join(', ', (map { $_->name.'('.$_->set_type } @$ssets)));
+		}
+		elsif(! ($rset->dbID == $ssets->[0]->dbID) &&
+			  ($ssets->[0]->set_type eq 'result')){
+		  throw('Supplied supporting set('.$ssets->[0]->name.') does not match stored supporting set('.$rset->name.')');
+		}
+	  }
+	  @input_sets = @{$rset->get_InputSets};
+	}
+}
+if($type eq 'result'){
+	#Validate the defined InputSets
+	if (scalar(@$ssets) > 1) {
+	  throw("define_and_validate_sets does not yet support multiple InputSets for defining a ResultSet:\t".$name);
+	}
+	if ($ssets->[0]->set_type ne 'input') {
+	  throw("To define a ResultSet($name) containing result_features, you must provide and InputSet as a supporting set\nArray based ResultSets(i.e. experimental_chip/channel) are not defined using this method, see specific Import Parsers.");
+	}
+	#Try and grab the rset just in case it has been orphaned somehow
+	if (! defined $rset) {
+	  $rset = $rset_adaptor->fetch_all_by_name($name, $ftype, $ctype, $anal)->[0];
+	  #Should only ever be one given all parts of unique key
+	  @input_sets = @{$rset->get_InputSets} if $rset;
+	}
+	if (defined $rset) {		#Validate stored InputSets
+	  if (scalar(@input_sets) != scalar(@$ssets)) {
+		throw('Found mismatch between number of previously stored InputSets('.scalar(@input_sets).') and defined InputSets('.scalar(@$ssets).'). You must provide a complete list of InputSets to define your ResultSet.');
+	  }
+	  if ($input_sets[0]->dbID != $ssets->[0]->dbID) {
+		throw('Found dbID mismatch between previously stored InputSet('.$input_sets[0]->name.') and define InputSet('.$ssets->[0]->name.')');
+	  }
+	  #rollback ResultSet/InputSet here?
+	  if($rollback_level > 2){
+		warn "rollback not yet fully implemented for Result/InputSets";
+		#Does this need to be by slice?
+		#What about states if we are running in parallel?
+		if($slices){
+		  map {$self->rollback_ResultSet($rset, $rollback, $_)} @$slices;
+		}
+		else{
+		  $self->rollback_ResultSet($rset, $rollback);
+		}
+	  }
+	}
+	else{#define ResultSet
+	  ($rset) = @{$rset_adaptor->store(Bio::EnsEMBL::Funcgen::ResultSet->new
+									   (
+										-name => $name,
+										-feature_type => $ftype,
+										-cell_type => $ctype,
+										-table_name => 'input_set',
+										-table_id   => $ssets->[0]->dbID,
+										-analysis   => $anal
+									   )
+									  )};
+	}
+}
+else{#annotated/regulatory/external i.e. FeatureSet
+	#Try and grab the fset just in case it has been orphaned somehow
+	if(! defined $fset){
+	  $fset = $fset_adaptor->fetch_by_name($name);
+	  if(defined $fset){
+		#Now we need to test whether it is attached to a dset
+		#Will be incorrect dset if it is as we couldn't get it before
+		#else we test the types and rollback
+		$self->log("Found stored orphan FeatureSet:\t".$fset->name);
+		my $stored_dset = $dset_adaptor->fetch_by_product_FeatureSet($fset);
+		if(defined $stored_dset){
+		  throw('Found FeatureSet('.$name.') associated with incorrect DataSet('.$stored_dset->name.
+				").\nTry using another -name in the set parameters hash");
+		}
+	  }
+	}
+	#Rollback or create FeatureSet
+	if(defined $fset){
+	  if($rollback_level){
+		#Don't check for IMPORTED here as we want to rollback anyway
+		#Not forcing delete here as this may be used as a supporting set itself.
+		$self->rollback_FeatureSet($fset, undef, $slices);
+	  }
+	  elsif ($append || $recovery) {
+		#This is only true if we have an sset mismatch
+		#Do we need to revoke IMPORTED here too?
+		#This behaves differently dependant on the supporting set.
+		#InputSet status refers to loading in FeatureSet, where as ResultSet status refers to loading into result table
+		#So we really want to revoke it
+		#But this leaves us vulnerable to losing data if the import crashes after this point
+		#because we have no way of assesing which is complete data and which is incomplete data
+		#within a feature set.
+		#This means we need a status on supporting_set, not InputSet or ResultSet
+		#as this has to be in the context of a dataset.
+		#Grrr, this means we need a SupportingSet class which simply wraps the InputSet/ResultSet
+		#We also need a single dbID for the supporting_set table
+		#Which means we will have to do some wierdity with the normal dbID implementation
+		#i.e. Have supporting_set_id, so we can still access all the normal dbID method for the given Set class
+		#This will have to be hardcoded into the state methods
+		#Also will need to specify when we want to store as supporting_status or normal set status.
+		#This is an awful lot to protect against vulnerability
+		#Also as there easy way to track what features came from which supporting set
+		#There isn't currently a viable way to rollback, hence will have to redo the whole set.
+		#Maybe we can enforce this by procedure?
+		#By simply not associating the supporting set until it has been loaded into the feature set?
+		#This may cause even more tracking problems
+		#Right then, simply warn and do not revoke feature_set IMPORTED to protect old data?
+		#Parsers should identify supporting_sets(InputSets) which exist but do not have IMPORTED
+		#status and fail, specifying -recover which will rollback_FeatureSet which will revoke the IMPORTED status
+		#This can mean a failed import can leave a partially imported feature set with the IMPORTED status!!!
+		#We just need to handle InputSets and ResultSets differently.
+		#In parsers or here?
+		#Probably best in the parsers as this is where the states are set.
+		#Should we throw here for ResultSet?
+		#Force rollback of FeatureSet first or create new one?
+		#And throw for InputSet?
+		#This again comes back to whether we will ever have more than one file
+		#for a give InputSet, currently not.
+		$self->log("WARNING\t::\tAdding data to a extant FeatureSet:\t".$fset->name);
+	  } else {
+		throw('Found extant FeatureSet '.$fset->name.'. Maybe you want to specify the rollback, append or recovery parameter or roll back the FeatureSet separately?');
+	  }
+	} else {
+	  #create a new one
+	  $self->log("Creating new FeatureSet:\t".$name);
+	  $fset = Bio::EnsEMBL::Funcgen::FeatureSet->new(
+													 -name          => $name,
+													 -feature_type  => $ftype,
+													 -cell_type     => $ctype,
+													 -analysis      => $anal,
+													 -feature_class => $type,
+													 -description   => $description,
+													 -display_label => $display_label,
+													);
+	  ($fset) = @{$fset_adaptor->store($fset)};
+	}
+}
+#Create/Update the DataSet
+if(defined $dset){
+	#Could do these updates above?
+	#But delayed to reduce redundancy
+	if($type ne 'result'){
+	  if(! defined $dset->product_FeatureSet){
+		$self->log("Updating DataSet with new product FeatureSet:\t".$fset->name);
+		$dset->product_FeatureSet($fset);
+	  }
+	  $dset = $dset_adaptor->store_updated_sets([$dset], $rollback_level)->[0];
+	  #This cannot store the focus sets as we don't know which are which yet
+	  #Only the script knows this
+	  # $dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory';
+	}
+	else{
+	  #We may have the case where we have a DataSet(with a FeatureSet) but no ResultSet
+	  #i.e. Load result_features after peak calls
+	  #So update dset with ResultSet
+	  if(! @{$dset->get_supporting_sets}){
+		$self->log("Updating DataSet with new ResultSet:\t".$rset->name);
+		$dset->add_supporting_sets([$rset]);
+		$dset = $dset_adaptor->store_updated_sets([$dset], $rollback_level)->[0];
+	  }
+	}
+}
+else{
+	$self->log("Creating new ${type}_feature DataSet:\t".$name);
+	if($type ne 'result'){
+	  ($dset) = @{$dset_adaptor->store(Bio::EnsEMBL::Funcgen::DataSet->new
+									   (
+										-name => $name,
+										-feature_set => $fset,
+										-supporting_sets => $ssets,
+									   ))};
+	  #$dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory';
+	}
+	else{
+	  warn "creating dataset $name with supporting set $rset";
+	  ($dset) = @{$dset_adaptor->store(Bio::EnsEMBL::Funcgen::DataSet->new
+									   (
+										-name => $name,
+										-supporting_sets => [$rset],
+									   ))};
+	}
+}
+return $dset;
+}
+#Rollback/load methods migrated from DBAdaptor
+#Move to SetAdaptors, where they are better located; this will also remove the cyclical dependency
+=head2 rollback_FeatureSet
+Arg [0]    : Bio::EnsEMBL::Funcgen::FeatureSet
+Arg [1]    : optional - boolean force delete flag, required if this FeatureSet is used as a
+supporting set for another DataSet.
+Arg [2]    : optional - arrayref of Bio::EnsEMBL::Slice objects to rollback
+Arg [3]    : optional - boolean flag to perform a full rollback i.e. the default will just remove
+the features, specifying this will also delete the feature_set record
+Example    : $self->rollback_FeatureSet($fset);
+Description: Deletes all status and feature entries for this FeatureSet.
+Checks whether FeatureSet is a supporting set in any other DataSet.
+Returntype : none
+Exceptions : Throws if any deletes fails or if db method unavailable
+Caller     : Importers and Parsers
+Status     : At risk
+=cut
+sub rollback_FeatureSet{
+#Purpose: delete the feature rows of a FeatureSet together with the dependent
+#regulatory_attribute, object_xref and associated_feature_type rows, optionally
+#restricted to a list of Slices, and optionally delete the feature_set and
+#data_set rows as well.
+#Args   : $fset         - stored FeatureSet to roll back
+#         $force_delete - only log a warning (rather than throw) if $fset is a
+#                         supporting set of another DataSet
+#         $slices       - arrayref of Slice objects to restrict the rollback to
+#         $full_delete  - also delete the feature_set/data_set rows (and the
+#                         regbuild meta rows for regulatory sets)
+#Throws : on invalid arguments, if a sub-slice is given along with other Slices,
+#         or if $fset is a supporting set and $force_delete is not specified.
+my ($self, $fset, $force_delete, $slices, $full_delete) = @_;
+#Remove force delete and just throw?
+#Currently only used in project_feature_set.
+#May want to keep an old RegBuild for mapping/comparison?
+#Could get around this by simply deleting the data_set? Unknown impact.
+#Move to config hash?
+#No need for rollback_level here as we always want to do the same thing
+my $sql;
+my $slice_join = '';
+my $table = $fset->feature_class.'_feature';
+my $adaptor = $fset->adaptor || throw('FeatureSet must have an adaptor');
+my $db = $adaptor->db;
+#Cyclical dependency here, so not strictly necessary.
+$db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureSet', $fset);
+$self->log_header('Rolling back '.$fset->feature_class." FeatureSet:\t".$fset->name);
+if($slices){
+	if($full_delete){
+	  throw("Cannot specify a full_delete for a Slice based rollback:\t".$fset->name);
+	}
+	#Compare ref() directly: '! ref($slices) eq ...' negates ref() before the
+	#comparison and so could never be true
+	if(ref($slices) ne 'ARRAY'){
+	  throw('Slices must be an ARRAYREF of Slice objects');
+	}
+	foreach my $slice(@$slices){
+	  if(! (ref($slice) && $slice->isa('Bio::EnsEMBL::Slice'))){
+		throw("Must pass a valid Bio::EnsEMBL::Slice");
+	  }
+	}
+	$self->log("Restricting to slices:\n\t\t".join("\n\t\t", (map { $_->name } @$slices)) );
+	#Allow subslice rollback only for one slice at a time
+	my $subslice = (scalar(@$slices) == 1) ? 1 : 0;
+	#Keep each seq_region_id paired with the Slice it was derived from, as
+	#Slices which are absent from the DB are dropped from the list
+	my (@sr_ids, @sr_slices);
+	foreach my $slice(@$slices){
+	  my $efg_sr_id = $fset->get_FeatureAdaptor->get_seq_region_id_by_Slice($slice);
+	  if(! $efg_sr_id){
+		$self->log("Slice is not present in eFG DB:\t".$slice->name);
+	  }else{
+		if(! $subslice){#Test is not subslice
+		  my $full_slice = $slice->adaptor->fetch_by_region(undef, $slice->seq_region_name);
+		  if(($slice->start != 1) ||
+			 ($full_slice->end != $slice->end)){
+			throw("Can only rollback subslices one at a time:\nRollback slice:\t"
+				  .$slice->name."\nFull slice:\t".$full_slice->name);
+		  }
+		}
+		push @sr_ids, $efg_sr_id;
+		push @sr_slices, $slice;
+	  }
+	}
+	if(! @sr_ids){
+	  #An empty 'in ()' list would only generate invalid SQL below
+	  $self->log("None of the specified slices are present in the eFG DB, nothing to rollback for:\t".$fset->name);
+	  return;
+	}
+	if(scalar(@sr_ids) == 1){
+	  #Allow sub slice rollback
+	  #add range here from meta coord?
+	  #Use the Slice which yielded the seq_region_id, which is not necessarily $slices->[0]
+	  $slice_join = " and f.seq_region_id=$sr_ids[0] and f.seq_region_start<=".$sr_slices[0]->end.' and f.seq_region_end>='.$sr_slices[0]->start;
+	}
+	else{
+	  $slice_join = ' and f.seq_region_id in ('.join(', ', @sr_ids).')';
+	}
+}
+#Check whether this is a supporting set for another data_set
+my @dsets = @{$db->get_DataSetAdaptor->fetch_all_by_supporting_set($fset)};
+if(@dsets){
+	my $txt = $fset->name." is a supporting set of the following DataSets:\t".join(', ', (map {$_->name} @dsets));
+	if($force_delete){
+	  $self->log("WARNING:\t$txt\n");
+	}
+	else{
+	  throw($txt."\nPlease resolve or specify the force_delete argument")
+	}
+}
+#Remove states
+if(! $slices){
+	$fset->adaptor->revoke_states($fset);
+	#Revoke InputSet states here as this refers to whether
+	#they are imported in the FeatureSet
+	#Do this in FeatureSet->revoke_states?
+	my $dset = $db->get_DataSetAdaptor->fetch_by_product_FeatureSet($fset);
+	#Account for absent dset if we have an external_feature set
+	if((! defined $dset) &&
+	   $fset->feature_class ne 'external'){
+	  warn "WARNING:\tFeatureSet ".$fset->name." does not have an associated DataSet. Rollback may be incomplete";
+	}
+	if($dset){
+	  foreach my $sset(@{$dset->get_supporting_sets}){
+		#Maybe skip this if we defined slice?
+		#??? Do we want to do this?
+		#This is dependant on the feature_class of the InputSet
+		#result InputSets may have been imported as ResultFeatureCollections
+		#So we want to leave those in place
+		#annotated feature_class InputSets are directly imports, so the status of these refers
+		#to the FeatureSet import status
+		#Where is the imported status set for SWEmbl?
+		#Test the class first, so feature_class is only ever called on an InputSet,
+		#and roll back once only (this was previously done twice)
+		if($sset->isa('Bio::EnsEMBL::Funcgen::InputSet') &&
+		   ($sset->feature_class eq 'annotated')){
+		  $self->rollback_InputSet($sset);#add full delete here?
+		  #Do not want to rollback here for other type of sset
+		}
+	  }
+	}
+}
+else{
+	$self->log('Skipping '.$fset->name.' revoke_states for partial Slice rollback, maybe revoke IMPORTED? ');
+}
+#should add some log statements here?
+#Rollback reg attributes
+if($fset->feature_class eq 'regulatory'){
+	$sql = "DELETE ra from regulatory_attribute ra, $table f where f.${table}_id=ra.${table}_id and f.feature_set_id=".$fset->dbID.$slice_join;
+	$self->rollback_table($sql, 'regulatory_attribute', undef, $db);
+	if($full_delete){
+	  #Now delete meta entries
+	  #This is messy as we use the following meta_key nomencalture
+	  #which do not match the fset names
+	  #regbuild.feature_set_ids_v5
+	  #regbuild.feature_type_ids_v5
+	  #regbuild.focus_feature_set_ids
+	  #regbuild.initial_release_date_v6
+	  #regbuild.last_annotation_update_v6
+	  #regbuild.version NEED TO ADD THIS
+	  #Also need to revise how these are generated by build_reg_feats.
+	  #What about new cell_type level feature sets?
+	  #How will we model these in the meta table?
+	  warn "Need to revise meta table entries before we add a delete here, remove manually for now for:\t".$fset->name;
+	  #We would only remove meta entries if we are performing a full rollback
+	  #Derive the '_vN' suffix from the set name, or use no suffix if the name is unversioned
+	  my $version;
+	  ($version = $fset->name) =~ s/.*_v([0-9]+)$/$1/;
+	  $version = ($version eq  $fset->name) ? '' : "_v${version}";
+	  #These are versionless meta_keys and apply to all sets
+	  #handle these in reg build script
+	  #'regbuild.initial_release_date',
+	  #'regbuild.last_annotation_update'
+	  #'regbuild.version'
+	  foreach my  $mkey('regbuild.%s.feature_set_ids',
+						'regbuild.%s.feature_type_ids',
+						'regbuild.%s.focus_feature_set_ids'){
+		my $meta_key = sprintf($mkey, $fset->cell_type->name).$version;
+		$sql = "DELETE from meta where meta_key='${meta_key}'";
+		$self->rollback_table($sql, 'meta', undef, $db);
+	  }
+	}
+}
+#Need to remove object xrefs here
+#Do not remove xrefs as these may be used by something else!
+$sql = "DELETE ox from object_xref ox, $table f where ox.ensembl_object_type='".ucfirst($fset->feature_class)."Feature' and ox.ensembl_id=f.${table}_id and f.feature_set_id=".$fset->dbID.$slice_join;
+$self->rollback_table($sql, 'object_xref', 'object_xref_id', $db);
+#Remove associated_feature_type records
+#Do not remove actual feature_type records as they may be used by something else.
+$sql ="DELETE aft from associated_feature_type aft, $table f where f.feature_set_id=".$fset->dbID." and f.${table}_id=aft.table_id and aft.table_name='".$fset->feature_class."_feature'".$slice_join;
+$self->rollback_table($sql, 'associated_feature_type', undef, $db);
+#Remove features
+$sql = "DELETE f from $table f where f.feature_set_id=".$fset->dbID.$slice_join;
+$self->rollback_table($sql, $table, "${table}_id", $db);
+if($full_delete){ #Also delete feature/data_set records
+	$sql = "DELETE from feature_set where feature_set_id=".$fset->dbID;
+	$self->rollback_table($sql, 'feature_set', 'feature_set_id', $db);
+	$self->log("Deleted feature_set entry for:\t".$fset->name);
+	$sql = "DELETE from data_set where feature_set_id=".$fset->dbID;
+	$self->rollback_table($sql, 'data_set', 'data_set_id', $db);
+	$self->log("Deleted associated data_set entry for:\t".$fset->name);
+}
+return;
+}
+=head2 rollback_ResultSet
+Arg[1]     : Bio::EnsEMBL::Funcgen::ResultSet
+Arg[2]     : Boolean - optional flag to roll back array results
+Example    : $self->rollback_ResultSet($rset);
+Description: Deletes all status, chip_channel and result_set entries for this ResultSet.
+Will also rollback_results sets if rollback_results specified.  This will also
+update or delete associated ResultSets where appropriate.
+Returntype : Arrayref containing the ResultSet and associated DataSet which have not been rolled back
+Exceptions : Throws if ResultSet not valid
+Throws if result_rollback flag specified but associated product FeatureSet found.
+Caller     : General
+Status     : At risk
+=cut
+#Need to change slice to slices ref here
+#Need to add full rollback, which will specify to remove all sets
+#as well as results and
+#These params need clarifying as their nature changes between input_set and array rsets
+#Don't we always want to rollback_results?
+#force should only really be used to rollback InputSet ResultFeature sets
+#i.e. Read collections which are not used as direct input for the linked product FeatureSet
+#This should fail with array data associated with a product feature set
+#Do we want to separate ResultFeature rollback from result rollback?
+#Currently the array based collection rollback is done by hand
+#Could be done via the ResultFeature Collector, but should probably use this method.
+#rollback_results is only used in the MAGE parser to identify sets which have an
+#associated product fset.
+#Can't really separate due to integrated functionality
+sub rollback_ResultSet{
+#Purpose: roll back a ResultSet, optionally including its results/result_features,
+#and, for a full delete, its result_set_input and result_set records.
+#Args   : $rset             - stored ResultSet
+#         $rollback_results - also roll back the results and states
+#         $slice            - restrict the rollback to this Slice (input_set ResultSets only)
+#         $force            - allow a results rollback of an input_set ResultSet
+#                             which supports a product FeatureSet
+#         $full_delete      - also delete the result_set_input/result_set records
+#                             and unlink unprocessed DataSets (ignored if $slice is given)
+#Returns: arrayref which is empty, or holds the ResultSet and the last DataSet
+#         found with a product FeatureSet, which prevented the rollback.
+#Throws : on invalid arguments, or if a results rollback is blocked by a product FeatureSet.
+my ($self, $rset, $rollback_results, $slice, $force, $full_delete) = @_;
+if(! (ref($rset) && $rset->can('adaptor') && defined $rset->adaptor)){
+	throw('Must provide a valid stored Bio::EnsEMBL::ResultSet');
+}
+if($slice && $rset->table_name ne 'input_set'){
+	throw('Can only rollback_ResultSet by Slice if the ResultSet contains InputSets');
+}
+#We're still validating against itself??
+#And reciprocating part of the test :|
+my $sql;
+my $db = $rset->adaptor->db;#This needs to be tested
+$db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ResultSet', $rset);
+$self->log("Rolling back ResultSet:\t".$rset->name);
+my $dset_adaptor = $self->db->get_DataSetAdaptor;
+my $rset_adaptor = $self->db->get_ResultSetAdaptor;
+my @skipped_sets;
+### Check if this ResultSet is part of a DataSet with a product feature set
+foreach my $dset(@{$dset_adaptor->fetch_all_by_supporting_set($rset)}){
+	if (defined $dset){
+	  $self->log('Found linked DataSet('.$dset->name.") for ResultSet:\t".$rset->log_label);
+	  if(my $fset = $dset->product_FeatureSet){
+		@skipped_sets = ($rset,$dset);
+		#What impact does this have on result_rollback?
+		#None as we never get there
+		#But what if we have specified rollback results?
+		#We should throw here as we can't perform the rollback
+		if($rollback_results){
+		  if($rset->table_name ne 'input_set' ||
+			(! $force)){#is an input_set/reads collection
+			#This will always throws for non-input_set ResultSets
+			throw("Could not rollback supporting ResultSet and results for:\t".$rset->log_label.
+				  "\nEither manually resolve the supporting/feature set relationship or set the 'force' flag.\n");
+			#  ."Alternatively omit the rollback_results argument if you simply want to redefine the ResultSet without loading any new data");
+			#This last bit is no longer true
+			#Remove rollback_results?
+		  }
+		  else{
+			@skipped_sets = ();
+			$self->log("Forcing results rollback for InputSet based ResultSet:\t".$rset->log_label);
+		  }
+		}
+		if(@skipped_sets){
+		  $self->log('Skipping rollback. Found product FeatureSet('.$fset->name.") for supporting ResultSet:\t".$rset->log_label);
+		}
+	  }
+	  elsif((! defined $slice) &&
+			$full_delete){
+		#Found rset in dset, but not yet processed so can remove safely.
+		$self->unlink_ResultSet_DataSet($rset, $dset);
+	  }
+	}
+}
+#Now do similar for all associated ResultSets
+if(! @skipped_sets){
+	#Rollback results if required
+	if($rollback_results){
+	  $self->log("Rolling back results for ResultSet:\t".$rset->log_label);
+	  #Check result_set_input_ids are present in other result sets.
+	  my @assoc_rsets = @{$rset_adaptor->fetch_all_linked_by_ResultSet($rset)};
+	  my $feature_supporting = 0;
+	  foreach my $assoc_rset(@assoc_rsets){
+		foreach my $dset(@{$dset_adaptor->fetch_all_by_supporting_set($assoc_rset)}){
+		  #Check for other product_FeatureSets
+		  if(my $fset = $dset->product_FeatureSet){
+			#NOTE(review): this unconditional increment makes the conditional one
+			#below redundant, so $force cannot avoid the throw and the count in the
+			#throw message may be doubled. Behaviour left unchanged - confirm intent.
+			$feature_supporting++;
+			$self->log('Found product FeatureSet('.$fset->name.
+					   ") for associated supporting ResultSet:\t".$rset->log_label);
+			if($rset->table_name ne 'input_set' ||
+			   (! $force)){#is an input_set/reads collection
+			  $feature_supporting++;
+			}
+		  }
+		}
+	  }
+	  if(! $feature_supporting){
+		#RollBack result_feature table first
+		$self->rollback_ResultFeatures($rset, $slice);
+		#Now rollback other states
+		$rset->adaptor->revoke_states($rset);
+		#This also handles Echip status rollback
+		if ($rset->table_name ne 'input_set'){
+		  $self->log("Rolling back result table for ResultSet:\t".$rset->log_label);
+		  $self->rollback_results($rset->result_set_input_ids);
+		}
+		$self->log('Removing result_set_input entries from associated ResultSets') if @assoc_rsets;
+		if((! $slice) &&
+		   $full_delete){
+		  #Now remove result_set_input_ids from associated rsets.
+		  #Only the ids of the ResultSet being rolled back are removed, so an
+		  #associated set keeps any inputs it does not share with $rset.
+		  #NOTE(review): the id list was previously taken from $assoc_rset itself,
+		  #and was collapsed to a count by a join precedence error - confirm intent.
+		  my @rset_input_ids = @{$rset->result_set_input_ids};
+		  my %is_rset_input  = map { $_ => 1 } @rset_input_ids;
+		  foreach my $assoc_rset(@assoc_rsets){
+			if(@rset_input_ids){#an empty 'in()' list is invalid SQL
+			  $sql = 'DELETE from result_set_input where result_set_id='.$assoc_rset->dbID.
+				' and result_set_input_id in('.join(', ', @rset_input_ids).')';
+			  $db->dbc->do($sql);
+			}
+			# we need to delete complete subsets from the result_set table.
+			my $subset = 1;
+			foreach my $cc_id(@{$assoc_rset->result_set_input_ids}){
+			  #Exact id lookup, a pattern match would let e.g. 1 match 10 or 21
+			  if(! exists $is_rset_input{$cc_id}){
+				$subset = 0;
+				last;
+			  }
+			}
+			#$assoc_rset is complete subset of $rset so can delete
+			#We know this does not have an associated product feature set
+			#Only if it is not derived from an input_set
+			if($subset){
+			  $self->log("Deleting associated subset ResultSet:\t".$assoc_rset->log_label);
+			  #Delete status entries first
+			  $assoc_rset->adaptor->revoke_states($assoc_rset);
+			  #All cc records will have already been deleted
+			  $sql = 'DELETE from result_set where result_set_id='.$assoc_rset->dbID;
+			  $db->dbc->do($sql);
+			}
+		  }
+		}
+		#Now warn about Echips in Experiments which may need removing.
+		if($rset->table_name ne 'input_set'){
+		  #Experiment name => { ExperimentalChip unique_id => undef }
+		  my %experiment_chips;
+		  foreach my $echip(@{$rset->get_ExperimentalChips}){
+			$experiment_chips{$echip->experiment->name}{$echip->unique_id} = undef;
+		  }
+		  foreach my $exp(keys %experiment_chips){
+			#The unique_ids are the hash keys, the values are all undef
+			my @chip_ids = keys %{$experiment_chips{$exp}};
+			$self->log("Experiment $exp has had ".scalar(@chip_ids).
+					   " ExperimentalChips rolled back:\t".join('; ', @chip_ids).
+					   ".\nTo fully remove these, use the rollback_experiment.pl (with -chip_ids) script");
+		  }
+		}
+		else{
+		  #Should only be one to rollback
+		  foreach my $iset(@{$rset->get_InputSets}){
+			$self->rollback_InputSet($iset);
+		  }
+		}
+	  }
+	  else{
+		#$self->log("Skipping result rollback, found $feature_supporting associated supporting ResultSets for:\t".$rset->log_label);
+		#warn("Skipping result rollback, found $feature_supporting associated supporting ResultSets for:\t".$rset->log_label);
+		#do we need to return this info in skipped_rsets?
+		#This is just to allow importer to know which ones
+		#weren't rolled back to avoid naming clashes.
+		#so no.
+		#But the results persist on the same chip_channel_ids
+		#So not returning this rset may result in loading of more data
+		#This should fail as status entries will not have been removed
+		#Still we should throw here as we'll most likely want to manually resolve this
+		#Besides this would be obfuscating the function
+		throw("Could not rollback ResultSet and results, found $feature_supporting associated supporting ".
+			  "ResultSets for:\t".$rset->log_label."\nManually resolve the supporting/feature set relationship or omit the ".
+			 "rollback_results argument if you simply want to redefine the ResultSet without loading any new data");
+	  }
+	}
+	else{
+	  $self->log('Skipping results rollback');
+	  if($rset->name =~ /_IMPORT$/){
+		throw("Rolling back an IMPORT set without rolling back the result can result in ophaning result records for a whole experiment.  Specify the result_rollback flag if you want to rollback the results for:\t".$rset->log_label);
+	  }
+	}
+	#Delete chip_channel and result_set records
+	#This should only be done with full delete
+	if((! $slice) &&
+	  $full_delete){
+	  $sql = 'DELETE from result_set_input where result_set_id='.$rset->dbID;
+	  $self->rollback_table($sql, 'result_set_input', 'result_set_input_id', $db);
+	  #Executed via rollback_table only; this statement was previously also run
+	  #directly beforehand, so rollback_table was handed an already deleted record
+	  $sql = 'DELETE from result_set where result_set_id='.$rset->dbID;
+	  $self->rollback_table($sql, 'result_set', 'result_set_id', $db);
+	}
+}
+return \@skipped_sets;
+}
+sub unlink_ResultSet_DataSet{
+#Purpose: remove the supporting_set record which links a ResultSet to a DataSet
+#and optionally rename the ResultSet with an 'OLD_' prefix.
+#Args   : $rset     - ResultSet to unlink, its adaptor provides the DB connection
+#         $dset     - DataSet to remove the ResultSet from
+#         $new_name - only used as a boolean flag: if true the result_set record
+#                     is renamed to 'OLD_<current name>'
+#Returns: nothing
+my ($self, $rset, $dset, $new_name) = @_;
+#validate set vars
+my $db = $rset->adaptor->db;
+$self->log("Removing supporting ResultSet from DataSet:\t".$dset->name."\tResultSet:".$rset->log_label);
+my $sql = 'DELETE from supporting_set where data_set_id='.$dset->dbID.
+	' and type="result" and supporting_set_id='.$rset->dbID;
+warn "Removing ".$rset->log_label." as a supporting set to DataSet:\t".$dset->name.
+	"\nThis may result in a DataSet with no supporting sets";
+$db->dbc->do($sql);
+if($new_name){
+	#We risk overwriting any previously renamed result sets.
+	#Should use datestamp?
+	#Quote the name via the DBI handle so quote characters in a set name cannot
+	#break the statement, and run it on the same DB as the delete above
+	$sql = 'UPDATE result_set set name='.$db->dbc->db_handle->quote('OLD_'.$rset->name).
+	  ' where result_set_id='.$rset->dbID;
+	$db->dbc->do($sql);
+	if($dset->product_FeatureSet){
+	  $self->log('Associated DataSet('.$dset->name.') has already been processed. It is not wise to replace a supporting set without first rolling back the FeatureSet, as there may be additional supporting data');
+	  warn 'Associated DataSet('.$dset->name.') has already been processed. It is not wise to replace a supporting set without first rolling back the FeatureSet, as there may be additional supporting data';
+	}
+}
+return;
+}
+=head2 rollback_InputSet
+Arg[1]     : Bio::EnsEMBL::Funcgen::InputSet
+Example    : $self->rollback_InputSet($eset);
+Description: Deletes all status entries for this InputSet and its Subsets
+Returntype : none
+Exceptions : Throws if any deletes fails or if db method unavailable
+Caller     : Importers and Parsers
+Status     : At risk
+=cut
+sub rollback_InputSet{
+#Revokes all states of each InputSubset of the given InputSet, then those of
+#the InputSet itself. Throws if the InputSet has no adaptor.
+#$force_delete and $full_delete are accepted but are not yet acted upon.
+my ($self, $eset, $force_delete, $full_delete) = @_;
+#TODO: implement force_delete
+#TODO: check this is not used in a DataSet/ResultSet
+my $iset_adaptor = $eset->adaptor;
+throw('InputSet must have an adaptor') if ! $iset_adaptor;
+$iset_adaptor->db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::InputSet', $eset);
+$self->log("Rolling back InputSet:\t".$eset->name);
+#Subsets first, the InputSet itself last
+foreach my $set(@{$eset->get_InputSubsets}, $eset){
+	$set->adaptor->revoke_states($set);
+}
+return;
+}
+=head2 rollback_results
+Arg[1]     : Arrayref of chip_channel ids
+Example    : $self->rollback_results($rset->result_set_input_ids);
+Description: Deletes all result records for the given chip_channel ids.
+Also deletes all status records for associated experimental_chips or channels
+Returntype : None
+Exceptions : Throws if no chip_channel ids provided
+Caller     : General
+Status     : At risk
+=cut
+#changed implementation to take arrayref
+sub rollback_results{
+#Purpose: delete the result records for the given result_set_input ids, along
+#with the status records which share a table_id/table_name with those inputs.
+#Args   : $cc_ids - non-empty arrayref of result_set_input ids
+#Returns: nothing
+#Throws : if $cc_ids is not a non-empty arrayref, or if the status delete fails
+my ($self, $cc_ids) = @_;
+#Need to test for $self->db here?
+#Validate before dereferencing, so that a missing or invalid argument results
+#in the throw below rather than a dereference error
+if(! (ref($cc_ids) eq 'ARRAY' && @$cc_ids)){
+	throw('Must pass an array ref of result_set_input_ids to rollback');
+}
+my @cc_ids = @{$cc_ids};
+#Rollback status entries
+#Cannot use revoke_states here?
+#We can if we retrieve the Chip or Channel first
+#Add to ResultSet adaptor
+my $sql = 'DELETE s from status s, result_set_input rsi WHERE rsi.result_set_input_id IN ('.join(',', @cc_ids).
+	') AND rsi.table_id=s.table_id AND rsi.table_name=s.table_name';
+if(! $self->db->dbc->do($sql)){
+	throw("Status rollback failed for result_set_input_ids:\t@cc_ids\n".$self->db->dbc->db_handle->errstr());
+}
+#Rollback result entries
+$sql = 'DELETE from result where result_set_input_id in ('.join(',', @cc_ids).');';
+$self->rollback_table($sql, 'result', 'result_id', $self->db);
+return;
+}
+=head2 rollback_ResultFeatures
+Arg[0]     : Bio::EnsEMBL::Funcgen::ResultSet
+Arg[1]     : Optional - Bio::EnsEMBL::Slice
+Arg[2]     : Optional - no_revoke Boolean. This is only used when generating new windows
+from a 0 window size which has been projected from a previous assembly.
+Example    : $self->rollback_ResultFeatures($rset);
+Description: Deletes all result_feature records for the given ResultSet.
+Also deletes 'RESULT_FEATURE_SET' status.
+Returntype : None
+Exceptions : Throws if ResultSet not provided
+Caller     : General
+Status     : At risk
+=cut
+sub rollback_ResultFeatures{
+#Purpose: delete the result_feature records of a ResultSet, optionally for one
+#full length Slice only, revoking the RESULT_FEATURE_SET status if it is set.
+#Args   : $rset      - stored ResultSet
+#         $slice     - optional full length Slice to restrict the rollback to
+#         $no_revoke - do not revoke the RESULT_FEATURE_SET status (requires $slice)
+#Returns: nothing; returns early if the Slice seq_region is not present in the DB
+#Throws : on invalid arguments or if the Slice is not full length
+my ($self, $rset, $slice, $no_revoke) = @_;
+if(! (ref($rset) && $rset->can('adaptor') && defined $rset->adaptor)){
+	throw('Must provide a valid stored Bio::EnsEMBL::ResultSet');
+}
+if(! $slice && $no_revoke){
+	throw("Cannot rollback_ResultFeatures with no_revoke unless you specify a Slice");
+}
+#else warn if slice and no_revoke?
+my $sql;
+#Default to empty strings, as both are appended below even when no Slice is given
+my $slice_name       = '';
+my $slice_constraint = '';
+if($slice){
+	if(ref($slice) && $slice->isa('Bio::EnsEMBL::Slice')){
+	  my $sr_id = $rset->adaptor->db->get_ResultFeatureAdaptor->get_seq_region_id_by_Slice($slice);
+	  if($sr_id){
+		#Need to test for full slice here
+		my $full_slice = $slice->adaptor->fetch_by_region(undef, $slice->seq_region_name);
+		$slice_name = "\t".$slice->name;
+		$slice_constraint = ' and seq_region_id='.$sr_id;
+		if(($slice->start != 1) ||
+		   ($slice->end != $full_slice->end)){
+		  throw("rollback_ResultFeatures does not yet support non-full length Slices:\t".$slice_name);
+		  #Need to test whether we have non-0 wsize collections without the exact seq_region values
+		  #$sql='SELECT window_size from result_feature where result_feature_id='.$rset->dbID.
+		  #	' and window_size!=0 and seq_region_start!='.$slice->start.' and seq_region_end!='.$slice->end.$slice_constraint;
+		}
+	  }
+	  else{#seq_region is not yet present in DB
+		return;
+	  }
+	}
+	else{
+	  throw('slice argument must be a valid Bio::EnsEMBL::Slice');
+	}
+}
+#We're still validating against itself??
+#And reciprocating part of the test :|
+my $db = $rset->adaptor->db;
+$db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ResultSet', $rset);
+#Do this conditionally on whether it is a result_feature_set?
+#This may break if we have removed the status but not finished the rollback so no!
+$self->log("Rolling back result_feature table for ResultSet:\t".$rset->name.$slice_name);
+#Rollback status entry
+if($rset->has_status('RESULT_FEATURE_SET') && ! $no_revoke){
+	$rset->adaptor->revoke_status('RESULT_FEATURE_SET', $rset);
+}
+#Cannot use revoke_states here?
+#We can if we retrieve the Chip or Channel first
+#Add to ResultSet adaptor
+$sql = 'DELETE from result_feature where result_set_id='.$rset->dbID.$slice_constraint;
+$self->rollback_table($sql, 'result_feature', 'result_feature_id', $db);
+return;
+}
+=head2 rollback_ArrayChips
+Arg[1]     : ARRAYREF: Bio::EnsEMBL::Funcgen::ArrayChip objects
+Example    : $self->rollback_ArrayChips([$achip1, $achip2]);
+Description: Deletes all Probes, ProbeSets, ProbeFeatures and
+states associated with this ArrayChip
+Returntype : None
+Exceptions : Throws if ArrayChip not valid and stored
+Throws if ArrayChips are not of same class
+Caller     : General
+Status     : At risk
+=cut
+#This should be tied to a CS id!!!
+#And analysis dependant?
+#We may not want to delete alignment by different analyses?
+#In practise the slice methods ignore analysis_id for this table
+#So we currently never use this!
+#So IMPORTED status should be tied to CS id and Analysis id?
+sub rollback_ArrayChips{
+my ($self, $acs, $mode, $force, $keep_xrefs, $no_clean_up, $force_clean_up) = @_;
+#no_clean_up and force_clean_up allow analyze/optimize to be skipped until the last rollback
+#We could get around this by specifying all ArrayChips for all formats at the same time?
+#Need to implement in RollbackArrays
+$mode ||= 'probe';
+if($mode && ($mode ne 'probe' &&
+			   $mode ne 'probe_feature' &&
+			   $mode ne 'ProbeAlign' &&
+			   $mode ne 'ProbeTranscriptAlign' &&
+			   $mode ne 'probe2transcript')){
+	throw("You have passed an invalid mode argument($mode), you must omit or specify either 'probe2transcript', 'probe', 'ProbeAlign, 'ProbeTranscriptAlign' or 'probe_feature' for all of the Align output");
+}
+if($force && ($force ne 'force')){
+	throw("You have not specified a valid force argument($force), you must specify 'force' or omit");
+}
+if($keep_xrefs && ($keep_xrefs ne 'keep_xrefs')){
+	throw("You have not specified a valid keep_xrefs argument($keep_xrefs), you must specify 'keep_xrefs' or omit");
+}
+if($keep_xrefs){
+	if($mode eq 'probe' || $mode eq 'probe2transcript'){
+	  throw("You cannot specify 'keep_xrefs' with mode $mode, you can only rollback features e.g. probe_feature, ProbeAlign or ProbeTranscriptAlign");
+	}
+	if($force){
+	  throw("You cannot 'force' delete the probe2transcript xrefs and 'keep_xrefs' at the same time. Please specify just one.");
+	}
+}
+my ($adaptor, $db, %classes);
+foreach my $ac(@$acs){
+	$adaptor ||= $ac->adaptor || throw('ArrayChip must have an adaptor');
+	$db      ||= $adaptor->db;
+	$db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ArrayChip', $ac);
+	if(! $ac->get_Array->class){
+	  throw('The ArrayChip you are trying to rollback does not have a class attribute');
+	}
+	$classes{$ac->get_Array->class} = undef;
+	#if($class && ($class ne $ac->get_Array->class)){
+	#  throw('You can only rollback_ArrayChips for ArrayChips with the same class');
+	#}
+}
+#This is always the case as we register the association before we set the Import status
+#Hence the 2nd stage of the import fails as we have an associated ExperimentalChip
+#We need to make sure the ExperimentalChip and Channel have not been imported!!!
+warn "NOTE: rollback_ArrayChips. Need to implement ExperimentlChip check, is the problem that ExperimentalChips are registered before ArrayChips imported?";
+#Check for dependent ExperimentalChips
+#if(my @echips = @{$db->get_ExperimentalChipAdaptor->fetch_all_by_ArrayChip($ac)}){
+#	my %exps;
+#	my $txt = "Experiment\t\t\t\tExperimentalChip Unique IDs\n";
+#	foreach my $ec(@echips){
+#	  $exps{$ec->get_Experiment->name} ||= '';
+#	  $exps{$ec->get_Experiment->name} .= "\t".$ec->unique_id;
+#	}
+#	map {$txt.= "\t".$_.":".$exps{$_}."\n"} keys %exps;
+#	throw("Cannot rollback ArrayChip:\t".$ac->name.
+#		  "\nFound Dependent Experimental Data:\n".$txt);
+#  }
+my $ac_names = join(', ', (map { $_->name } @$acs));
+my $ac_ids   = join(', ', (map { $_->dbID } @$acs));
+$self->log("Rolling back ArrayChips $mode entries:\t$ac_names");
+my ($row_cnt, $probe_join, $sql);
+#$ac->adaptor->revoke_states($ac);#This need to be more specific to the type of rollback
+my $species = $db->species;
+if(!$species){
+	throw('Cannot rollback probe2transcript level xrefs without specifying a species for the DBAdaptor');
+}
+#Will from registry? this return Homo sapiens?
+#Or homo_sapiens
+($species = lc($species)) =~ s/ /_/;
+my $transc_edb_name = "${species}_core_Transcript";
+my $genome_edb_name = "${species}_core_Genome";
+#Maybe we want to rollback ProbeAlign and ProbeTranscriptAlign output separately so we
+#can re-run just one part of the alignment step.
+#We want this Probe(Transcript)Align rollback available in the environment
+#So we can do it natively and before we get to the RunnableDB stage,
+#where we would be trying multiple rollbacks in parallel
+#Wrapper script?
+#Or do we keep it simple here and maintain probe_feature wide rollback
+#And just the ProbeAlign/ProbeTranscriptAlign roll back in the environment?
+#We can restrict the probe deletes using the ac_id
+#We should test for other ac_ids using the same probe_id
+#Then fail unless we have specified force delete
+#These should be deleted for all other modes but only if force is set?
+#This may delete xrefs for other ArrayChips
+#The issues is if we need to specify force for one delete but don't want to delete something else?
+#force should only be used to delete upto and including the mode specified
+#no mode equates to probe mode
+#if no force then we fail if previous levels/modes have xrefs etc...
+#Let's grab the edb ids first and use them directly, this will avoid table locks on edb
+#and should also speed query up?
+if($mode eq 'probe2transcript' ||
+	 $force){
+	#Delete ProbeFeature UnmappedObjects
+	$self->log("Deleting probe2transcript ProbeFeature UnmappedObjects");
+	$sql = "DELETE uo FROM analysis a, unmapped_object uo, probe p, probe_feature pf, external_db e WHERE a.logic_name ='probe2transcript' AND a.analysis_id=uo.analysis_id AND p.probe_id=pf.probe_id and pf.probe_feature_id=uo.ensembl_id and uo.ensembl_object_type='ProbeFeature' and uo.external_db_id=e.external_db_id AND e.db_name ='${transc_edb_name}' AND p.array_chip_id IN($ac_ids)";
+	$self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);
+	#Delete ProbeFeature Xrefs/DBEntries
+	$self->log("Deleting probe2transcript ProbeFeature Xrefs");
+	$sql = "DELETE ox FROM xref x, object_xref ox, probe p, probe_feature pf, external_db e WHERE x.external_db_id=e.external_db_id AND e.db_name ='${transc_edb_name}' AND x.xref_id=ox.xref_id AND ox.ensembl_object_type='ProbeFeature' AND ox.ensembl_id=pf.probe_feature_id AND pf.probe_id=p.probe_id AND ox.linkage_annotation!='ProbeTranscriptAlign' AND p.array_chip_id IN($ac_ids)";
+	$self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up);
+	#Probe/Set specific entries
+	for my $xref_object('Probe', 'ProbeSet'){
+	  $probe_join = ($xref_object eq 'ProbeSet') ? 'p.probe_set_id' : 'p.probe_id';
+	  #Delete Probe/Set UnmappedObjects
+	  $self->log("Deleting probe2transcript $xref_object UnmappedObjects");
+	  $sql = "DELETE uo FROM analysis a, unmapped_object uo, probe p, external_db e WHERE a.logic_name='probe2transcript' AND a.analysis_id=uo.analysis_id AND uo.ensembl_object_type='${xref_object}' AND $probe_join=uo.ensembl_id AND uo.external_db_id=e.external_db_id AND e.db_name='${transc_edb_name}' AND p.array_chip_id IN($ac_ids)";
+	  #.' and edb.db_release="'.$schema_build.'"';
+	  $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);
+	  #Delete Probe/Set Xrefs/DBEntries
+	  $sql = "DELETE ox FROM xref x, object_xref ox, external_db e, probe p WHERE x.xref_id=ox.xref_id AND e.external_db_id=x.external_db_id AND e.db_name ='${transc_edb_name}' AND ox.ensembl_object_type='${xref_object}' AND ox.ensembl_id=${probe_join} AND p.array_chip_id IN($ac_ids)";
+	  $self->log("Deleting probe2transcript $xref_object xref records");
+	  $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up);
+	}
+}
+elsif(! $keep_xrefs){#Need to check for existing xrefs if not force
+	#we don't know whether this is on probe or probeset level
+	#This is a little hacky as there's not way we can guarantee this xref will be from probe2transcript
+	#until we get the analysis_id moved from identity_xref to xref
+	#We are also using the Probe/Set Xrefs as a proxy for all other Xrefs and UnmappedObjects
+	#Do we need to set a status here? Would have problem rolling back the states of associated ArrayChips
+	for my $xref_object('Probe', 'ProbeSet'){
+	  $probe_join = ($xref_object eq 'ProbeSet') ? 'p.probe_set_id' : 'p.probe_id';
+	  $row_cnt = $db->dbc->db_handle->selectrow_array("SELECT COUNT(*) FROM xref x, object_xref ox, external_db e, probe p WHERE x.xref_id=ox.xref_id AND e.external_db_id=x.external_db_id AND e.db_name ='${transc_edb_name}' and ox.ensembl_object_type='${xref_object}' and ox.ensembl_id=${probe_join} AND p.array_chip_id IN($ac_ids)");
+	  if($row_cnt){
+		throw("Cannot rollback ArrayChips($ac_names), found $row_cnt $xref_object Xrefs. Pass 'force' argument or 'probe2transcript' mode to delete");
+	  }
+	  else{
+		#$self->log("Found $row_cnt $xref_object Xrefs");
+	  }
+	}
+}
+#ProbeFeatures inc ProbeTranscriptAlign xrefs
+if($mode ne 'probe2transcript'){
+	if(($mode eq 'probe' && $force) ||
+	   $mode eq 'probe_feature'  ||
+	   $mode eq 'ProbeAlign' ||
+	   $mode eq 'ProbeTranscriptAlign'){
+	  #Should really revoke some state here but we only have IMPORTED
+	  #ProbeTranscriptAlign Xref/DBEntries
+	  #my (@anal_ids) = @{$db->get_AnalysisAdaptor->generic_fetch("a.module='ProbeAlign'")};
+	  #Grrrr! AnalysisAdaptor is not a standard BaseAdaptor implementation
+	  #my @anal_ids = @{$db->dbc->db_handle->selectall_arrayref('select analysis_id from analysis where module like "%ProbeAlign"')};
+	  #@anal_ids = map {$_= "@$_"} @anal_ids;
+	  if($mode ne 'ProbeAlign'){
+		my $lnames = join(', ', (map { "'${_}_ProbeTranscriptAlign'" } keys(%classes)));
+		$sql = "DELETE ox from object_xref ox, xref x, probe p, probe_feature pf, external_db e WHERE ox.ensembl_object_type='ProbeFeature' AND ox.linkage_annotation='ProbeTranscriptAlign' AND ox.xref_id=x.xref_id AND e.external_db_id=x.external_db_id and e.db_name='${transc_edb_name}' AND ox.ensembl_id=pf.probe_feature_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
+		$self->log("Deleting ProbeFeature Xref/DBEntry records for:\t$lnames");
+		$self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up);
+		#Can't include uo.type='ProbeTranscriptAlign' in these deletes yet as uo.type is enum'd to xref or probe2transcript
+		#will have to join to analysis and do a like "%ProbeTranscriptAlign" on the the logic name?
+		#or/and ur.summary_description='Promiscuous probe'?
+		$sql = "DELETE uo from unmapped_object uo, probe p, external_db e, analysis a WHERE uo.ensembl_object_type='Probe' AND uo.analysis_id=a.analysis_id AND a.logic_name in (${lnames}) AND e.external_db_id=uo.external_db_id and e.db_name='${transc_edb_name}' AND uo.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
+		$self->log("Deleting UnmappedObjects for:\t${lnames}");
+		$self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);
+		#Now the actual ProbeFeatures
+		$sql = "DELETE pf from probe_feature pf, probe p, analysis a WHERE a.logic_name in(${lnames}) AND a.analysis_id=pf.analysis_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
+		$self->log("Deleting ProbeFeatures for:\t${lnames}");
+		$self->rollback_table($sql, 'probe_feature', 'probe_feature_id', $db, $no_clean_up);
+	  }
+	  if($mode ne 'ProbeTranscriptAlign'){
+		my $lnames = join(', ', (map { "'${_}_ProbeAlign'" } keys(%classes)));
+		$sql = "DELETE uo from unmapped_object uo, probe p, external_db e, analysis a WHERE uo.ensembl_object_type='Probe' AND uo.analysis_id=a.analysis_id AND a.logic_name=(${lnames}) AND e.external_db_id=uo.external_db_id and e.db_name='${genome_edb_name}' AND uo.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
+		$self->log("Deleting UnmappedObjects for:\t${lnames}");
+		$self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up);
+		$sql = "DELETE pf from probe_feature pf, probe p, analysis a WHERE a.logic_name in(${lnames}) AND a.analysis_id=pf.analysis_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
+		$self->log("Deleting ProbeFeatures for:\t${lnames}");
+		$self->rollback_table($sql, 'probe_feature', 'probe_feature_id', $db, $no_clean_up);
+	  }
+	}
+	else{
+	  #Need to count to see if we can carry on with a unforced probe rollback?
+	  #Do we need this level of control here
+	  #Can't we assume that if you want probe you also want probe_feature?
+	  #Leave for safety, at least until we get the dependant ExperimetnalChip test sorted
+	  #What about if we only want to delete one array from an associated set?
+	  #This would delete all the features from the rest?
+	  $sql = "select count(*) from object_xref ox, xref x, probe p, external_db e WHERE ox.ensembl_object_type='ProbeFeature' AND ox.linkage_annotation='ProbeTranscriptAlign' AND ox.xref_id=x.xref_id AND e.external_db_id=x.external_db_id and e.db_name='${transc_edb_name}' AND ox.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)";
+	  $row_cnt =  $db->dbc->db_handle->selectrow_array($sql);
+	  if($row_cnt){
+		throw("Cannot rollback ArrayChips($ac_names), found $row_cnt ProbeFeatures. Pass 'force' argument or 'probe_feature' mode to delete");
+	  }
+	   else{
+		 $self->log("Found $row_cnt ProbeFeatures");
+	  }
+	}
+	if($mode eq 'probe'){
+	  #Don't need to rollback on a CS as we have no dependant EChips?
+	  #Is this true?  Should we enforce a 3rd CoordSystem argument, 'all' string we delete all?
+	  foreach my $ac(@$acs){
+		$ac->adaptor->revoke_states($ac);#Do we need to change this to revoke specific states?
+		#Current states are only IMPORTED, so not just yet, but we could change this for safety?
+	  }
+	  #ProbeSets
+	  $sql = "DELETE ps from probe p, probe_set ps where p.array_chip_id IN($ac_ids) and p.probe_set_id=ps.probe_set_id";
+	  $self->rollback_table($sql, 'probe_set', 'probe_set_id', $db, $no_clean_up);
+	  #Probes
+	  $sql = "DELETE from probe where array_chip_id IN($ac_ids)";
+	  $self->rollback_table($sql, 'probe', 'probe_id', $db, $no_clean_up);
+	}
+}
+$self->log("Finished $mode roll back for ArrayChip:\t$ac_names");
+return;
+}
+#NOTE: The auto-increment reset done as part of the table clean up will just
+#fail silently if the reset value is less than the true autoinc value,
+#i.e. if there are parallel inserts going on.
+#So we can never assume that the $new_auto_inc will be used
+sub rollback_table{
+  # Executes a delete statement for one table, logs how many rows were removed
+  # and then, unless told otherwise, hands the table to refresh_table.
+  #
+  # Args: $sql            - statement to execute via $db->dbc->do
+  #       $table          - table name, used in the log/error text and passed to refresh_table
+  #       $id_field       - id column name, passed through to refresh_table
+  #       $db             - DB adaptor providing ->dbc
+  #       $no_clean_up    - true to skip refresh_table even though rows were deleted
+  #       $force_clean_up - true to always call refresh_table
+  # Returns nothing. Throws if executing the statement dies.
+  my ($self, $sql, $table, $id_field, $db, $no_clean_up, $force_clean_up) = @_;
+
+  # Trap any exception so it can be re-thrown with the table name and SQL attached
+  my $deleted = eval { $db->dbc->do($sql) };
+  throw("Failed to rollback table $table using sql:\t$sql\n$@") if $@;
+
+  # '0E0' (the DBI convention for zero affected rows) is logged as a plain 0
+  if($deleted eq '0E0'){
+    $deleted = 0;
+  }
+  $self->log("Deleted $deleted $table records");
+
+  # Clean up when forced, or when rows were actually deleted and clean up
+  # has not been switched off
+  my $clean_up = $force_clean_up || ($deleted && ! $no_clean_up);
+  $self->refresh_table($table, $id_field, $db) if $clean_up;
+
+  return;
+}
+#Now separated so that we can do this once at the end of a rollback of many Sets
+sub refresh_table{
+  # Resets the auto-increment value of a table (only when an id field is given)
+  # and then optimizes and analyzes the table.
+  #
+  # Args: $table    - name of the table to refresh
+  #       $id_field - auto-increment column; the reset is skipped when this is false
+  #       $db       - DB adaptor providing ->dbc
+  # Returns nothing.
+  my ($self, $table, $id_field, $db) = @_;
+
+  #This only works if the new value is available
+  #i.e. do not need lock for this to be safe
+  if($id_field){
+    $self->reset_table_autoinc($table, $id_field, $db);
+  }
+
+  $self->log("Optimizing and Analyzing $table");
+
+  my @maintenance_sql = (
+    "optimize table $table", #defrag data, sorts indices, updates table stats
+    "analyze  table $table", #analyses key distribution
+  );
+
+  foreach my $statement (@maintenance_sql){
+    $db->dbc->do($statement);
+  }
+
+  return;
+}
+sub reset_table_autoinc{
+  # Sets the AUTO_INCREMENT value of a table to one more than the highest value
+  # currently stored in the given field, or to 1 when no (true) value is found.
+  #
+  # Args: $table_name    - table to alter
+  #       $autoinc_field - column holding the auto-increment ids
+  #       $db            - Bio::EnsEMBL::DBSQL::DBAdaptor
+  # Returns nothing.
+  # Throws if any argument is missing or $db is not a DBAdaptor.
+  #
+  #TODO (original author): is this called elsewhere, or can it be merged with another method?
+  my($self, $table_name, $autoinc_field, $db) = @_;
+
+  if(!$table_name || !$autoinc_field || !$db){
+    throw('You must pass a table_name and an autoinc_field to reset the autoinc value');
+  }
+
+  if(!ref($db) || !$db->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')){
+    throw('Must pass a valid Bio::EnsEMBL::DBSQL::DBAdaptor');
+  }
+
+  # The highest stored id is read from the table itself rather than from
+  # "show table status": Auto_increment cannot be selected on its own there
+  # (it is field 11 of the row), so that would fail if the table format changes.
+  my ($highest_id) = $db->dbc->db_handle->selectrow_array(
+    "select $autoinc_field from $table_name order by $autoinc_field desc limit 1"
+  );
+
+  my $next_id = 1;
+  if($highest_id){
+    $next_id = $highest_id + 1;
+  }
+
+  $db->dbc->do("ALTER TABLE $table_name AUTO_INCREMENT=$next_id");
+
+  return;
+}
+=head2 get_core_display_name_by_stable_id
+Args [1]   : Bio::EnsEMBL::DBSQL::DBAdaptor
+Args [2]   : stable ID from core DB.
+Args [3]   : stable feature type e.g. gene, transcript, translation
+Example    : my $display_name = $self->get_core_display_name_by_stable_id($cdb, $stable_id, 'gene');
+Description: Fetches the display name for a stable ID from the core DB, caching the result by stable ID.
+Returntype : string - display name
+Exceptions : Throws if type is not valid.
+Caller     : General
+Status     : At risk
+=cut
+# --------------------------------------------------------------------------------
+# Build a cache of ensembl stable ID -> display_name
+# Returns the display name string; the cache itself is keyed on $stable_id only (not {$type}{$stable_id})
+#TODO: Need to update cache if we're doing more than one 'type' at a time,
+# as a stable ID already cached for one type will never get reloaded for the new type!
+sub get_core_display_name_by_stable_id{
+  # Looks up the display label of a core gene/transcript/translation by stable ID,
+  # caching each result in $self->{'display_name_cache'} keyed on the stable ID.
+  #
+  # Args: $cdb       - core DB adaptor, queried via $cdb->dbc->db_handle
+  #       $stable_id - stable ID to look up
+  #       $type      - 'gene', 'transcript' or 'translation' (case insensitive)
+  # Returns the display label, or undef when the query yields no row.
+  # Throws if $type is not one of the supported types.
+  my ($self, $cdb, $stable_id, $type) = @_;
+
+  $type = defined($type) ? lc($type) : '';
+
+  # $type is interpolated into the SQL as a table name (a table name cannot be
+  # a bind value), so it must match a supported name exactly. The match is
+  # anchored: an unanchored one accepts any string merely containing 'gene' etc.
+  if($type !~ /\A(?:gene|transcript|translation)\z/){
+    throw("Cannot get display_name for stable_id $stable_id with type $type");
+  }
+
+  if(! exists $self->{'display_name_cache'}->{$stable_id}){
+    # The stable ID is passed as a bind value so quoting is handled by the driver
+    my $sql = "SELECT x.display_label FROM $type t, xref x where t.display_xref_id=x.xref_id and t.stable_id=?";
+
+    # List assignment: an empty result stores undef, so misses are cached too
+    ($self->{'display_name_cache'}->{$stable_id}) = $cdb->dbc->db_handle->selectrow_array($sql, undef, $stable_id);
+  }
+
+  return $self->{'display_name_cache'}->{$stable_id};
+}
+=head2 get_core_stable_id_by_display_name
+Args [1]   : Bio::EnsEMBL::DBSQL::DBAdaptor
+Args [2]   : display name (e.g. from core DB or GNC name)
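+Example    : my $stable_id = $self->get_core_stable_id_by_display_name($cdb, $display_name);
+Description: Fetches the gene stable ID for a display name from the core DB, caching the result by display name.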
+Returntype : string - gene stable ID
+Exceptions : None
+Caller     : General
+Status     : At risk
+=cut
+# --------------------------------------------------------------------------------
+# Build a cache of display_name -> ensembl gene stable ID
+# Returns the stable ID string; the cache is keyed on $display_name
+#NOTE(review): the caveat about doing more than one 'type' at a time (cache never
+# loaded for the new type) does not apply as written here, as only the gene table is queried
+sub get_core_stable_id_by_display_name{
+  # Looks up the stable ID of the core gene whose display xref has the given
+  # display label, caching each result in $self->{'stable_id_cache'} keyed on
+  # the display name.
+  #
+  # Args: $cdb          - core DB adaptor, queried via $cdb->dbc->db_handle
+  #       $display_name - display label to look up
+  # Returns the gene stable ID, or undef when the query yields no row.
+  # No type argument is validated here: only the gene table is queried.
+  my ($self, $cdb, $display_name) = @_;
+
+  if(! exists $self->{'stable_id_cache'}->{$display_name}){
+    # Fixes the duplicated 'and and' which made this statement invalid SQL.
+    # The display name is passed as a bind value so that names containing
+    # quotes cannot break (or alter) the statement.
+    my $sql = 'SELECT g.stable_id FROM gene g, xref x where g.display_xref_id=x.xref_id and x.display_label=?';
+
+    # List assignment: an empty result stores undef, so misses are cached too
+    ($self->{'stable_id_cache'}->{$display_name}) = $cdb->dbc->db_handle->selectrow_array($sql, undef, $display_name);
+  }
+
+  return $self->{'stable_id_cache'}->{$display_name};
+}
+1;

Mercurial > repos > mahtabm > ensembl

comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/Utils/Helper.pm @ 0:1f6dce3d34e0