Mercurial > repos > mahtabm > ensembl
view variant_effect_predictor/Bio/EnsEMBL/Funcgen/Utils/Helper.pm @ 3:d30fa12e4cc5 default tip
Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author | devteam <devteam@galaxyproject.org> |
---|---|
date | Mon, 13 Jan 2014 10:38:30 -0500 |
parents | 1f6dce3d34e0 |
children |
line wrap: on
line source
=head1 LICENSE

  Copyright (c) 1999-2011 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <ensembl-dev@ebi.ac.uk>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=head1 NAME

Bio::EnsEMBL::Funcgen::Utils::Helper

=head1 SYNOPSIS

  e.g.

  my $object = Bio::EnsEMBL::Funcgen::Utils::Helper->new
    (
     logging     => 1,
     log_file    => "/tmp/Misc.log",
     debug_level => 2,
     debug_file  => "/tmp/Misc.dbg",
    );

  $object->log("This is a log message.");
  $object->debug(1,"This is a debug message.");
  $object->run_system_cmd("rmdir /tmp/test");

  ----------------------------------------------------------------------------

=head1 OPTIONS

=over 8

=item B<-debug>

Turns on and defines the verbosity of debugging output, 1-3, default = 0 = off

=item B<-log_file|l>

Defines the log file, default = "${instance}.log"

=item B<-help>

Print a brief help message and exits.

=item B<-man>

Prints the manual page and exits.

=back

=head1 DESCRIPTION

B<This program> performs several debugging and logging functions, as well as
providing several inheritable EFGUtils methods.

=cut

################################################################################

package Bio::EnsEMBL::Funcgen::Utils::Helper;

use Bio::Root::Root;
use Data::Dumper;
use Bio::EnsEMBL::Utils::Exception qw (throw stack_trace);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw (get_date);
#use Devel::Timer;
use Carp;#? Can't use unless we can get it to redirect
use File::Basename;

use strict;
use vars qw(@ISA);
@ISA = qw(Bio::Root::Root);

#List of valid rollback levels
#To be used in conjunction with -full_delete
my @rollback_tables = ('data_set', 'feature_set', 'result_set', 'input_set',
                       'experiment', 'array', 'array_chip', 'experimental_chip');

#Some local filevars to avoid assigning to package typeglobs
my ($DBGFILE, $LOGFILE);

################################################################################

=head2 new

  Description : Constructor method to create a new object with passed or
                default attributes.
  Arg [1]     : hash containing optional attributes :-
                    log_file    - name of log file (default = undef -> STDOUT)
                    debug_level - level of detail of debug message [1-3] (default = 0 = off)
                    debug_file  - name of debug file (default = undef -> STDERR)
                    tee         - mirror log output to STDOUT via 'tee -a'
                    no_log      - suppress default log file generation (implies tee)
                    default_log_dir - directory for default log files
                                      (default = $ENV{HOME}/logs)
  ReturnType  : Helper
  Example     : my $Helper = Bio::EnsEMBL::Funcgen::Utils::Helper->new(
                                                                      debug_level => 3,
                                                                      debug_file  => "/tmp/efg.debug",
                                                                      log_file    => "/tmp/efg.log",
                                                                     );
  Exceptions  : throws exception if failed to open debug file
              : throws exception if failed to open log file
              : throws if both log_file and no_log are specified

=cut

################################################################################

#To do , change to rearrange

sub new{
  my ($caller, %args) = @_;

  my ($self, %attrdata, $argname);
  my $class = ref($caller) || $caller;

  #Create object from parent class
  $self = $class->SUPER::new(%args);

  #we need to mirror ensembl behaviour here
  #use rearrange and set default afterwards if not defined

  # objects private data and default values
  #Not all of these need to be in main
  %attrdata = (
               _tee             => $main::_tee,
               _debug_level     => $main::_debug_level,
               _debug_file      => $main::_debug_file,
               _log_file        => $main::_log_file,#default should be set in caller
               _no_log          => $main::_no_log,#suppresses log file generation if log file not defined
               _default_log_dir => $main::_default_log_dir,
              );

  # set each class attribute using passed value or default value
  foreach my $attrname (keys %attrdata){
    ($argname = $attrname) =~ s/^_//; # remove leading underscore
    $self->{$attrname} = (exists $args{$argname}) ? $args{$argname} : $attrdata{$attrname};
  }

  $self->{'_tee'} = 1 if $self->{'_no_log'};
  #should we undef log_file here too?
  #This currently only turns off default logging

  $self->{_default_log_dir} ||= $ENV{'HOME'}.'/logs';
  $self->{'_report'} = [];

  # DEBUG OUTPUT & STDERR
  #should default to lowest or highest debug level here!
  if(defined $self->{_debug_level} && $self->{_debug_level}){
    $main::_debug_level = $self->{_debug_level};

    if(defined $self->{_debug_file}){
      $main::_debug_file = $self->{_debug_file};
      open($DBGFILE, '>>', $self->{_debug_file}) or throw("Failed to open debug file : $!");
      #open (DBGFILE, "<STDERR | tee -a ".$self->{_debug_file});#Mirrors STDERR to debug file
    }
    else{
      open($DBGFILE, '>&', \*STDERR) or throw("Failed to duplicate STDERR for debug output : $!");
    }

    select $DBGFILE; $| = 1;  # make debug file unbuffered

    $self->debug(1,"Debugging started ".localtime()." on $0 at level ".$self->{_debug_level}." ...");
  }

  my $log_file = $self->{_log_file};

  # LOG OUTPUT
  if (defined $self->{_log_file}){
    #This causes print on unopened file as we try and log in the DESTROY
    throw('You have specified mutually exclusive parameters log_file and no_log') if($self->{'_no_log'});
    $main::_log_file = $self->{_log_file};

    #An explicitly named log file is always appended to
    _open_log_handle($log_file, '>>', $self->{'_tee'});
  }
  else{
    #Change this to get the name of the control script and append with PID.out
    #This is to ensure that we always capture output
    #We need to also log params
    #We will have to call this from the child class.

    #Only do this if we don't have supress default logs set
    #To avoid loads of loags during testing
    if(! $self->{'_no_log'}){
      #The outermost stack frame identifies the controlling script
      my @stack     = stack_trace();
      my $top_level = $stack[$#stack];
      my (undef, $file) = @{$top_level};
      $file =~ s/.*\///;

      if(! -e $self->{_default_log_dir}){
        #Built-in mkdir avoids passing the directory name through a shell
        mkdir($self->{_default_log_dir})
          or throw('Failed to create default log directory : '.$self->{_default_log_dir}."\nError: $!");
      }

      $self->{'_log_file'} = $self->{_default_log_dir}.'/'.$file.'.'.$$.'.log';
      warn "No log file defined, defaulting to:\t".$self->{'_log_file'}."\n";

      #we should still tee here
      #The default log name contains the PID, so it is truncated rather than appended
      _open_log_handle($self->{'_log_file'}, '>', $self->{'_tee'});
    }
    else{
      #Have to include STD filehandles in operator
      open($LOGFILE, '>&', \*STDOUT) or throw("Failed to duplicate STDOUT for log output : $!");
    }
  }

  select $LOGFILE; $| = 1;  # make log file unbuffered

  $self->log("\n\nLogging started at ".localtime()."...");

  # RESET STDOUT TO DEFAULT
  select STDOUT; $| = 1;

  $self->debug(2,"Helper class instance created.");

  return $self;
}


#Private function (not a method): opens the file scoped $LOGFILE handle.
#  $path - log file path
#  $mode - open mode used when not teeing, '>' or '>>'
#  $tee  - if true, write through 'tee -a $path' so output also reaches STDOUT
#Throws if the file or the pipe cannot be opened.

sub _open_log_handle{
  my ($path, $mode, $tee) = @_;

  if($tee){
    #List form open does not invoke a shell, so paths containing spaces
    #or shell metacharacters are handed to tee verbatim
    open($LOGFILE, '|-', 'tee', '-a', $path)
      or throw("Failed to open tee pipe to log file : $path\nError: $!");
  }
  else{
    open($LOGFILE, $mode, $path) or throw("Failed to open log file : $path\nError: $!");
  }

  return;
}


################################################################################

=head2 DESTROY

  Description : Called by gargbage collection to enable tidy up before object deleted.
                Writes the summary report, closing log/debug messages and
                the process memory usage.
  ReturnType  : none
  Example     : none - should not be called directly
  Exceptions  : none

=cut

################################################################################

sub DESTROY{
  my ($self) = @_;

  #The backticks below overwrite $?. During global destruction that would
  #replace the exit status of the whole script, so protect the status globals.
  local ($?, $!, $@);

  $self->report;

  if($self->{_log_file}){
    $self->log("Logging complete ".localtime().".");
    $self->log('Virtual Memory '.`ps -p $$ -o vsz |tail -1`);
    $self->log('Resident Memory '.`ps -p $$ -o rss |tail -1`);

    # close LOGFILE;  # if inherited object then cannot close filehandle !!!
  }

  if($self->{_debug_level}){
    $self->debug(1,"Debugging complete ".localtime().".");
    # close DBGFILE;  # if inherited object then cannot close filehandle !!!
  }

  if(defined $self->{'_timer'}){
    $self->{'_timer'}->report();
  }

  $self->debug(2,"Bio::EnsEMBL::Helper class instance destroyed.");

  return;
}


##Need generic method in here to get stack and line info
###Use Root.pm stack methods!
# and replace this with caller line method for logging

#Returns a "[<localtime> - <file>:<line>]" stamp for the frame two levels up,
#falling back to one level up and finally to the placeholder "undef":0

sub _get_stack{
  my ($self) = shift;

  #need to resolve this method with that in debug, pass log or debug arg for different format

  my @prog = (caller(2)) ? caller(2) : (caller(1)) ? caller(1) : (undef,"undef",0);

  return "[".localtime()." - ".basename($prog[1]).":$prog[2]]";
}


################################################################################

=head2 log

  Arg[0]      : string - log message.
  Arg[1]      : boolean - memory usage, appends current process memory stats
  Arg[2]      : boolean - appends the current local time
  Arg[3]      : boolean - suppresses the trailing newline
  Description : Method to write messages to a previously set up log file.
                The message is also passed to debug at level 1.
  Return type : none
  Example     : $root->log("Processing file $filename ...", 1);
  Exceptions  : none

=cut

################################################################################

sub log{
  my ($self, $message, $mem, $date, $no_return) = @_;

  if($mem){
    local $?; #Do not leak the status of ps into the caller
    $message.= " :: ".`ps -p $$ -o vsz |tail -1`;
    chomp $message;
    $message .= " KB";
  }

  if($date){
    $message .= ' - '.localtime();
  }

  $message .= "\n" if ! $no_return;

  #Printing to an undefined handle is fatal, so fall back to STDOUT if the
  #log handle was never opened e.g. when called from DESTROY after a failed new
  my $log_fh = (defined $LOGFILE) ? $LOGFILE : \*STDOUT;
  print {$log_fh} "::\t$message";

  # Add to debug file if not printing to STDERR?
  # only if verbose?
  # this would double print everything to STDOUT if tee and debug has not redefined STDERR

  $self->debug(1,$message);
}


################################################################################

=head2 report

  Arg[0]      : optional string - log message.
  Arg[1]      : optional boolean - memory usage, appends current process memory stats
  Description : Wrapper method for log, which also stores message for summary reporting.
                When called without a message, prints and then clears the
                stored summary.
  Return type : none
  Example     : $root->report("WARNING: You have not done this or that and want it reported at the end of a script");
  Exceptions  : none

=cut

################################################################################

sub report{
  my ($self, $message, $mem) = @_;

  if(defined $message){
    $self->log($message, $mem);
    push @{$self->{'_report'}}, $message;
  }
  elsif(scalar(@{ $self->{'_report'} || [] })){
    #_report may be undefined if new failed before initialising it
    my $log_fh = (defined $LOGFILE) ? $LOGFILE : \*STDOUT;

    print {$log_fh} "\n::\tSUMMARY REPORT\t::\n";
    print {$log_fh} join("\n", @{$self->{'_report'}})."\n";

    $self->{'_report'} = [];
  }

  return;
}


################################################################################

=head2 log_header

  Arg[0]      : string - log message.

=cut
=pod

  Arg[1]      : boolean - memory usage, appends current process memory stats
  Arg[2]      : boolean - appends the current local time
  Description : Wrapper method to format a log as a header line
  Return type : none
  Example     : $root->log_header("Processing file $filename ...", 1);
  Exceptions  : none

=cut

################################################################################

sub log_header{
  my ($self, $message, $mem, $date) = @_;

  #Printing to an undefined handle is fatal, so fall back to STDOUT
  #if the log handle has not been opened
  my $log_fh = (defined $LOGFILE) ? $LOGFILE : \*STDOUT;

  print {$log_fh} "\n\n";
  $self->log("::\t$message\t::\t::", $mem, $date);
  print {$log_fh} "\n";
}


################################################################################

=head2 debug

  Description : Method to write debug info to a previously set up debug file.
                Over-rides Root.pm on/off style debugging.
                Each message is suffixed with the PID, the file:line of the
                outermost caller and the file:line of the second stack frame.
  Args        : int: debug level and string: log message.
  ReturnType  : none
  Example     : $root->debug(2,"dir=$dir file=$file");
  Exceptions  : none

=cut

################################################################################

sub debug{
  my ($self,$level,$message) = @_;

  #Can we not detect whther message is a scalar, array or hash and Dump or print accordingly?

  # only output the passed message if debug is on at the requested level
  return if ! (defined $self->{_debug_level} && $level <= $self->{_debug_level});

  my ($prog_name, $call_name) = ('undef', 'undef');
  my ($prog_line, $call_line) = (0, 0);
  my $depth = 0;

  ######Replace this with Carp method?
  #Walk the whole stack: the last frame seen is the top level program,
  #the second frame (checked after the post-increment) is reported as the caller
  while (my @call = caller($depth++)){

    if ($depth == 2){
      $call_name = basename($call[1]);
      $call_line = $call[2];
    }

    $prog_name = basename($call[1]);
    $prog_line = $call[2];
  }

  #The debug handle is only opened by new when debug_level is true.
  #Printing to an undefined handle is fatal, so fall back to STDERR,
  #which is also what new duplicates when no debug file is given.
  my $debug_fh = (defined $DBGFILE) ? $DBGFILE : \*STDERR;

  print {$debug_fh} "debug $message\t: [$$ - $prog_name:$prog_line  $call_name:$call_line]\n";

  #carp("carping $message");

  return;
}


################################################################################

=head2 debug_hash

  Description : Method to write the contents of passed hash to debug output.
  Args        : int: debug level and hashref.

=cut
=pod

  ReturnType  : none
  Example     : $Helper->debug_hash(3,\%hash);
  Exceptions  : none

=cut

################################################################################

sub debug_hash{
  my ($self,$level,$hashref) = @_;

  # if debug on at the requested level then output the passed hash
  if (defined $self->{_debug_level} && $level <= $self->{_debug_level}){
    #Printing to an undefined handle is fatal, so fall back to STDERR
    #if the debug handle has not been opened
    my $debug_fh = (defined $DBGFILE) ? $DBGFILE : \*STDERR;

    print {$debug_fh} Data::Dumper::Dumper(\$hashref)."\n";
  }
}


################################################################################

=head2 run_system_cmd

  Description : Method to control the execution of the standard system() command
  Arg [1]     : string - command line, executed via system()
  Arg [2]     : optional boolean - no_exit, warn instead of throwing on failure
  ReturnType  : int - exit code of the command: 0 on success, -1 if the
                command could not be executed, 128 + signal number if the
                child was killed by a signal
  Example     : $Helper->run_system_cmd("rmdir /tmp/test");
  Exceptions  : throws exception if system command returns none zero,
                unless no_exit is set

=cut

################################################################################

#Move most of this to EFGUtils.pm
#Maintain wrapper here with throws, only warn in EFGUtils

sub run_system_cmd{
  my ($self, $command, $no_exit) = @_;

  my $redirect = '';

  $self->debug(3, "system($command)");

  # decide where the command line output should be redirected
  #This should account for redirects
  #This just sends everything to 1 no?
  if (defined $self->{_debug_level} && $self->{_debug_level} >= 3){

    if (defined $self->{_debug_file}){
      $redirect = " >>".$self->{_debug_file}." 2>&1";
    }
    else{
      $redirect = "";
    }
  }
  else{
    #$redirect = " > /dev/null 2>&1";
  }

  # execute the passed system command
  my $status = system("$command $redirect");
  my $exit_code;

  if ($status == -1) {
    warn "Failed to execute: $!\n";
    $exit_code = -1;
  }
  elsif ($status & 127) {
    #The low 7 bits hold the signal number and the high byte is 0 here,
    #so $status >> 8 would report a killed child as success.
    #Use the shell convention of 128 + signal instead.
    my $signal = $status & 127;
    warn sprintf("Child died with signal %d, %s coredump\nError:\t%s",
                 $signal, ($status & 128) ? 'with' : 'without', $!);
    $exit_code = 128 + $signal;
  }
  else{
    $exit_code = $status >> 8; #get the true exit code
    warn sprintf("Child exited with value %d\nError:\t%s\n", $exit_code, $!) if $exit_code != 0;
  }

  if ($exit_code != 0){

    if (! $no_exit){
      throw("System command failed:\t$command\nExit code:\t$exit_code\n$!");
    }
    else{
      warn("System command returned non-zero exit code:\t$command\nExit code:\t$exit_code\n$!");
    }
  }

  #reverse boolean logic for perl...can't do this anymore due to tab2mage successful non-zero exit codes :/
  return $exit_code;
}


#add sys_get method ehre to handle system calls which retrieve data?
#i.e.backtick commands `find . -name *fasta`
#or use want or flag with above method?
#should open pipe instead to capture error?

#Returns $self->{$data_type}, or $self->{$data_type}{$data_name} if a name
#is given. Throws if the requested type or name does not exist.

sub get_data{
  my ($self, $data_type, $data_name) = @_;

  #This method is just to provide standard checking for specific get_data/config methods

  if(defined $data_name){
    #Test the type first so that a failed look up does not autovivify it
    if(! (exists $self->{$data_type} && exists $self->{$data_type}{$data_name})){
      throw("Defs data name $data_name for type '$data_type' does not exist\n");
    }

    return $self->{$data_type}{$data_name};
  }

  throw("Defs data type $data_type does not exist\n") if (! exists $self->{$data_type});

  return $self->{$data_type};
}


#sub Timer{
#  my ($self) = shift;
#  $self->{'_timer'} = new Devel::Timer()  if(! defined $self->{'_timer'});
#  return $self->{'_timer'};
#}


#Builds a hashref mapping each header field name to its column index.
#  $header_ref - arrayref of header field names
#  $fields     - optional arrayref of mandatory field names
#Throws if a mandatory field is absent from the header.

sub set_header_hash{
  my ($self, $header_ref, $fields) = @_;

  my %hpos;

  for my $x(0..$#{$header_ref}){
    $hpos{$header_ref->[$x]} = $x;
  }

  if($fields){

    foreach my $field(@$fields){

      if(! exists $hpos{$field}){
        throw("Header does not contain mandatory field:\t${field}");
      }
    }
  }

  return \%hpos;
}


#Move this to EFGUtils?

#Renames an existing plain file to <file_path>.<HH:MM:SS> (local time).
#Does nothing if the file does not exist.
#Throws if no path is given or the rename fails.

sub backup_file{
  my ($self, $file_path) = @_;

  throw("Must define a file path to backup") if(! $file_path);

  if (-f $file_path) {
    $self->log("Backing up:\t$file_path");

    #Same stamp as `date '+%T'`, built without a sub shell
    my $stamp = sprintf('%02d:%02d:%02d', (localtime())[2,1,0]);

    #rename does not go through a shell, so paths containing spaces are safe,
    #and a failed backup is reported rather than silently ignored
    rename($file_path, "${file_path}.${stamp}")
      or throw("Failed to backup file:\t$file_path\nError:\t$!");
  }

  return;
}


#This should move to Utils
#as it is a simple string manipulation

#Returns an arrayref of the last two '_' separated fields of a DB name

sub get_schema_and_build{
  my ($self, $dbname) = @_;

  my @dbname = split/_/, $dbname;

  return [$dbname[($#dbname -1)], $dbname[($#dbname )]];
}


=head2 get_regbuild_set_states

  Arg [1]    : Bio::EnsEMBL::DBAdaptor
  Example    : my ($dset_states, $rset_states, $fset_states) = $helper->get_regbuild_set_states($db);
  Description: Returns Array refs of appropriate states for sets used in the regulatory build.
               All three are empty if there is no chromosome CoordSystem.
  Returntype : Array
  Exceptions : Warns if cannot find chromosome CoordSystem
  Caller     : HealthChecker & regulatory build code
  Status     : At risk

=cut

sub get_regbuild_set_states{
  my ($self, $db) = @_;

  my $cs_a = $db->get_CoordSystemAdaptor;

  #These states need to be mirrored in RegulatorySets.java

  my $chrom_cs = $cs_a->fetch_by_name('chromosome');
  my (@dset_states, @rset_states, @fset_states);

  if(! defined $chrom_cs){
    #This species most likely does not have a regbuild
    #really just need to get the 'highest' level here
    warn "Could not find Chromosome CoordSystem. ".$db->dbc->dbname.". most likely does not contain a RegulatoryBuild";
  }
  else{
    #Reuse the CoordSystem fetched above rather than fetching it again
    my $imp_cs_status = 'IMPORTED_'.$chrom_cs->version;

    #What about non-chromosome assemblies?
    #top level will not return version...why not?

    #Each list extends the previous one
    @dset_states = ('DISPLAYABLE');
    @rset_states = (@dset_states, 'DAS_DISPLAYABLE', $imp_cs_status);
    @fset_states = (@rset_states, 'MART_DISPLAYABLE');
  }

  return (\@dset_states, \@rset_states, \@fset_states);
}


=head2 define_and_validate_sets

  Arg [1]    : hash - set constructor parameters:
                            -dbadaptor    Bio::EnsEMBL::Funcgen::DBAdaptor
                            -name         Data/FeatureSet/ResultSet name to create
                            -feature_type Bio::EnsEMBL::Funcgen::FeatureType
                            -cell_type    Bio::EnsEMBL::Funcgen::CellType
                            -analysis     FeatureSet Bio::EnsEMBL::Analysis
                            -feature_class e.g.

=cut
annotated or regulatory -description FeatureSet description -recovery Allows definition of extant sets so long as they match -append Boolean - Forces import on top of previously imported data -rollback Rolls back product feature set. -supporting_sets Complete set of pre-stored supporting or input sets for this DataSet -slices ARRAYREF of Slices to rollback Example : my $dset = $self->define_and_validate_Set(%params); Description: Checks whether set is already in DB based on set name, rolls back features if roll back flag set. Or creates new DataSet and Feature|ResultSet if not present. Returntype : Bio::EnsEMBL::Funcgen::DataSet Exceptions : Throws if DBAdaptor param not valid Caller : Importers and Parsers Status : At risk =cut sub define_and_validate_sets{ my $self = shift; #change slice to slices to support multi slice import from InputSet::define_sets #Can't do full rollback in slice mode #This may not be safe in slice mode as we will then have mixed inputs/outputs my ($name, $anal, $ftype, $ctype, $type, $append, $db, $ssets, $description, $rollback, $recovery, $slices, $display_label) = rearrange(['NAME', 'ANALYSIS', 'FEATURE_TYPE', 'CELL_TYPE', 'FEATURE_CLASS', 'APPEND', 'DBADAPTOR', 'SUPPORTING_SETS', 'DESCRIPTION', 'ROLLBACK', 'RECOVERY', 'SLICES', 'DISPLAY_LABEL'], @_); #VALIDATE CONFIG HASH #$config_hash ||= {};#default so exists will work without testing #if(keys %{$config_hash}){ # #There is a module to handle config hashes somewhere! # throw('config_hash not yet implemented for define_and_validate_sets'); #my @known_config = ('full_delete');#We never want full delete here as this is a create method! #Can we set vars from has by refs like getopts? #map { # throw("Found unsupported config hash parameter:\t$_") if ! 
grep(/^${_}$/, @known_config); #} keys %{$config_hash}; # } #define rollback level #extract this to _set_rollback_level($rollback_mode, $feature_class) my $rollback_level = 0; #These should be globally defined so all rollback methods can use them my %valid_rollback_modes = ( product_features => 1, #Just product features and FeatureSet status, what about DataSet status? #full delete does nothing here? sets => 2, #Includes product_features and #deletes supporting_sets entries unless we specify append #revoke all states on Feature/Data/InputSets #Full delete removes Feature/Data/InputSet entries #Never includes ResultSets! supporting_features => 3, #Includes product_feature and sets #Removes all states and supporting features #inc. ResultSet results/ResultFeatures #Full_delete remove supporting set entries #Otherwise just rollback states for affected sets ); if($rollback){ if(! exists $valid_rollback_modes{$rollback}){ #Default to some sensible values $rollback = 'product_features';#default for FeatureSets #Always want overwrite supporting sets if there is a difference $rollback = 'sets' if ($type eq 'regulatory'); $rollback = 'supporting_sets' if ($type eq 'result'); warn ("You have not set a valid rollback mode(product_features|sets|supporting_features), defaulting to $rollback for feature class $type\n"); } $rollback_level = $valid_rollback_modes{$rollback}; } if($slices && (ref($slices) ne 'ARRAY')){ throw('-slices param must be an ARRAYREF of Bio::EnsEMBL::Slice objects'); #Rest of validation done in other methods } #But how are we going to resolve the append behaviour when we also want to validate the ssets? #Can't, so append also functions to enable addition in the absence of some or all previous data/esets? #No this is not true, we want to be able to fetch an extant set for import, #we just need to be aware of sset IMPORTED status? #This should be a recovery thing, allow fetch, but validate sets? #Check mandatory params if(! 
(ref($db) && $db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'))){ throw('Must provide a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'); } throw('Must provide a -name ') if(! defined $name); #Not necessarily, just do rollback then append? #But then we'd potentially have a supporting set associated which has had it's data removed from the feature set. #Generating sets for an ExpSet will always have append set #This could be valid for generically grabing/creating sets for adding new supporting sets e.g. reg build throw('-append and -rollback are mutually exclusive') if $rollback_level && $append; #This will never happen due to previous test? append will always fail? #warn('You are defining a pre-existing FeatureSet without rolling back'. # ' previous data, this could result in data duplication') if $append && ! $rollback_level; #Is this really possible, surely the supporting set will fail to store due to unique key? #Should we warn here about append && recovery? #Aren't these mutually exclusive? #Do we know if we have new data? append should override recovery, or just specifiy append #This will stop the import and highlight the issue to the user #We need to be able to run with both otherwise the import will not work throw('Must provide a -feature_class e.g. annotated, external, result or regulatory') if(! defined $type); #Check for annotated, external, regulatory etc here? #Should never be external as we don't have DataSets for external sets? $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureType', $ftype); if (defined $ctype){ $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::CellType', $ctype); } elsif($type ne 'regulatory'){ throw('Only Data/FeatureSets with type \'regulatory\' can have an undefined CellType'); #Coudl extend this to core set by name eq 'RegulatoryFeatures'? 
} $db->is_stored_and_valid('Bio::EnsEMBL::Analysis', $anal); my $dset_adaptor = $db->get_DataSetAdaptor; my $fset_adaptor = $db->get_FeatureSetAdaptor; my $rset_adaptor = $db->get_ResultSetAdaptor; #DataSet centric definition to enable multiple DataSets #to be generated from the same supporting sets my $dset = $dset_adaptor->fetch_by_name($name); my ($fset, $rset, @input_sets); #Validate stored vs passed set data if(defined $dset){ $self->log('Found Stored DataSet '.$dset->name); if($type ne 'result'){#i.e. annotated #Does this account for regulatory? $fset = $dset->product_FeatureSet; #Here we have the possiblity that a feature_set with a different name may have #been associated with the DataSet if(defined $fset){ $self->log("Found associated product FeatureSet:\t".$fset->name); #if(! $clobber && if($fset->name ne $name){ throw('Invalid product FeatureSet name ('.$fset->name.') for DataSet ('.$name.'). Rollback will overwrite the FeatureSet and mismatched name will be retained.'); #Need to clobber both or give explicit name for datasets or rename dataset??? #Force this throw for now, make this fix manual as we may end up automatically overwriting data } } #This needs to be modified to support InputSets in ResultSets? #Would never have mixed Input/ResultSets so no need #Could potential need to do it for mixed Result/FeatureSets #if we ever use an analysis which uses both set types #check supporting_sets here if defined #We have the problem here of wanting to add ssets to a previously existing dset #we may not know the original sset, or which of the ssets are new #Hence there is a likelihood of a mismatch. #Much of this is replicated in store_udpated sets if(defined $ssets){ my @sorted_ssets = sort {$a->dbID <=> $b->dbID} @{$ssets}; my @stored_ssets = sort {$a->dbID <=> $b->dbID} @{$dset->get_supporting_sets}; my $mismatch = 0; $mismatch = 1 if(scalar(@sorted_ssets) != scalar(@stored_ssets)); if(! 
$mismatch){ for my $i(0..$#stored_ssets){ if($stored_ssets[$i]->dbID != $sorted_ssets[$i]->dbID){ $mismatch=1; last; } } } if($mismatch){ #We're really print this names here which may hide the true cell/feature/anal type differences. my $mismatch = 'There is a (name/type/analysis) mismatch between the supplied supporting_sets and the'. ' supporting_sets in the DB for DataSet '.$dset->name."\n\nStored:\n" .join(', ', (map { $_->name } @stored_ssets))."\n\nSupplied supporting_sets:\n" .join(', ', (map { $_->name } @sorted_ssets)); if($append){ warn($mismatch."\n\nAppending supporting set data to unvalidated supporting sets"); } elsif($rollback_level > 1){#supporting set rollback warn($mismatch."\n\nReplacing previously stored supporting sets with newly defined sets\n"); if($slices){ warn("WARNING:\tPerforming supporting_set rollback in slice mode. This may corrupt the supporting_set definition for other slices in this DataSet if they are not re-generated using the same supporting_sets\n"); } #Remove supporting_set entries #This should be in a rollback_DataSet method #This has moved to DataSetAdaptor::store_update_sets #Reset supporting sets $dset->{'supporting_sets'} = undef; $dset->add_supporting_sets(\@sorted_ssets); #Move this to last block? #This will currently fail as it test for product_FeatureSet #How do we get around this? Remove IMPORTED status and only throw if fset has IMPORTED status? #warn "pre store sset ".@{$dset->get_supporting_sets}; #($dset) = @{$dset_adaptor->store_updated_sets([$dset], $rollback_level)}; #$dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory'; } else{ throw($mismatch); } } } else{ warn("No supporting sets defined, skipping supporting set validation for definition of DataSet:\t".$name); } } else{#result_features from InputSet #Do we ever pass supporting sets here? #Do we need to test vs stored_sets? 
#There is the potential for more than one ResultSet to be associated with DataSet #But as we are using the same name, this restricts the number wrt the cardinality #of the name field. i.e. 1 name per analysis/cell_type/feature_type. #This now works slightly differently to the rest of this method as we #need to treat the ResultSet as we are currently treating the FeatureSet below. #However, the use case of this method is for one InputSet giving rise to one ResultSet #Hence just throw if we find more than one or have a name mismatch??? my @stored_sets = @{$dset->get_supporting_sets}; #THis assumes we will always have supporting sets #and is failing as we have removed this test in DataSet::new #But where are we storing it without the supporting set? if(scalar(@stored_sets) > 1){ throw('define_and_validate_sets does not yet support DataSets with multiple supporting ResultSets for result_features'); } elsif(! @stored_sets){ throw("DataSet($name) does not have any stored supporting sets. These should have been defined when storing the DataSet"); #Or should we handle this? } $rset = $stored_sets[0]; if($rset->set_type ne 'result'){ throw("DataSet already contains a supporting set which is not a ResultSet:\t".$rset->set_type."\t".$stored_sets[0]->name); } elsif($ssets){ #Do we ever pass supporting sets, test for completeness #Just test we have the same supplied ssets if it is defined if(scalar(@$ssets) != 1){ throw("ResultFeature data sets currently only support one supporting ResultSet.\nSupproting sets:\t". join(', ', (map { $_->name.'('.$_->set_type } @$ssets))); } elsif(! 
($rset->dbID == $ssets->[0]->dbID) && ($ssets->[0]->set_type eq 'result')){ throw('Supplied supporting set('.$ssets->[0]->name.') does not match stored supporting set('.$rset->name.')'); } } @input_sets = @{$rset->get_InputSets}; } } if($type eq 'result'){ #Validate the defined InputSets if (scalar(@$ssets) > 1) { throw("define_and_validate_sets does not yet support multiple InputSets for defining a ResultSet:\t".$name); } if ($ssets->[0]->set_type ne 'input') { throw("To define a ResultSet($name) containing result_features, you must provide and InputSet as a supporting set\nArray based ResultSets(i.e. experimental_chip/channel) are not defined using this method, see specific Import Parsers."); } #Try and grab the rset just in case it has been orphaned somehow if (! defined $rset) { $rset = $rset_adaptor->fetch_all_by_name($name, $ftype, $ctype, $anal)->[0]; #Should only ever be one given all parts of unique key @input_sets = @{$rset->get_InputSets} if $rset; } if (defined $rset) { #Validate stored InputSets if (scalar(@input_sets) != scalar(@$ssets)) { throw('Found mismatch between number of previously stored InputSets('.scalar(@input_sets).') and defined InputSets('.scalar(@$ssets).'). You must provide a complete list of InputSets to define your ResultSet.'); } if ($input_sets[0]->dbID != $ssets->[0]->dbID) { throw('Found dbID mismatch between previously stored InputSet('.$input_sets[0]->name.') and define InputSet('.$ssets->[0]->name.')'); } #rollback ResultSet/InputSet here? if($rollback_level > 2){ warn "rollback not yet fully implemented for Result/InputSets"; #Does this need to be by slice? #What about states if we are running in parallel? 
if($slices){ map {$self->rollback_ResultSet($rset, $rollback, $_)} @$slices; } else{ $self->rollback_ResultSet($rset, $rollback); } } } else{#define ResultSet ($rset) = @{$rset_adaptor->store(Bio::EnsEMBL::Funcgen::ResultSet->new ( -name => $name, -feature_type => $ftype, -cell_type => $ctype, -table_name => 'input_set', -table_id => $ssets->[0]->dbID, -analysis => $anal ) )}; } } else{#annotated/regulatory/external i.e. FeatureSet #Try and grab the fset just in case it has been orphaned somehow if(! defined $fset){ $fset = $fset_adaptor->fetch_by_name($name); if(defined $fset){ #Now we need to test whether it is attached to a dset #Will be incorrect dset if it is as we couldn't get it before #else we test the types and rollback $self->log("Found stored orphan FeatureSet:\t".$fset->name); my $stored_dset = $dset_adaptor->fetch_by_product_FeatureSet($fset); if(defined $stored_dset){ throw('Found FeatureSet('.$name.') associated with incorrect DataSet('.$stored_dset->name. ").\nTry using another -name in the set parameters hash"); } } } #Rollback or create FeatureSet if(defined $fset){ if($rollback_level){ #Don't check for IMPORTED here as we want to rollback anyway #Not forcing delete here as this may be used as a supporting set itself. $self->rollback_FeatureSet($fset, undef, $slices); } elsif ($append || $recovery) { #This is only true if we have an sset mismatch #Do we need to revoke IMPORTED here too? #This behaves differently dependant on the supporting set. #InputSet status refers to loading in FeatureSet, where as ResultSet status refers to loading into result table #So we really want to revoke it #But this leaves us vulnerable to losing data if the import crashes after this point #because we have no way of assesing which is complete data and which is incomplete data #within a feature set. #This means we need a status on supporting_set, not InputSet or ResultSet #as this has to be in the context of a dataset. 
#Grrr, this means we need a SupportingSet class which simply wraps the InputSet/ResultSet #We also need a single dbID for the supporting_set table #Which means we will have to do some wierdity with the normal dbID implementation #i.e. Have supporting_set_id, so we can still access all the normal dbID method for the given Set class #This will have to be hardcoded into the state methods #Also will need to specify when we want to store as supporting_status or normal set status. #This is an awful lot to protect against vulnerability #Also as there easy way to track what features came from which supporting set #There isn't currently a viable way to rollback, hence will have to redo the whole set. #Maybe we can enforce this by procedure? #By simply not associating the supporting set until it has been loaded into the feature set? #This may cause even more tracking problems #Right then, simply warn and do not revoke feature_set IMPORTED to protect old data? #Parsers should identify supporting_sets(InputSets) which exist but do not have IMPORTED #status and fail, specifying -recover which will rollback_FeatureSet which will revoke the IMPORTED status #This can mean a failed import can leave a partially imported feature set with the IMPORTED status!!! #We just need to handle InputSets and ResultSets differently. #In parsers or here? #Probably best in the parsers as this is where the states are set. #Should we throw here for ResultSet? #Force rollback of FeatureSet first or create new one? #And throw for InputSet? #This again comes back to whether we will ever have more than one file #for a give InputSet, currently not. $self->log("WARNING\t::\tAdding data to a extant FeatureSet:\t".$fset->name); } else { throw('Found extant FeatureSet '.$fset->name.'. 
Maybe you want to specify the rollback, append or recovery parameter or roll back the FeatureSet separately?'); } } else { #create a new one $self->log("Creating new FeatureSet:\t".$name); $fset = Bio::EnsEMBL::Funcgen::FeatureSet->new( -name => $name, -feature_type => $ftype, -cell_type => $ctype, -analysis => $anal, -feature_class => $type, -description => $description, -display_label => $display_label, ); ($fset) = @{$fset_adaptor->store($fset)}; } } #Create/Update the DataSet if(defined $dset){ #Could do these updates above? #But delayed to reduce redundancy if($type ne 'result'){ if(! defined $dset->product_FeatureSet){ $self->log("Updating DataSet with new product FeatureSet:\t".$fset->name); $dset->product_FeatureSet($fset); } $dset = $dset_adaptor->store_updated_sets([$dset], $rollback_level)->[0]; #This cannot store the focus sets as we don't know which are which yet #Only the script knows this # $dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory'; } else{ #We may have the case where we have a DataSet(with a FeatureSet) but no ResultSet #i.e. Load result_features after peak calls #So update dset with ResultSet if(! 
@{$dset->get_supporting_sets}){
        $self->log("Updating DataSet with new ResultSet:\t".$rset->name);
        $dset->add_supporting_sets([$rset]);
        $dset = $dset_adaptor->store_updated_sets([$dset], $rollback_level)->[0];
      }
    }
  }
  else{
    $self->log("Creating new ${type}_feature DataSet:\t".$name);

    if($type ne 'result'){
      ($dset) = @{$dset_adaptor->store(Bio::EnsEMBL::Funcgen::DataSet->new
                                       (
                                        -name => $name,
                                        -feature_set => $fset,
                                        -supporting_sets => $ssets,
                                       ))};
      #$dset->adaptor->store_regbuild_meta_strings($dset, $rollback_level) if $type eq 'regulatory';
    }
    else{
      warn "creating dataset $name with supporting set $rset";
      ($dset) = @{$dset_adaptor->store(Bio::EnsEMBL::Funcgen::DataSet->new
                                       (
                                        -name => $name,
                                        -supporting_sets => [$rset],
                                       ))};
    }
  }

  return $dset;
}

#Rollback/load methods migrated from DBAdaptor
#Move to SetAdaptors, better located and will remove cyclical dependancy

=head2 rollback_FeatureSet

  Arg [0]    : Bio::EnsEMBL::Funcgen::FeatureSet
  Arg [1]    : optional - boolean force delete flag. If this FeatureSet is used as a
               supporting set for another DataSet, the rollback only proceeds (with a
               logged warning) when this flag is set, otherwise it throws.
  Arg [2]    : optional - arrayref of Bio::EnsEMBL::Slice objects to restrict the rollback to.
               A sub-slice (not full length) is only accepted when it is the only slice passed.
               States are not revoked for a Slice based rollback.
  Arg [3]    : optional - boolean flag to perform a full rollback i.e. the default will just
               remove the features, specifying this will also delete the feature_set and
               data_set records. Cannot be combined with Arg [2].
  Example    : $self->rollback_FeatureSet($fset);
  Description: Deletes all status and feature entries for this FeatureSet
               (regulatory attributes, object xrefs, associated feature types, features).
               Checks whether FeatureSet is a supporting set in any other DataSet.
               Returns without deleting anything if slices were passed but none of
               them is present in the eFG DB.
  Returntype : none
  Exceptions : Throws if the FeatureSet has no adaptor or is not stored and valid
               Throws if slices is not an arrayref of Bio::EnsEMBL::Slice objects
               Throws if full_delete is specified for a Slice based rollback
               Throws if more than one slice is passed and any of them is a sub-slice
               Throws if FeatureSet is a supporting set of another DataSet and
               force_delete is not specified
  Caller     : Importers and Parsers
  Status     : At risk

=cut

sub rollback_FeatureSet{
  my ($self, $fset, $force_delete, $slices, $full_delete) = @_;

  #Remove force delete and just throw?
  #Currently only used in project_feature_set.
  #May want to keep an old RegBuild for mapping/comparison?
  #Could get around this by simply deleting the data_set? Unknown impact.
  #Move to config hash?
  #No need for rollback_level here as we always want to do the same thing

  my $sql;
  my $slice_join    = '';
  my $feature_class = $fset->feature_class;
  my $table         = $feature_class.'_feature';
  my $adaptor       = $fset->adaptor || throw('FeatureSet must have an adaptor');
  my $db            = $adaptor->db;
  #Cyclical dependency here, so not strictly necessary.
  $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::FeatureSet', $fset);

  my $fset_id   = $fset->dbID;
  my $fset_name = $fset->name;

  $self->log_header('Rolling back '.$feature_class." FeatureSet:\t".$fset_name);

  if($slices){

    if($full_delete){
      throw("Cannot specify a full_delete for a Slice based rollback:\t".$fset_name);
    }

    #The original test was written as (! ref($slices) eq 'ARRAY'), which parses as
    #((! ref($slices)) eq 'ARRAY') and hence could never be true.
    if(ref($slices) ne 'ARRAY'){
      throw('Slices must be an ARRAYREF of Slice objects');
    }

    foreach my $slice (@$slices){

      if(! (ref($slice) && $slice->isa('Bio::EnsEMBL::Slice'))){
        throw("Must pass a valid Bio::EnsEMBL::Slice");
      }
    }

    $self->log("Restricting to slices:\n\t\t".join("\n\t\t", (map { $_->name } @$slices)) );

    #Allow subslice rollback only for one slice at a time
    my $subslice = (scalar(@$slices) == 1) ? 1 : 0;

    #Keep the slices which are present in the DB alongside their seq_region_ids,
    #so the range constraint below is always built from the matching slice.
    my (@sr_ids, @db_slices);

    foreach my $slice (@$slices){
      my $efg_sr_id = $fset->get_FeatureAdaptor->get_seq_region_id_by_Slice($slice);

      if(! $efg_sr_id){
        $self->log("Slice is not present in eFG DB:\t".$slice->name);
        next;
      }

      if(! $subslice){#Test is not subslice
        my $full_slice = $slice->adaptor->fetch_by_region(undef, $slice->seq_region_name);

        if(($slice->start != 1) ||
           ($full_slice->end != $slice->end)){
          throw("Can only rollback subslices one at a time:\nRollback slice:\t"
                .$slice->name."\nFull slice:\t".$full_slice->name);
        }
      }

      push @sr_ids,    $efg_sr_id;
      push @db_slices, $slice;
    }

    if(! @sr_ids){
      #An empty list would generate invalid SQL i.e. 'seq_region_id in ()'.
      #No seq_region in the DB means there are no features to delete, so return
      #in the same way rollback_ResultFeatures does for an absent seq_region.
      $self->log("None of the specified slices are present in the eFG DB, nothing to rollback for:\t".$fset_name);
      return;
    }
    elsif(scalar(@sr_ids) == 1){
      #Allow sub slice rollback
      #add range here from meta coord?
      $slice_join = " and f.seq_region_id=$sr_ids[0] and f.seq_region_start<=".$db_slices[0]->end.
        ' and f.seq_region_end>='.$db_slices[0]->start;
    }
    else{
      $slice_join = ' and f.seq_region_id in ('.join(', ', @sr_ids).')';
    }
  }


  #Check whether this is a supporting set for another data_set
  my @dsets = @{$db->get_DataSetAdaptor->fetch_all_by_supporting_set($fset)};

  if(@dsets){
    my $txt = $fset_name." is a supporting set of the following DataSets:\t".join(', ', (map {$_->name} @dsets));

    if($force_delete){
      $self->log("WARNING:\t$txt\n");
    }
    else{
      throw($txt."\nPlease resolve or specify the force_delete argument")
    }
  }


  #Remove states
  if(! $slices){
    $adaptor->revoke_states($fset);

    #Revoke InputSet states here as this refers to whether
    #they are imported in the FeatureSet
    #Do this in FeatureSet->revoke_states?
    my $dset = $db->get_DataSetAdaptor->fetch_by_product_FeatureSet($fset);

    #Account for absent dset if we have an external_feature set
    if((! defined $dset) &&
       $feature_class ne 'external'){
      warn "WARNING:\tFeatureSet ".$fset_name." does not have an associated DataSet. Rollback may be incomplete";
    }

    if($dset){

      foreach my $sset (@{$dset->get_supporting_sets}){
        #Maybe skip this if we defined slice?
        #??? Do we want to do this?
        #This is dependant on the feature_class of the InputSet
        #result InputSets may have been imported as ResultFeatureCollections
        #So we want to leave those in place
        #annotated feature_class InputSets are directly imports, so the status of these refers
        #to the FeatureSet import status
        #Where is the imported status set for SWEmbl?

        #Test the class first, so feature_class is only ever called on an InputSet.
        #rollback_InputSet was previously called twice here, once is enough.
        if($sset->isa('Bio::EnsEMBL::Funcgen::InputSet') &&
           ($sset->feature_class eq 'annotated')){
          $self->rollback_InputSet($sset);#add full delete here?
        }
        #Do not want to rollback here for other type of sset
      }
    }
  }
  else{
    $self->log('Skipping '.$fset_name.' revoke_states for partial Slice rollback, maybe revoke IMPORTED? ');
  }

  #should add some log statements here?

  #Rollback reg attributes
  if($feature_class eq 'regulatory'){
    $sql = "DELETE ra from regulatory_attribute ra, $table f where f.${table}_id=ra.${table}_id and f.feature_set_id=".$fset_id.$slice_join;
    $self->rollback_table($sql, 'regulatory_attribute', undef, $db);

    if($full_delete){
      #Now delete meta entries
      #This is messy as we use the following meta_key nomenclature
      #which do not match the fset names

      #regbuild.feature_set_ids_v5
      #regbuild.feature_type_ids_v5
      #regbuild.focus_feature_set_ids
      #regbuild.initial_release_date_v6
      #regbuild.last_annotation_update_v6
      #regbuild.version NEED TO ADD THIS
      #Also need to revise how these are generated by build_reg_feats.
      #What about new cell_type level feature sets?
      #How will we model these in the meta table?

      warn "Need to revise meta table entries before we add a delete here, remove manually for now for:\t".$fset_name;

      #We would only remove meta entries if we are performing a full rollback
      #The version suffix is taken from a trailing _v<digits> in the set name;
      #the substitution leaves the name untouched if there is none.
      my $version;
      ($version = $fset_name) =~ s/.*_v([0-9]+)$/$1/;
      $version = ($version eq $fset_name) ? '' : "_v${version}";

      #These are versionless meta_keys and apply to all sets
      #handle these in reg build script
      #'regbuild.initial_release_date',
      #'regbuild.last_annotation_update'
      #'regbuild.version'

      foreach my $mkey ('regbuild.%s.feature_set_ids',
                        'regbuild.%s.feature_type_ids',
                        'regbuild.%s.focus_feature_set_ids'){
        my $meta_key = sprintf($mkey, $fset->cell_type->name).$version;
        $sql = "DELETE from meta where meta_key='${meta_key}'";
        $self->rollback_table($sql, 'meta', undef, $db);
      }
    }
  }


  #Need to remove object xrefs here
  #Do not remove xrefs as these may be used by something else!
  $sql = "DELETE ox from object_xref ox, $table f where ox.ensembl_object_type='".ucfirst($feature_class)."Feature' and ox.ensembl_id=f.${table}_id and f.feature_set_id=".$fset_id.$slice_join;
  $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db);


  #Remove associated_feature_type records
  #Do not remove actual feature_type records as they may be used by something else.
  $sql ="DELETE aft from associated_feature_type aft, $table f where f.feature_set_id=".$fset_id." and f.${table}_id=aft.table_id and aft.table_name='".$feature_class."_feature'".$slice_join;
  $self->rollback_table($sql, 'associated_feature_type', undef, $db);


  #Remove features
  $sql = "DELETE f from $table f where f.feature_set_id=".$fset_id.$slice_join;
  $self->rollback_table($sql, $table, "${table}_id", $db);

  if($full_delete){
    #Also delete feature/data_set records
    $sql = "DELETE from feature_set where feature_set_id=".$fset_id;
    $self->rollback_table($sql, 'feature_set', 'feature_set_id', $db);
    $self->log("Deleted feature_set entry for:\t".$fset_name);

    $sql = "DELETE from data_set where feature_set_id=".$fset_id;
    $self->rollback_table($sql, 'data_set', 'data_set_id', $db);
    $self->log("Deleted associated data_set entry for:\t".$fset_name);
  }

  return;
}


=head2 rollback_ResultSet

  Arg[1]     : Bio::EnsEMBL::Funcgen::ResultSet
  Arg[2]     : Boolean - optional flag to roll back array results
  Example    : $self->rollback_ResultSet($rset);
  Description: Deletes all status. chip_channel and result_set entries for this ResultSet.
               Will also rollback_results sets if rollback_results specified. This will also
               update or delete associated ResultSets where appropriate.
  Returntype : Arrayref containing the ResultSet and associated DataSet which have not been rolled back
  Exceptions : Throws if ResultSet not valid
               Throws is result_rollback flag specified but associated product FeatureSet found.
Caller     : General
  Status     : At risk

=cut

#Need to change slice to slices ref here
#Need to add full rollback, which will specify to remove all sets
#as well as results and
#These params need clarifying as their nature changes between input_set and array rsets
#Don't we always want to rollback_results?
#force should only really be used to rollback InputSet ResultFeature sets
#i.e. Read collections which are not used as direct input for the linked product FeatureSet
#This should fail with array data associated with a product feature set
#Do we want to separate ResultFeature rollback from result rollback?
#Currently the array based collection rollback is done by hand
#Could be done via the ResultFeature Collector, but should probably use this method.
#rollback_results is only used in the MAGE parser to identify sets which have an
#associated product fset.
#Can't really separate due to integrated functionality

#Further optional arguments, as used by the code below:
#  $slice       - Bio::EnsEMBL::Slice, only accepted for input_set based ResultSets.
#                 Passed on to rollback_ResultFeatures. No records are unlinked or
#                 deleted when this is defined.
#  $force       - allows the results of an input_set based ResultSet to be rolled back
#                 even though a linked DataSet already has a product FeatureSet.
#  $full_delete - when no slice is given, also unlinks the ResultSet from DataSets which
#                 have no product FeatureSet, removes shared result_set_input records
#                 from associated ResultSets and deletes the result_set_input and
#                 result_set records of this ResultSet.

sub rollback_ResultSet{
  my ($self, $rset, $rollback_results, $slice, $force, $full_delete) = @_;

  if(! (ref($rset) && $rset->can('adaptor') && defined $rset->adaptor)){
    throw('Must provide a valid stored Bio::EnsEMBL::ResultSet');
  }

  if($slice && $rset->table_name ne 'input_set'){
    throw('Can only rollback_ResultSet by Slice if the ResultSet contains InputSets');
  }

  #We're still validating against itself??
  #And reciprocating part of the test :|
  my $sql;
  my $db = $rset->adaptor->db;#This needs to be tested
  $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ResultSet', $rset);
  $self->log("Rolling back ResultSet:\t".$rset->name);

  my $dset_adaptor = $self->db->get_DataSetAdaptor;
  my $rset_adaptor = $self->db->get_ResultSetAdaptor;
  my @skipped_sets;

  ### Check if this ResultSet is part of a DataSet with a product feature set

  foreach my $dset (@{$dset_adaptor->fetch_all_by_supporting_set($rset)}){
    next if ! defined $dset;

    $self->log('Found linked DataSet('.$dset->name.") for ResultSet:\t".$rset->log_label);

    if(my $fset = $dset->product_FeatureSet){
      @skipped_sets = ($rset,$dset);

      #What impact does this have on result_rollback?
      #None as we never get there
      #But what if we have specified rollback results?
      #We should throw here as we can't perform the rollback

      if($rollback_results){

        if($rset->table_name ne 'input_set' ||
           (! $force)){#is an input_set/reads collection
          #This will always throws for non-input_set ResultSets

          throw("Could not rollback supporting ResultSet and results for:\t".$rset->log_label.
                "\nEither manually resolve the supporting/feature set relationship or set the 'force' flag.\n");
          # ."Alternatively omit the rollback_results argument if you simply want to redefine the ResultSet without loading any new data");
          #This last bit is no longer true
          #Remove rollback_results?
        }
        else{
          @skipped_sets = ();
          $self->log("Forcing results rollback for InputSet based ResultSet:\t".$rset->log_label);
        }
      }

      if(@skipped_sets){
        $self->log('Skipping rollback. Found product FeatureSet('.$fset->name.") for supporting ResultSet:\t".$rset->log_label);
      }
    }
    elsif((! defined $slice) &&
          $full_delete){
      #Found rset in dset, but not yet processed so can remove safely.
      $self->unlink_ResultSet_DataSet($rset, $dset);
    }
  }


  #Now do similar for all associated ResultSets
  if(! @skipped_sets){

    #Rollback results if required
    if($rollback_results){

      $self->log("Rolling back results for ResultSet:\t".$rset->log_label);
      #Check result_set_input_ids are present in other result sets.
      my @assoc_rsets = @{$rset_adaptor->fetch_all_linked_by_ResultSet($rset)};
      my $feature_supporting = 0;

      foreach my $assoc_rset (@assoc_rsets){

        foreach my $dset (@{$dset_adaptor->fetch_all_by_supporting_set($assoc_rset)}){

          #Check for other product_FeatureSets
          if(my $fset = $dset->product_FeatureSet){
            #NOTE(review): this unconditional increment means the force flag can never
            #override an associated product FeatureSet, and the count reported in the
            #throw below is doubled when not forced. Behaviour left unchanged as it
            #guards a destructive delete - confirm the intent.
            $feature_supporting++;
            $self->log('Found product FeatureSet('.$fset->name.
                       ") for associated supporting ResultSet:\t".$assoc_rset->log_label);

            if($rset->table_name ne 'input_set' ||
               (! $force)){#is an input_set/reads collection
              $feature_supporting++;
            }
          }
        }
      }


      if(! $feature_supporting){

        #RollBack result_feature table first
        $self->rollback_ResultFeatures($rset, $slice);

        #Now rollback other states
        $rset->adaptor->revoke_states($rset);


        #This also handles Echip status rollback
        if ($rset->table_name ne 'input_set'){
          $self->log("Rolling back result table for ResultSet:\t".$rset->log_label);
          $self->rollback_results($rset->result_set_input_ids);
        }

        if((! $slice) &&
           $full_delete){
          $self->log('Removing result_set_input entries from associated ResultSets') if @assoc_rsets;

          my @rset_input_ids = @{$rset->result_set_input_ids};
          my %is_rset_input  = map { $_ => 1 } @rset_input_ids;

          #Now remove result_set_input_ids from associated rsets.
          foreach my $assoc_rset (@assoc_rsets){

            #The original expression, join', ', @{...}.')', parsed as
            #join(', ', scalar(@ids).')') and so generated 'in(<number of ids>)'.
            #NOTE(review): the list is restricted to the ids of the ResultSet being
            #rolled back i.e. those shared with the associated set. The original named
            #the associated set's own ids, which would also strip inputs that are not
            #being rolled back - confirm this is the intended scope.
            if(@rset_input_ids){
              $sql = 'DELETE from result_set_input where result_set_id='.$assoc_rset->dbID.
                ' and result_set_input_id in('.join(', ', @rset_input_ids).')';
              $db->dbc->do($sql);
            }

            # we need to delete complete subsets from the result_set table.
            #Exact id comparison; the previous regex match also accepted
            #partial matches e.g. id 1 matched 12.
            my $subset = 1;

            foreach my $cc_id (@{$assoc_rset->result_set_input_ids}){

              if(! exists $is_rset_input{$cc_id}){
                $subset = 0;
                last;
              }
            }

            #$assoc_rset is complete subset of $rset so can delete
            #We know this does not have an associated product feature set
            #Only if it is not derived from an input_set
            if($subset){
              $self->log("Deleting associated subset ResultSet:\t".$assoc_rset->log_label);

              #Delete status entries first
              $assoc_rset->adaptor->revoke_states($assoc_rset);

              #All cc records will have already been deleted
              $sql = 'DELETE from result_set where result_set_id='.$assoc_rset->dbID;
              $db->dbc->do($sql);
            }
          }
        }


        #Now warn about Echips in Experiments which may need removing.
        if($rset->table_name ne 'input_set'){
          my %experiment_chips;

          foreach my $echip (@{$rset->get_ExperimentalChips}){
            $experiment_chips{$echip->experiment->name}{$echip->unique_id} = undef;
          }

          foreach my $exp (keys %experiment_chips){
            #The unique ids are the hash keys, the values are all undef
            my @chip_ids = sort keys %{$experiment_chips{$exp}};

            $self->log("Experiment $exp has had ".scalar(@chip_ids).
                       " ExperimentalChips rolled back:\t".join('; ', @chip_ids).
                       ".\nTo fully remove these, use the rollback_experiment.pl (with -chip_ids) script");
          }
        }
        else{
          #Should only be one to rollback
          foreach my $iset (@{$rset->get_InputSets}){
            $self->rollback_InputSet($iset);
          }
        }
      }
      else{
        #$self->log("Skipping result rollback, found $feature_supporting associated supporting ResultSets for:\t".$rset->log_label);
        #warn("Skipping result rollback, found $feature_supporting associated supporting ResultSets for:\t".$rset->log_label);
        #do we need to return this info in skipped_rsets?
        #This is just to allow importer to know which ones
        #weren't rolled back to avoid naming clashes.
        #so no.

        #But the results persist on the same chip_channel_ids
        #So not returning this rset may result in loading of more data
        #This should fail as status entries will not have been removed
        #Still we should throw here as we'll most likely want to manually resolve this
        #Besides this would be obfuscating the function

        throw("Could not rollback ResultSet and results, found $feature_supporting associated supporting ".
              "ResultSets for:\t".$rset->log_label."\nManually resolve the supporting/feature set relationship or omit the ".
              "rollback_results argument if you simply want to redefine the ResultSet without loading any new data");
      }
    }
    else{
      $self->log('Skipping results rollback');

      if($rset->name =~ /_IMPORT$/){
        throw("Rolling back an IMPORT set without rolling back the result can result in ophaning result records for a whole experiment. Specify the result_rollback flag if you want to rollback the results for:\t".$rset->log_label);
      }
    }

    #Delete chip_channel and result_set records
    #This should only be done with full delete
    if((! $slice) &&
       $full_delete){
      $sql = 'DELETE from result_set_input where result_set_id='.$rset->dbID;
      $self->rollback_table($sql, 'result_set_input', 'result_set_input_id', $db);

      #rollback_table executes the statement itself, so it is no longer
      #also run directly through the dbc beforehand.
      $sql = 'DELETE from result_set where result_set_id='.$rset->dbID;
      $self->rollback_table($sql, 'result_set', 'result_set_id', $db);
    }
  }

  return \@skipped_sets;
}


=head2 unlink_ResultSet_DataSet

  Arg[1]     : Bio::EnsEMBL::Funcgen::ResultSet
  Arg[2]     : Bio::EnsEMBL::Funcgen::DataSet
  Arg[3]     : optional - boolean, if true the result_set record is renamed to OLD_<name>.
               Only the truth of this value is used, not the value itself.
  Example    : $self->unlink_ResultSet_DataSet($rset, $dset);
  Description: Deletes the supporting_set record which links the ResultSet to the DataSet.
               Warns as this may leave a DataSet with no supporting sets.
  Returntype : None
  Exceptions : None
  Caller     : rollback_ResultSet
  Status     : At risk

=cut

sub unlink_ResultSet_DataSet{
  my ($self, $rset, $dset, $new_name) = @_;

  #validate set vars

  my $db = $rset->adaptor->db;

  $self->log("Removing supporting ResultSet from DataSet:\t".$dset->name."\tResultSet:".$rset->log_label);

  my $sql = 'DELETE from supporting_set where data_set_id='.$dset->dbID.
    ' and type="result" and supporting_set_id='.$rset->dbID;

  warn "Removing ".$rset->log_label." as a supporting set to DataSet:\t".$dset->name.
    "\nThis may result in a DataSet with no supporting sets";
  $db->dbc->do($sql);

  if($new_name){
    #We risk overwriting any previously renamed result sets.
    #Should use datestamp?
    $sql = 'UPDATE result_set set name="OLD_'.$rset->name.'" where result_set_id='.$rset->dbID;
    #Use the DB of the ResultSet, as for the delete above
    $db->dbc->do($sql);

    if($dset->product_FeatureSet){
      my $msg = 'Associated DataSet('.$dset->name.') has already been processed. It is not wise to replace a supporting set without first rolling back the FeatureSet, as there may be additional supporting data';
      $self->log($msg);
      warn $msg;
    }
  }

  return;
}


=head2 rollback_InputSet

  Arg[1]     : Bio::EnsEMBL::Funcgen::InputSet
  Arg[2]     : optional - force_delete flag, not yet implemented
  Arg[3]     : optional - full_delete flag, currently unused
  Example    : $self->rollback_InputSet($eset);
  Description: Deletes all status entries for this InputSet and its Subsets
  Returntype : none
  Exceptions : Throws if the InputSet has no adaptor or is not stored and valid
  Caller     : Importers and Parsers
  Status     : At risk

=cut

sub rollback_InputSet{
  my ($self, $eset, $force_delete, $full_delete) = @_;

  #Need to implement force_delete!!!!!!!!!!!!!!!!!!!!!!
  #Need to check this is not used in a DataSet/ResultSet

  my $adaptor = $eset->adaptor || throw('InputSet must have an adaptor');
  my $db      = $adaptor->db;
  $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::InputSet', $eset);

  $self->log("Rolling back InputSet:\t".$eset->name);

  #SubSets
  foreach my $esset (@{$eset->get_InputSubsets}){
    $esset->adaptor->revoke_states($esset);
  }

  #InputSet
  $adaptor->revoke_states($eset);

  return;
}


=head2 rollback_results

  Arg[1]     : Arrayref of result_set_input (chip_channel) ids
  Example    : $self->rollback_results($rset->result_set_input_ids);
  Description: Deletes all result records for the given result_set_input ids.
               Also deletes all status records for associated experimental_chips or channels
  Returntype : None
  Exceptions : Throws if no result_set_input ids provided
               Throws if the status delete fails
  Caller     : General
  Status     : At risk

=cut

#changed implementation to take arrayref

sub rollback_results{
  my ($self, $cc_ids) = @_;

  #Validate before dereferencing, so a missing or non-arrayref argument
  #gives the intended exception rather than a dereference error.
  if(! ((ref($cc_ids) eq 'ARRAY') && @$cc_ids)){
    throw('Must pass an array ref of result_set_input_ids to rollback');
  }

  my @cc_ids = @{$cc_ids};

  #Need to test for $self->db here?

  #Rollback status entries
  #Cannot use revoke_states here?
  #We can if we retrieve the Chip or Channel first
  #Add to ResultSet adaptor
  my $sql = 'DELETE s from status s, result_set_input rsi WHERE rsi.result_set_input_id IN ('.join(',', @cc_ids).
    ') AND rsi.table_id=s.table_id AND rsi.table_name=s.table_name';

  if(! $self->db->dbc->do($sql)){
    throw("Status rollback failed for result_set_input_ids:\t@cc_ids\n".$self->db->dbc->db_handle->errstr());
  }

  #Rollback result entries
  $sql = 'DELETE from result where result_set_input_id in ('.join(',', @cc_ids).');';
  $self->rollback_table($sql, 'result', 'result_id', $self->db);

  return;
}


=head2 rollback_ResultFeatures

  Arg[0]     : Bio::EnsEMBL::Funcgen::ResultSet
  Arg[1]     : Optional - Bio::EnsEMBL::Slice, must be a full length Slice
  Arg[2]     : Optional - no_revoke Boolean. This is only used when generating new windows
               from a 0 window size which has been projected from a previous assembly.
  Example    : $self->rollback_ResultFeatures($rset);
  Description: Deletes all result_feature records for the given ResultSet.
               Also deletes 'RESULT_FEATURE_SET' status unless no_revoke is specified.
               Returns without deleting if the seq_region of the Slice is not yet in the DB.
  Returntype : None
  Exceptions : Throws if ResultSet not provided
               Throws if no_revoke is specified without a Slice
               Throws if the Slice is not valid or not full length
  Caller     : General
  Status     : At risk

=cut

sub rollback_ResultFeatures{
  my ($self, $rset, $slice, $no_revoke) = @_;

  if(! (ref($rset) && $rset->can('adaptor') && defined $rset->adaptor)){
    throw('Must provide a valid stored Bio::EnsEMBL::ResultSet');
  }

  if(! $slice && $no_revoke){
    throw("Cannot rollback_ResultFeatures with no_revoke unless you specify a Slice");
  }
  #else warn if slice and no_revoke?

  #Default to empty strings, as these are concatenated below
  #even when no Slice has been specified.
  my $sql;
  my $slice_name       = '';
  my $slice_constraint = '';

  if($slice){

    if(! (ref($slice) && $slice->isa('Bio::EnsEMBL::Slice'))){
      throw('slice argument must be a valid Bio::EnsEMBL::Slice');
    }

    my $sr_id = $rset->adaptor->db->get_ResultFeatureAdaptor->get_seq_region_id_by_Slice($slice);

    #seq_region is not yet present in DB
    return if ! $sr_id;

    #Need to test for full slice here
    my $full_slice = $slice->adaptor->fetch_by_region(undef, $slice->seq_region_name);
    $slice_name       = "\t".$slice->name;
    $slice_constraint = ' and seq_region_id='.$sr_id;

    if(($slice->start != 1) ||
       ($slice->end != $full_slice->end)){
      throw("rollback_ResultFeatures does not yet support non-full length Slices:\t".$slice_name);

      #Need to test whether we have non-0 wsize collections without the exact seq_region values
      #$sql='SELECT window_size from result_feature where result_feature_id='.$rset->dbID.
      #	' and window_size!=0 and seq_region_start!='.$slice->start.' and seq_region_end!='.$slice->end.$slice_constraint;
    }
  }

  #We're still validating against itself??
  #And reciprocating part of the test :|
  my $db = $rset->adaptor->db;
  $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ResultSet', $rset);

  #Do this conditionally on whether it is a result_feature_set?
  #This may break if we have removed the status but not finished the rollback so no!
  $self->log("Rolling back result_feature table for ResultSet:\t".$rset->name.$slice_name);

  #Rollback status entry
  if($rset->has_status('RESULT_FEATURE_SET') && ! $no_revoke){
    $rset->adaptor->revoke_status('RESULT_FEATURE_SET', $rset);
  }

  #Cannot use revoke_states here?
  #We can if we retrieve the Chip or Channel first
  #Add to ResultSet adaptor
  $sql = 'DELETE from result_feature where result_set_id='.$rset->dbID.$slice_constraint;
  $self->rollback_table($sql, 'result_feature', 'result_feature_id', $db);

  return;
}


=head2 rollback_ArrayChips

  Arg[1]     : ARRAYREF: Bio::EnsEMBL::Funcgen::ArrayChip objects
  Example    : $self->rollback_ArrayChips([$achip1, $achip2]);
  Description: Deletes all Probes, ProbeSets, ProbeFeatures and
               states associated with this ArrayChip
  Returntype : None
  Exceptions : Throws if ArrayChip not valid and stored
               Throws if ArrayChips are not of same class
  Caller     : General
  Status     : At risk

=cut

#This should be tied to a CS id!!!
#And analysis dependant?
#We may not want to delete alignment by different analyses?
#In practise the slice methods ignore analysis_id for this table
#So we currently never use this!
#So IMPORTED status should be tied to CS id and Analysis id?

sub rollback_ArrayChips{
  my ($self, $acs, $mode, $force, $keep_xrefs, $no_clean_up, $force_clean_up) = @_;

  #no_clean_up and force_clean_up allow analyze/optimize to be skipped until the last rollback
  #We could get around this by specifying all ArrayChips for all formats at the same time?
#Need to implement in RollbackArrays $mode ||= 'probe'; if($mode && ($mode ne 'probe' && $mode ne 'probe_feature' && $mode ne 'ProbeAlign' && $mode ne 'ProbeTranscriptAlign' && $mode ne 'probe2transcript')){ throw("You have passed an invalid mode argument($mode), you must omit or specify either 'probe2transcript', 'probe', 'ProbeAlign, 'ProbeTranscriptAlign' or 'probe_feature' for all of the Align output"); } if($force && ($force ne 'force')){ throw("You have not specified a valid force argument($force), you must specify 'force' or omit"); } if($keep_xrefs && ($keep_xrefs ne 'keep_xrefs')){ throw("You have not specified a valid keep_xrefs argument($keep_xrefs), you must specify 'keep_xrefs' or omit"); } if($keep_xrefs){ if($mode eq 'probe' || $mode eq 'probe2transcript'){ throw("You cannot specify 'keep_xrefs' with mode $mode, you can only rollback features e.g. probe_feature, ProbeAlign or ProbeTranscriptAlign"); } if($force){ throw("You cannot 'force' delete the probe2transcript xrefs and 'keep_xrefs' at the same time. Please specify just one."); } } my ($adaptor, $db, %classes); foreach my $ac(@$acs){ $adaptor ||= $ac->adaptor || throw('ArrayChip must have an adaptor'); $db ||= $adaptor->db; $db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ArrayChip', $ac); if(! $ac->get_Array->class){ throw('The ArrayChip you are trying to rollback does not have a class attribute'); } $classes{$ac->get_Array->class} = undef; #if($class && ($class ne $ac->get_Array->class)){ # throw('You can only rollback_ArrayChips for ArrayChips with the same class'); #} } #This is always the case as we register the association before we set the Import status #Hence the 2nd stage of the import fails as we have an associated ExperimentalChip #We need to make sure the ExperimentalChip and Channel have not been imported!!! warn "NOTE: rollback_ArrayChips. 
Need to implement ExperimentlChip check, is the problem that ExperimentalChips are registered before ArrayChips imported?"; #Check for dependent ExperimentalChips #if(my @echips = @{$db->get_ExperimentalChipAdaptor->fetch_all_by_ArrayChip($ac)}){ # my %exps; # my $txt = "Experiment\t\t\t\tExperimentalChip Unique IDs\n"; # foreach my $ec(@echips){ # $exps{$ec->get_Experiment->name} ||= ''; # $exps{$ec->get_Experiment->name} .= "\t".$ec->unique_id; # } # map {$txt.= "\t".$_.":".$exps{$_}."\n"} keys %exps; # throw("Cannot rollback ArrayChip:\t".$ac->name. # "\nFound Dependent Experimental Data:\n".$txt); # } my $ac_names = join(', ', (map { $_->name } @$acs)); my $ac_ids = join(', ', (map { $_->dbID } @$acs)); $self->log("Rolling back ArrayChips $mode entries:\t$ac_names"); my ($row_cnt, $probe_join, $sql); #$ac->adaptor->revoke_states($ac);#This need to be more specific to the type of rollback my $species = $db->species; if(!$species){ throw('Cannot rollback probe2transcript level xrefs without specifying a species for the DBAdaptor'); } #Will from registry? this return Homo sapiens? #Or homo_sapiens ($species = lc($species)) =~ s/ /_/; my $transc_edb_name = "${species}_core_Transcript"; my $genome_edb_name = "${species}_core_Genome"; #Maybe we want to rollback ProbeAlign and ProbeTranscriptAlign output separately so we #can re-run just one part of the alignment step. #We want this Probe(Transcript)Align rollback available in the environment #So we can do it natively and before we get to the RunnableDB stage, #where we would be trying multiple rollbacks in parallel #Wrapper script? #Or do we keep it simple here and maintain probe_feature wide rollback #And just the ProbeAlign/ProbeTranscriptAlign roll back in the environment? #We can restrict the probe deletes using the ac_id #We should test for other ac_ids using the same probe_id #Then fail unless we have specified force delete #These should be deleted for all other modes but only if force is set? 
#This may delete xrefs for other ArrayChips #The issues is if we need to specify force for one delete but don't want to delete something else? #force should only be used to delete upto and including the mode specified #no mode equates to probe mode #if no force then we fail if previous levels/modes have xrefs etc... #Let's grab the edb ids first and use them directly, this will avoid table locks on edb #and should also speed query up? if($mode eq 'probe2transcript' || $force){ #Delete ProbeFeature UnmappedObjects $self->log("Deleting probe2transcript ProbeFeature UnmappedObjects"); $sql = "DELETE uo FROM analysis a, unmapped_object uo, probe p, probe_feature pf, external_db e WHERE a.logic_name ='probe2transcript' AND a.analysis_id=uo.analysis_id AND p.probe_id=pf.probe_id and pf.probe_feature_id=uo.ensembl_id and uo.ensembl_object_type='ProbeFeature' and uo.external_db_id=e.external_db_id AND e.db_name ='${transc_edb_name}' AND p.array_chip_id IN($ac_ids)"; $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up); #Delete ProbeFeature Xrefs/DBEntries $self->log("Deleting probe2transcript ProbeFeature Xrefs"); $sql = "DELETE ox FROM xref x, object_xref ox, probe p, probe_feature pf, external_db e WHERE x.external_db_id=e.external_db_id AND e.db_name ='${transc_edb_name}' AND x.xref_id=ox.xref_id AND ox.ensembl_object_type='ProbeFeature' AND ox.ensembl_id=pf.probe_feature_id AND pf.probe_id=p.probe_id AND ox.linkage_annotation!='ProbeTranscriptAlign' AND p.array_chip_id IN($ac_ids)"; $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up); #Probe/Set specific entries for my $xref_object('Probe', 'ProbeSet'){ $probe_join = ($xref_object eq 'ProbeSet') ? 
'p.probe_set_id' : 'p.probe_id'; #Delete Probe/Set UnmappedObjects $self->log("Deleting probe2transcript $xref_object UnmappedObjects"); $sql = "DELETE uo FROM analysis a, unmapped_object uo, probe p, external_db e WHERE a.logic_name='probe2transcript' AND a.analysis_id=uo.analysis_id AND uo.ensembl_object_type='${xref_object}' AND $probe_join=uo.ensembl_id AND uo.external_db_id=e.external_db_id AND e.db_name='${transc_edb_name}' AND p.array_chip_id IN($ac_ids)"; #.' and edb.db_release="'.$schema_build.'"'; $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up); #Delete Probe/Set Xrefs/DBEntries $sql = "DELETE ox FROM xref x, object_xref ox, external_db e, probe p WHERE x.xref_id=ox.xref_id AND e.external_db_id=x.external_db_id AND e.db_name ='${transc_edb_name}' AND ox.ensembl_object_type='${xref_object}' AND ox.ensembl_id=${probe_join} AND p.array_chip_id IN($ac_ids)"; $self->log("Deleting probe2transcript $xref_object xref records"); $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up); } } elsif(! $keep_xrefs){#Need to check for existing xrefs if not force #we don't know whether this is on probe or probeset level #This is a little hacky as there's not way we can guarantee this xref will be from probe2transcript #until we get the analysis_id moved from identity_xref to xref #We are also using the Probe/Set Xrefs as a proxy for all other Xrefs and UnmappedObjects #Do we need to set a status here? Would have problem rolling back the states of associated ArrayChips for my $xref_object('Probe', 'ProbeSet'){ $probe_join = ($xref_object eq 'ProbeSet') ? 
'p.probe_set_id' : 'p.probe_id'; $row_cnt = $db->dbc->db_handle->selectrow_array("SELECT COUNT(*) FROM xref x, object_xref ox, external_db e, probe p WHERE x.xref_id=ox.xref_id AND e.external_db_id=x.external_db_id AND e.db_name ='${transc_edb_name}' and ox.ensembl_object_type='${xref_object}' and ox.ensembl_id=${probe_join} AND p.array_chip_id IN($ac_ids)"); if($row_cnt){ throw("Cannot rollback ArrayChips($ac_names), found $row_cnt $xref_object Xrefs. Pass 'force' argument or 'probe2transcript' mode to delete"); } else{ #$self->log("Found $row_cnt $xref_object Xrefs"); } } } #ProbeFeatures inc ProbeTranscriptAlign xrefs if($mode ne 'probe2transcript'){ if(($mode eq 'probe' && $force) || $mode eq 'probe_feature' || $mode eq 'ProbeAlign' || $mode eq 'ProbeTranscriptAlign'){ #Should really revoke some state here but we only have IMPORTED #ProbeTranscriptAlign Xref/DBEntries #my (@anal_ids) = @{$db->get_AnalysisAdaptor->generic_fetch("a.module='ProbeAlign'")}; #Grrrr! AnalysisAdaptor is not a standard BaseAdaptor implementation #my @anal_ids = @{$db->dbc->db_handle->selectall_arrayref('select analysis_id from analysis where module like "%ProbeAlign"')}; #@anal_ids = map {$_= "@$_"} @anal_ids; if($mode ne 'ProbeAlign'){ my $lnames = join(', ', (map { "'${_}_ProbeTranscriptAlign'" } keys(%classes))); $sql = "DELETE ox from object_xref ox, xref x, probe p, probe_feature pf, external_db e WHERE ox.ensembl_object_type='ProbeFeature' AND ox.linkage_annotation='ProbeTranscriptAlign' AND ox.xref_id=x.xref_id AND e.external_db_id=x.external_db_id and e.db_name='${transc_edb_name}' AND ox.ensembl_id=pf.probe_feature_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; $self->log("Deleting ProbeFeature Xref/DBEntry records for:\t$lnames"); $self->rollback_table($sql, 'object_xref', 'object_xref_id', $db, $no_clean_up); #Can't include uo.type='ProbeTranscriptAlign' in these deletes yet as uo.type is enum'd to xref or probe2transcript #will have to join to analysis and 
do a like "%ProbeTranscriptAlign" on the the logic name? #or/and ur.summary_description='Promiscuous probe'? $sql = "DELETE uo from unmapped_object uo, probe p, external_db e, analysis a WHERE uo.ensembl_object_type='Probe' AND uo.analysis_id=a.analysis_id AND a.logic_name in (${lnames}) AND e.external_db_id=uo.external_db_id and e.db_name='${transc_edb_name}' AND uo.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; $self->log("Deleting UnmappedObjects for:\t${lnames}"); $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up); #Now the actual ProbeFeatures $sql = "DELETE pf from probe_feature pf, probe p, analysis a WHERE a.logic_name in(${lnames}) AND a.analysis_id=pf.analysis_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; $self->log("Deleting ProbeFeatures for:\t${lnames}"); $self->rollback_table($sql, 'probe_feature', 'probe_feature_id', $db, $no_clean_up); } if($mode ne 'ProbeTranscriptAlign'){ my $lnames = join(', ', (map { "'${_}_ProbeAlign'" } keys(%classes))); $sql = "DELETE uo from unmapped_object uo, probe p, external_db e, analysis a WHERE uo.ensembl_object_type='Probe' AND uo.analysis_id=a.analysis_id AND a.logic_name=(${lnames}) AND e.external_db_id=uo.external_db_id and e.db_name='${genome_edb_name}' AND uo.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; $self->log("Deleting UnmappedObjects for:\t${lnames}"); $self->rollback_table($sql, 'unmapped_object', 'unmapped_object_id', $db, $no_clean_up); $sql = "DELETE pf from probe_feature pf, probe p, analysis a WHERE a.logic_name in(${lnames}) AND a.analysis_id=pf.analysis_id AND pf.probe_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; $self->log("Deleting ProbeFeatures for:\t${lnames}"); $self->rollback_table($sql, 'probe_feature', 'probe_feature_id', $db, $no_clean_up); } } else{ #Need to count to see if we can carry on with a unforced probe rollback? 
#Do we need this level of control here #Can't we assume that if you want probe you also want probe_feature? #Leave for safety, at least until we get the dependant ExperimetnalChip test sorted #What about if we only want to delete one array from an associated set? #This would delete all the features from the rest? $sql = "select count(*) from object_xref ox, xref x, probe p, external_db e WHERE ox.ensembl_object_type='ProbeFeature' AND ox.linkage_annotation='ProbeTranscriptAlign' AND ox.xref_id=x.xref_id AND e.external_db_id=x.external_db_id and e.db_name='${transc_edb_name}' AND ox.ensembl_id=p.probe_id AND p.array_chip_id IN($ac_ids)"; $row_cnt = $db->dbc->db_handle->selectrow_array($sql); if($row_cnt){ throw("Cannot rollback ArrayChips($ac_names), found $row_cnt ProbeFeatures. Pass 'force' argument or 'probe_feature' mode to delete"); } else{ $self->log("Found $row_cnt ProbeFeatures"); } } if($mode eq 'probe'){ #Don't need to rollback on a CS as we have no dependant EChips? #Is this true? Should we enforce a 3rd CoordSystem argument, 'all' string we delete all? foreach my $ac(@$acs){ $ac->adaptor->revoke_states($ac);#Do we need to change this to revoke specific states? #Current states are only IMPORTED, so not just yet, but we could change this for safety? } #ProbeSets $sql = "DELETE ps from probe p, probe_set ps where p.array_chip_id IN($ac_ids) and p.probe_set_id=ps.probe_set_id"; $self->rollback_table($sql, 'probe_set', 'probe_set_id', $db, $no_clean_up); #Probes $sql = "DELETE from probe where array_chip_id IN($ac_ids)"; $self->rollback_table($sql, 'probe', 'probe_id', $db, $no_clean_up); } } $self->log("Finished $mode roll back for ArrayChip:\t$ac_names"); return; } #This will just fail silently if the reset value #Is less than the true autoinc value #i.e. 
# if there are parallel inserts going on.
# So we can never assume that the $new_auto_inc will be used.

=head2 rollback_table

  Arg [1]    : String - SQL statement which deletes the records to roll back
  Arg [2]    : String - name of the table being rolled back
  Arg [3]    : String - name of the auto-increment ID field of the table
  Arg [4]    : DBAdaptor - must provide dbc->do
  Arg [5]    : Boolean - no_clean_up, skip the table refresh after the delete
  Arg [6]    : Boolean - force_clean_up, refresh the table even if no rows
               were deleted or no_clean_up is set
  Example    : $self->rollback_table($sql, 'probe', 'probe_id', $db);
  Description: Executes the given SQL, logs the number of affected rows and
               optionally refreshes the table (see refresh_table).
  Returntype : None
  Exceptions : Throws if the SQL fails or returns an undefined row count
  Caller     : rollback methods
  Status     : At risk

=cut

sub rollback_table{
  my ($self, $sql, $table, $id_field, $db, $no_clean_up, $force_clean_up) = @_;

  my $row_cnt;

  # Test the eval return value rather than $@ alone, as $@ is a global which
  # can be reset before we get to inspect it.
  my $ok = eval { $row_cnt = $db->dbc->do($sql); 1 };

  # DBI's do() returns undef on error, so an undefined count is also a failure
  if(! $ok || ! defined $row_cnt){
    my $error = $@ || 'do() returned an undefined row count';
    throw("Failed to rollback table $table using sql:\t$sql\n$error");
  }

  # DBI returns the 'zero but true' string 0E0 when no rows were affected
  $row_cnt = 0 if $row_cnt eq '0E0';
  $self->log("Deleted $row_cnt $table records");

  if($force_clean_up || ($row_cnt && ! $no_clean_up)){
    $self->refresh_table($table, $id_field, $db);
  }

  return;
}

=head2 refresh_table

  Arg [1]    : String - table name
  Arg [2]    : String - auto-increment ID field, the auto-increment reset is
               skipped if this is omitted
  Arg [3]    : DBAdaptor
  Example    : $self->refresh_table('probe', 'probe_id', $db);
  Description: Resets the auto-increment value of the table and then runs
               'optimize table' and 'analyze table' on it.
               Now separated so that we can do this once at the end of a
               rollback of many Sets.
  Returntype : None
  Exceptions : None directly, see reset_table_autoinc
  Caller     : rollback_table
  Status     : At risk

=cut

sub refresh_table{
  my ($self, $table, $id_field, $db) = @_;

  #This only works if the new value is available
  #i.e. do not need lock for this to be safe
  $self->reset_table_autoinc($table, $id_field, $db) if $id_field;

  $self->log("Optimizing and Analyzing $table");

  $db->dbc->do("optimize table $table");#defrag data, sorts indices, updates table stats
  $db->dbc->do("analyze table $table");#analyses key distribution

  return;
}

=head2 reset_table_autoinc

  Arg [1]    : String - table name
  Arg [2]    : String - auto-increment field name
  Arg [3]    : Bio::EnsEMBL::DBSQL::DBAdaptor
  Example    : $self->reset_table_autoinc('probe', 'probe_id', $db);
  Description: Sets the AUTO_INCREMENT value of the table to one more than
               the highest value currently stored in the given field, or to
               1 if no value is found.
  Returntype : None
  Exceptions : Throws if any argument is missing or if the DBAdaptor is not
               a Bio::EnsEMBL::DBSQL::DBAdaptor
  Caller     : refresh_table
  Status     : At risk

=cut

sub reset_table_autoinc{
  #Is this called elsewhere or can we merge with refresh_table?
  my($self, $table_name, $autoinc_field, $db) = @_;

  if(! ($table_name && $autoinc_field && $db)){
    # All three arguments are tested here, so name all three in the message
    throw('You must pass a table_name, an autoinc_field and a DBAdaptor to reset the autoinc value');
  }

  if(! (ref($db) && $db->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'))){
    throw('Must pass a valid Bio::EnsEMBL::DBSQL::DBAdaptor');
  }

  #my $sql = "show table status where name='$table_name'";
  #my ($autoinc) = ${$db->dbc->db_handle->selectrow_array($sql)}[11];
  #11 is the field in the show table status table
  #We cannot select just the Auto_increment, so this will fail if the table format changes

  #Why do we need autoinc here?
  # Use the highest stored ID rather than the table status
  my $sql = "select $autoinc_field from $table_name order by $autoinc_field desc limit 1";
  my ($current_auto_inc) = $db->dbc->db_handle->selectrow_array($sql);

  # No current value (e.g. no row returned) restarts the sequence at 1
  my $new_autoinc = ($current_auto_inc) ? ($current_auto_inc + 1) : 1;

  $sql = "ALTER TABLE $table_name AUTO_INCREMENT=$new_autoinc";
  $db->dbc->do($sql);

  return;
}


=head2 get_core_display_name_by_stable_id

  Args [1]   : Bio::EnsEMBL::DBSQL::DBAdaptor
  Args [2]   : stable ID from core DB.
  Args [3]   : stable feature type e.g. gene, transcript, translation
  Example    : my $name = $self->get_core_display_name_by_stable_id($cdb, $stable_id, 'gene');
  Description: Builds a cache of stable ID to display names.
  Returntype : string - display name
  Exceptions : Throws if type is not valid.
  Caller     : General
  Status     : At risk

=cut

# --------------------------------------------------------------------------------
# Build a cache of ensembl stable ID -> display_name
# Returns the display name string. The cache is keyed on the stable ID only,
# not on {$type}{$stable_id}, so a cached entry is returned whatever type is
# passed on later calls.

sub get_core_display_name_by_stable_id{
  my ($self, $cdb, $stable_id, $type) = @_;

  $type = defined $type ? lc($type) : '';

  # $type is used as the table name in the SQL below, so it must match one of
  # the supported names exactly. An unanchored match would also accept any
  # string which merely contains one of them.
  if($type !~ /\A(?:gene|transcript|translation)\z/){
    throw("Cannot get display_name for stable_id $stable_id with type $type");
  }

  if(! exists $self->{'display_name_cache'}->{$stable_id}){
    # Bind the stable ID instead of interpolating it into the statement
    my $sql = "SELECT x.display_label FROM $type t, xref x where t.display_xref_id=x.xref_id and t.stable_id=?";

    # List assignment stores undef for an unknown stable ID, so misses are
    # cached as well and are not queried again.
    ($self->{'display_name_cache'}->{$stable_id}) =
      $cdb->dbc->db_handle->selectrow_array($sql, undef, $stable_id);
  }

  return $self->{'display_name_cache'}->{$stable_id};
}


=head2 get_core_stable_id_by_display_name

  Args [1]   : Bio::EnsEMBL::DBSQL::DBAdaptor
  Args [2]   : display name (e.g. from core DB or GNC name)
  Example    : my $stable_id = $self->get_core_stable_id_by_display_name($cdb, $display_name);
  Description: Builds a cache of display names to gene stable IDs.
  Returntype : string - gene stable ID
  Exceptions : None
  Caller     : General
  Status     : At risk

=cut

# --------------------------------------------------------------------------------
# Build a cache of display_name -> ensembl gene stable ID
# The cache is keyed on the display name only.
# Only the gene table is queried, so there is no 'type' to take into account.
sub get_core_stable_id_by_display_name{
  # Returns the gene stable ID whose display xref label matches the given
  # display name, caching the result in $self->{'stable_id_cache'}.
  #
  # Args    : $cdb          - object providing dbc->db_handle (core DBAdaptor)
  #           $display_name - xref display label to look up
  # Returns : string - gene stable ID, or undef if no gene matches
  my ($self, $cdb, $display_name) = @_;

  # Only the gene table is queried, so there is no $type argument to validate
  # here (unlike get_core_display_name_by_stable_id).

  if(! exists $self->{'stable_id_cache'}->{$display_name}){
    # The previous statement contained a doubled 'and and', which is not
    # valid SQL. The display name is now bound rather than interpolated, so
    # labels containing quotes can not break the statement.
    my $sql = 'SELECT g.stable_id FROM gene g, xref x '.
      'where g.display_xref_id=x.xref_id and x.display_label=?';

    # selectrow_array returns the first row only; the list assignment stores
    # undef when nothing matches, so misses are cached too.
    ($self->{'stable_id_cache'}->{$display_name}) =
      $cdb->dbc->db_handle->selectrow_array($sql, undef, $display_name);
  }

  return $self->{'stable_id_cache'}->{$display_name};
}

1;