=head1 LICENSE

Copyright (c) 1999-2012 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.

This software is distributed under a modified Apache license.
For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

Please email comments or questions to the public Ensembl
developers list at <ensembl-dev@ebi.ac.uk>.

Questions may also be sent to the Ensembl help desk at
<helpdesk@ensembl.org>.

=head1 NAME

Bio::EnsEMBL::Funcgen::Utils::HealthChecker

=head1 SYNOPSIS

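The example below is an illustrative sketch only: the DBAdaptor construction is
omitted and the assembly version passed via -builds is a hypothetical value,
not something this module defines.

  use Bio::EnsEMBL::Funcgen::Utils::HealthChecker;

  # $db is assumed to be a pre-configured Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor
  my $health_checker = Bio::EnsEMBL::Funcgen::Utils::HealthChecker->new
    (
     -db     => $db,
     -builds => ['GRCh37'],  # hypothetical assembly version(s)
    );

  # Run all the pre-release checks/updates listed in the DESCRIPTION
  $health_checker->update_db_for_release;
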
=head1 DESCRIPTION

B<This module> provides several methods to health check and update tables prior to
release. Running the update_db_for_release method performs the following steps:

  validate_new_seq_regions    - _pre_stores seq_region & coord_system info from the new core DB
  check_regbuild_strings      - Validates or inserts regbuild_string entries
  check_meta_species_version  - Validates meta species and version wrt the dbname
  set_current_coord_system    - Updates coord_system.is_current to 1 for the current schema_build (required for mart)
  update_meta_coord           - Regenerates meta_coord.max_length values (required for Slice range queries)
  clean_xrefs                 - Removes old unused xref and external_db records
  validate_DataSets           - Performs various checks on Data/Feature/ResultSet links and states
  check_stable_ids            - Checks for any NULL stable IDs
  log_data_sets               - Logs all DISPLAYABLE DataSets
  analyse_and_optimise_tables - Does what it says

=cut

################################################################################

package Bio::EnsEMBL::Funcgen::Utils::HealthChecker;

use strict;
use Bio::EnsEMBL::Funcgen::Utils::Helper;
use Bio::EnsEMBL::Funcgen::ProbeFeature;
use Bio::EnsEMBL::Utils::Argument  qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( throw );
use Bio::EnsEMBL::Analysis;
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Funcgen::Utils::Helper);


#TO DO
# 1 DONE Print all fails and warnings in summary at end of script.
# 2 validate_RegulatoryFeature_Sets
# 3 Some of these can be migrated or mirrored in java HCs for safety

################################################################################

sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;
  my $self   = $class->SUPER::new(@_);

  #validate and set type, analysis and feature_set here
  my ($db, $builds, $skip_mc, $check_displayable, $skip_analyse, $meta_coord_tables, $skip_xrefs, $fix) =
    rearrange(['DB', 'BUILDS', 'SKIP_META_COORD', 'CHECK_DISPLAYABLE', 'SKIP_ANALYSE',
               'META_COORD_TABLES', 'SKIP_XREF_CLEANUP', 'FIX'], @_);

  if (! ($db && ref($db) &&
         $db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'))){
    throw('You must provide a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor');
  }

  #test connection
  $db->dbc->db_handle;

  $self->{'db'}                   = $db;
  $self->{'mysql_connect_string'} = 'mysql -h'.$db->dbc->host.' -u'.$db->dbc->username.' -p'
    .$db->dbc->password.' '.$db->dbc->dbname.' -P'.$db->dbc->port;
  $self->{'dbname'}               = $db->dbc->dbname;
  $self->{'builds'}               = ($builds && scalar(@$builds) > 0) ? $builds : [];
  $self->{'skip_meta_coord'}      = $skip_mc;
  $self->{'skip_xrefs'}           = $skip_xrefs;
  $self->{'skip_analyse'}         = $skip_analyse;
  $self->{'check_displayable'}    = $check_displayable;
  $self->{fix}                    = $fix;

  if(defined $meta_coord_tables){

    #These two options are contradictory
    throw('-skip_meta_coord is set, Cannot build meta_coord entries for tables '.
          join(', ', @$meta_coord_tables)) if $skip_mc;

    if(ref($meta_coord_tables) ne 'ARRAY'){
      throw('-meta_coord_tables parameter must be an array ref');
    }

    @{$self->{'meta_coord_tables'}} = @$meta_coord_tables;
  }

  return $self;
}

sub db{  return $_[0]->{db};  }

sub fix{ return $_[0]->{fix}; }

=head2 update_db_for_release

  Arg[0]     :
  Example    :
  Description: Wrapper method to perform all common update functions
  Returntype :
  Exceptions : None
  Caller     : General
  Status     : at risk

=cut

sub update_db_for_release{
  my ($self, @args) = @_;

  if(@args){
  }

  #do seq_region_update to validate dnadb first
  #hence avoiding redoing longer methods
  $self->validate_new_seq_regions;#($force_srs);
  #$self->update_meta_schema_version;
  $self->check_regbuild_strings;
  $self->check_meta_species_version;
  $self->set_current_coord_system;
  $self->update_meta_coord;
  $self->clean_xrefs;
  $self->validate_DataSets;
  $self->check_stable_ids;
  $self->log_data_sets();
  $self->analyse_and_optimise_tables;#ALWAYS LAST!!

  $self->log_header('??? Have you dumped/copied GFF dumps ???');

  #Log footer? Pass optional counts hash?
  $self->log('Finished updating '.$self->{'dbname'}." for release\n\n");
}

sub validate_new_seq_regions{
  my ($self, $force) = @_;

  #We need to add some functionality to handle non-standard schema_build progression here
  #This should be used before any data is loaded
  #It should also warn if there are any duplicates if it is not run before coord_systems are duplicated
  #Should this just be handled in the BaseFeatureAdaptor/CoordSystemAdaptor?

  #do we need to add the non-default levels here?
  #or are we only bothered about those which constitute the toplevel?

  #To make sure we have all the correct levels in eFG we need to get all the names,
  #then get all by name from the core db and set them as the dnadb.
  #We also need to get all the toplevel seq_regions and store them in the seq_region table,
  #using BaseFeatureAdaptor::_pre_store with an array of pseudo features, one on each top level slice.

  #Validate that the efgdb and dnadb schema versions are the same first.
  #This is because we expect the schema_build to be the same for a release.
  #However, this will autoset the dnadb if not defined, so will always match!

  if(! $force){
    my $efgdb_sm = join('_', @{$self->get_schema_and_build($self->{'dbname'})});
    my $dnadb_sm = join('_', @{$self->get_schema_and_build($self->db->dnadb->dbc->dbname)});

    if($efgdb_sm ne $dnadb_sm){
      $self->report("WARNING Skipped validate_new_seq_regions as schema_versions are mismatched:\t".
                    "efgdb $efgdb_sm\tdnadb $dnadb_sm");
      return 0;
    }
  }

  my $pf_adaptor    = $self->db->get_ProbeFeatureAdaptor();
  my $slice_adaptor = $self->db->dnadb->get_SliceAdaptor();
  my $dnadb_csa     = $self->db->dnadb->get_CoordSystemAdaptor;

  $self->log_header('Validating new coord_systems/seq_regions');

  my @slices;
  my %versioned_levels;
  my $default_version;

  #Grab unversioned top level slices and versioned levels
  #May miss some old versioned levels if the new assembly no longer has them
  foreach my $slice(@{$slice_adaptor->fetch_all('toplevel', undef, 1)}){

    if (! $slice->coord_system->version){
      push @slices, $slice;
    }
    else{

      if($default_version &&
         ($default_version ne $slice->coord_system->version)){
        throw("Found more than one default CoordSystem version:\t${default_version}\t".$slice->coord_system->version);
      }
      else{
        $default_version = $slice->coord_system->version;
      }
    }
  }

  #Get all versioned levels for all builds
  foreach my $cs(@{$dnadb_csa->fetch_all}){

    if($cs->version){
      $versioned_levels{$cs->version} ||= [];
      push @{$versioned_levels{$cs->version}}, $cs->name;
    }
  }

  push @{$self->{'builds'}}, $default_version if scalar(@{$self->{'builds'}}) == 0;

  #Grab slices for each versioned level
  foreach my $build(@{$self->{'builds'}}){

    if(! exists $versioned_levels{$build}){
      throw("CoordSystem version $build does not exist in the dnadb ".$self->db->dnadb->dbc->dbname);
    }

    foreach my $level(@{$versioned_levels{$build}}){
      $self->log("Getting slices for $level $build");
      push @slices, @{$slice_adaptor->fetch_all($level, $build)};
    }
  }

  $self->log("Importing seq_region/coord_system info for builds:\t".join(',', @{$self->{'builds'}}));

  foreach my $slice(@slices){

    if($slice->start() != 1){
      $self->log("Reslicing slice:\t".$slice->name());
      #we must have some sort of PAR linked region i.e. Y
      $slice = $slice_adaptor->fetch_by_region($slice->coord_system_name(), $slice->seq_region_name());
    }

    #we need to test whether it needs doing first?
    #we would need to test for the coord_systems outside of this loop
    #and then for each seq_region inside the loop if the coord_system is present

    $self->log("_pre_storing seq_region info for slice:\t".$slice->name());

    my $pseudo_feature = Bio::EnsEMBL::Funcgen::ProbeFeature->new
      (
       -slice  => $slice,
       -start  => 0,
       -end    => 0,
       -strand => 0,
      );

    $pf_adaptor->_pre_store($pseudo_feature);
    #This will create a meta_coord entry of max_length 1 for features which have an absent meta_coord entry
  }

  $self->log("Finished validating seq_regions\n");

  return;
}


sub update_meta_coord{
  my ($self, @table_names) = @_;

  my $species_id = $self->db->species_id;

  if($self->{'skip_meta_coord'}){
    $self->log("Skipping meta_coord update\n");
    return;
  }

  $self->log_header('Updating meta_coord table');

  #set default table_name
  if(! @table_names || scalar(@table_names) == 0){

    #Can we do this via DBAdaptor and get all available adaptors which are BaseFeatureAdaptors then grab the first table name

    if(defined $self->{'meta_coord_tables'}){
      @table_names = @{$self->{'meta_coord_tables'}};
    }
    else{#default
      @table_names = qw(
                        regulatory_feature
                        probe_feature
                        external_feature
                        annotated_feature
                        result_feature
                        segmentation_feature
                       );
    }
  }

  #backup meta_coord
  if(system($self->{'mysql_connect_string'}." -e 'SELECT * FROM meta_coord' "
            .'> '.$self->{'dbname'}.'.meta_coord.backup'
           ) != 0 ){
    throw("Can't dump the original meta_coord for back up");#will this get copied to log?
  }
  else {
    $self->log('Original meta_coord table backed up in '. $self->{'dbname'}.'.meta_coord.backup');
  }

  #Update each max_length for table_name and coord_system

  my $sql;

  foreach my $table_name(@table_names){

    $sql = "select distinct(cs.name), mc.coord_system_id, cs.version, mc.max_length from coord_system cs, meta_coord mc where mc.table_name='$table_name' and mc.coord_system_id=cs.coord_system_id and cs.species_id = $species_id";

    $self->log('');
    $self->log("Updating meta_coord max_length for $table_name:");
    $self->log("name\tcoord_system_id\tversion\tmax_length");

    #can we test for an empty array here? Then skip the delete.

    my @info = @{$self->db->dbc->db_handle->selectall_arrayref($sql)};

    #log this
    map {$self->log(join("\t", @{$_}))} @info;

    # Clean old entries
    $self->log("Deleting old meta_coord entries");
    $sql = "DELETE mc FROM meta_coord mc, coord_system cs WHERE mc.table_name ='$table_name' and mc.coord_system_id = cs.coord_system_id and cs.species_id = $species_id";
    $self->db->dbc->db_handle->do($sql);

    # Generate new max_lengths
    $self->log("Generating new max_lengths");

    #Is this query running for each redundant cs_id?
    #would it be more efficient to retrieve the NR cs_ids first and loop the query for each cs_id?

    #Can we get the dbID of the largest feature for ease of checking?
    #This won't work as we're grouping by coord_system
    #would need to select distinct coord_system_id for the table first
    #This may well slow down quite a bit doing it this way

    $sql = "select distinct s.coord_system_id from coord_system cs, seq_region s, $table_name t WHERE t.seq_region_id = s.seq_region_id and s.coord_system_id = cs.coord_system_id and cs.species_id = $species_id";
    my @cs_ids = @{$self->db->dbc->db_handle->selectall_arrayref($sql)};
    #Convert single element arrayrefs to scalars
    map $_ = ${$_}[0], @cs_ids;

    $self->log("New max_lengths for $table_name are:");

    $self->log(join("\t", ('coord_system_id', 'max_length', 'longest record dbID')));

    foreach my $cs_id(@cs_ids){
      #This will always give a length of 1 even if there are no features present

      my @cs_lengths;

      #The probe_feature table is now too big to do this in one go
      #We need to break this down into sr_ids

      $sql = "SELECT distinct t.seq_region_id from $table_name t, seq_region sr where t.seq_region_id=sr.seq_region_id and sr.coord_system_id=$cs_id";
      my @sr_ids = @{$self->db->dbc->db_handle->selectcol_arrayref($sql)};

      #Get the longest feature for each seq_region
      foreach my $sr_id(@sr_ids){
        $sql = "SELECT (t.seq_region_end - t.seq_region_start + 1 ) as max, t.${table_name}_id "
          . "FROM $table_name t "
          . "WHERE t.seq_region_id = $sr_id ";
        $sql .= ' and t.window_size=0' if $table_name eq 'result_feature';
        $sql .= " order by max desc limit 1";

        #Problem here is that DBs without window_size=0 result_feature entries will not get a meta_coord entry
        #We need to implement this in the _pre_store method too?

        my ($cs_length, $table_id);
        ($cs_length, $table_id) = $self->db->dbc->db_handle->selectrow_array($sql);
        push @cs_lengths, [$cs_length, $table_id] if $cs_length;
      }

      if(@cs_lengths){
        #This now contains a list of array refs holding the max length and feature id for
        #each seq_region in this coord_system
        #Now sort to get the longest
        #Can't sort a 2D array in the normal way:
        #with a list of lists each element being compared is a reference, not a plain value
        @cs_lengths = sort { $b->[0] <=> $a->[0] } @cs_lengths;
        $self->log(join("\t\t", ($cs_id, @{$cs_lengths[0]})));
        $sql = "INSERT INTO meta_coord values('${table_name}', $cs_id, ".$cs_lengths[0][0].')';
        $self->db->dbc->db_handle->do($sql);
      }
    }
  }

  $self->log("Finished updating meta_coord max_lengths\n");

  return;
}


#change this to check_meta_species_version

sub check_meta_species_version{
  my ($self) = @_;

  $self->log_header('Checking meta species.production_name and schema_version against dbname');

  my $dbname = $self->db->dbc->dbname;
  (my $dbname_species = $dbname) =~ s/_funcgen_.*//;
  my $mc = $self->db->get_MetaContainer;
  my $schema_version = $mc->list_value_by_key('schema_version')->[0];

  if(! defined $schema_version){
    $self->report("FAIL:\tNo meta schema_version defined");
  }
  elsif($dbname !~ /funcgen_${schema_version}_/){
    $self->report("FAIL:\tMeta schema_version ($schema_version) does not match the dbname ($dbname).");
  }

  my @latin_names = @{$mc->list_value_by_key('species.production_name')};

  if(scalar(@latin_names) > 1){
    $self->report("FAIL:\tFound more than one species.production_name in meta:\t".join(", ", @latin_names));
  }
  elsif(scalar(@latin_names) == 1 && ($latin_names[0] ne $dbname_species)){
    $self->report("FAIL:\tFound mismatch between meta species.production_name and dbname:\t".$latin_names[0]." vs $dbname_species");
  }
  elsif(scalar(@latin_names) == 0){
    $self->report("WARNING:\tFound no meta species.production_name setting as:\t$dbname_species");
    $self->db->dbc->db_handle->do("INSERT into meta(species_id, meta_key, meta_value) values(1, 'species.production_name', '$dbname_species')");
  }
  #else is okay

  return;
}

#Move to Java HC? Or update if update flag specified
#Using same code used by build_reg_feats!

sub check_regbuild_strings{
  my ($self) = @_;

  #Removed $update arg as we would always want to do this manually

  $self->log_header('Checking regbuild strings');
  my $species_id = $self->db()->species_id();

  my @regf_fsets;
  my $passed = 1;
  my $fset_a = $self->db->get_FeatureSetAdaptor;
  #my $mc = $self->db->get_MetaContainer;
  my $regf_a = $self->db->get_RegulatoryFeatureAdaptor;
  #We now want to check all builds
  @regf_fsets = @{$fset_a->fetch_all_by_type('regulatory')};

  if(scalar(@regf_fsets) == 0){
    $self->report('WARNING: check_regbuild_strings found no regulatory FeatureSets (fine if '.
                  $self->db->species.' does not have a regulatory build)');
  }
  else{

    warn "Need to check/update regbuild.version, regbuild.initial_release_date and regbuild.last_annotation_update";

    #How do we validate this?
    #Check all feature_sets exist
    #Pull back some features from a test slice and check the number of bits match.
    #Check the feature_type string exists and matches else create.

    foreach my $fset(@regf_fsets){
      $self->log_header("Validating regbuild_string entries for FeatureSet:\t".$fset->name);

      #Fail for old versions as we want to remove these
      if( $fset->name =~ /_v[0-9]+$/){
        $self->report("FAIL:\t".$fset->name." is an old RegulatoryFeature set, please remove!");
        next;
      }

      my $cell_type = (defined $fset->cell_type) ? $fset->cell_type->name : 'core';

      #This has been lifted from build_regulatory_features.pl store_regbuild_strings
      #Need to move this to a RegulatoryBuilder module
      my $dset = $self->db->get_DataSetAdaptor->fetch_by_product_FeatureSet($fset);

      my @ssets = @{$dset->get_supporting_sets};

      if(! @ssets){
        throw('You must provide a DataSet with associated supporting sets');
      }

      my %reg_strings =
        (
         "regbuild.${cell_type}.feature_set_ids"  =>
           join(',', map { $_->dbID } sort { $a->name cmp $b->name } @ssets),
         "regbuild.${cell_type}.feature_type_ids" =>
           join(',', map { $_->feature_type->dbID } sort { $a->name cmp $b->name } @ssets),
        );

      my @ffset_ids;

      #Skip this now as we use the ftype classes for defining the focus sets
      #foreach my $fset(@ssets){
      #  #This might fail if some TFs haven't been included as focus i.e. are part of PolII/III
      #  #e.g. TFIIIC-110
      #  if( ($fset->feature_type->class eq 'Transcription Factor') ||
      #      ($fset->feature_type->class eq 'Open Chromatin') ){
      #    push @ffset_ids, $fset->dbID;
      #  }
      #}

      my ($sql, %db_reg_string);

      foreach my $string_key(keys %reg_strings){
        my ($string) = $self->db->dbc->db_handle->selectrow_array("select string from regbuild_string where name='${string_key}' and species_id=$species_id");

        if (! defined $string) {
          $sql = "insert into regbuild_string (species_id, name, string) values ($species_id, '${string_key}', '$reg_strings{${string_key}}');";

          $self->report("WARNING:\tInserting absent $string_key into regbuild_string table");
          eval { $self->db->dbc->do($sql) };
          die("Couldn't store $string_key in regbuild_string table\n$sql\n$@") if $@;
        }
        elsif ($string ne $reg_strings{$string_key}){
          $sql = "update regbuild_string set string='".$reg_strings{$string_key}."' where name='${string_key}';";

          if($self->fix){
            $self->report("WARNING:\tUpdating mismatched $string_key found in regbuild_string table:\t${string}");#\tUpdate using:\t$sql");
            eval { $self->db->dbc->do($sql) };
            die("Couldn't update $string_key in regbuild_string table\n$sql\n$@") if $@;
          }
          else{
            $self->report("FAIL:\tMismatched $string_key found in regbuild_string table:\t${string}\n\tUpdate using:\t$sql");
          }
        }

        $db_reg_string{$string_key} = $string;
      }

      #Now need to tidy this block wrt new code added above
      my $fset_string_key  = "regbuild.${cell_type}.feature_set_ids";
      my $ftype_string_key = "regbuild.${cell_type}.feature_type_ids";
      my $fset_string      = $db_reg_string{$fset_string_key};
      my $ftype_string     = $db_reg_string{$ftype_string_key};

      if(! ($fset_string && $ftype_string)){
        $self->report("FAIL:\tSkipping fset vs ftype string test for $cell_type");
      }
      else{

        #This is now effectively handled by the loop above

        $self->log("Validating:\t$fset_string_key vs $ftype_string_key");

        my @fset_ids  = split/,/, $fset_string;
        my @ftype_ids = split/,/, $ftype_string;
        my @new_ftype_ids;
        my $ftype_fail = 0;

        #Now need to work backwards through the ftypes to remove pseudo ftypes before validating
        #New string should be A,A,A;S,S,S,S,S,S;P,P,P
        #Where A is an Anchor/Seed set
        #S is a supporting set
        #P is a pseudo feature type e.g. TSS proximal
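
        #The strings currently checked below are parallel comma-separated lists.
        #For illustration only (hypothetical dbIDs, not taken from any real build),
        #a 'core' build with three supporting sets might be stored as:
        #  regbuild.core.feature_set_ids  => '101,102,103'
        #  regbuild.core.feature_type_ids => '17,17,21'
        #i.e. the ith feature_type_id must be the FeatureType of the ith feature_set_id,
        #which is what the element-by-element comparison below validates.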

        if(scalar(@fset_ids) != scalar(@ftype_ids)){
          $self->report("FAIL:\tLength mismatch between:\n\t$fset_string_key(".scalar(@fset_ids).")\t$fset_string\n\tAND\n\t$ftype_string_key(".scalar(@ftype_ids).")\t$ftype_string");
        }

        foreach my $i(0..$#fset_ids){
          my $supporting_set_id = $fset_ids[$i];
          my $sset = $fset_a->fetch_by_dbID($supporting_set_id);

          if(! defined $sset){
            $self->report("FAIL:\t$fset_string_key $supporting_set_id does not exist in the DB");
          }
          else{
            #test/build ftype string

            if(defined $ftype_string){

              if($sset->feature_type->dbID != $ftype_ids[$i]){
                $ftype_fail = 1;
                $self->report("FAIL:\t$fset_string_key $supporting_set_id(".$sset->name.") FeatureType(".$sset->feature_type->name.") does not match $ftype_string_key $ftype_ids[$i]");
              }
            }

            push @new_ftype_ids, $sset->feature_type->dbID;
          }
        }

        #Set ftype_string
        #This will not account for pseudo ftypes? Remove!!!?
        my $new_ftype_string = join(',', @new_ftype_ids);

        if(! defined $ftype_string){
          $self->log("Updating $ftype_string_key to:\t$new_ftype_string");
          $self->db->dbc->db_handle->do("INSERT into regbuild_string(species_id, name, string) values($species_id, '$ftype_string_key', '$new_ftype_string')");
        }
        elsif($ftype_fail){
          $self->report("FAIL:\t$ftype_string_key($ftype_string) does not match $fset_string_key types($new_ftype_string)");
        }

        #Finally validate versus a reg feat
        #Need to change this to ftype string rather than fset string?
        my $id_row_ref = $self->db->dbc->db_handle->selectrow_arrayref('select regulatory_feature_id from regulatory_feature where feature_set_id='.$fset->dbID.' limit 1');

        if(! defined $id_row_ref){
          $self->report("FAIL:\tNo RegulatoryFeatures found for FeatureSet ".$fset->name);
        }
        else{
          my ($regf_dbID) = @$id_row_ref;
          my $rf_string = $regf_a->fetch_by_dbID($regf_dbID)->binary_string;

          if(length($rf_string) != scalar(@fset_ids)){
            $self->report("FAIL:\tRegulatory string length mismatch between RegulatoryFeature($regf_dbID) and $fset_string_key:\n$rf_string(".length($rf_string).")\n$fset_string(".scalar(@fset_ids).")");
          }
        }
      }
    }
  }

  return;
}


#Change this to log sets and incorporate the RegFeat FeatureSet as standard
#Grab all reg fsets
#grab all displayable data sets which aren't reg sets?

sub log_data_sets{
  my $self = shift;

  my $dset_adaptor = $self->db->get_DataSetAdaptor;
  my ($status);
  my $txt = 'Checking ';
  $status = 'DISPLAYABLE' if($self->{'check_displayable'});
  $txt .= $status.' ' if $status;
  $txt .= 'DataSets';
  $self->log_header($txt);

  #Check for the status first to avoid a warning from BaseAdaptor.
  eval { $dset_adaptor->_get_status_name_id($status) };

  if($@){
    $self->report("FAIL: You have specified check_displayable, but the DISPLAYABLE status_name is not present in the DB");
    return;
  }

  my @dsets;
  my $dsets = $dset_adaptor->fetch_all($status);
  @dsets = @$dsets if defined $dsets;

  $self->log('Found '.scalar(@dsets).' DataSets');

  foreach my $dset(@dsets){
    $self->log_set("DataSet:\t\t", $dset);

    my $fset = $dset->product_FeatureSet;
    $self->log_set("Product FeatureSet:\t", $fset) if $fset;

    my @supporting_sets = @{$dset->get_supporting_sets};

    $self->log('Found '.scalar(@supporting_sets).' supporting sets:');

    if(@supporting_sets){
      #type here could be result, experimental or feature
      #and feature could be annotated or experimental
      #Move this to log_set?

      map { my $type = ref($_);
            $type =~ s/.*://;
            $type .= '('.$_->feature_class.')' if($type eq 'FeatureSet');
            #Need to sprintf $type here to fixed width
            $self->log_set($type.":\t", $_) } @supporting_sets;
    }
    $self->log();
  }

  return;
}

sub log_set{
  my ($self, $text, $set) = @_;

  $text .= $set->display_label.'('.$set->name.')';
  $text .= "\tDISPLAYABLE" if($set->is_displayable);
  $self->log($text);

  return;
}


sub check_stable_ids{
  my ($self, @slices) = @_;

  my $species_id = $self->db()->species_id();

  $self->log_header('Checking stable IDs');

  my $fset_a = $self->db->get_FeatureSetAdaptor;

  my @regf_fsets = @{$fset_a->fetch_all_by_type('regulatory')};

  if(!@regf_fsets){
    $self->report('WARNING: No regulatory FeatureSets found (fine if '.$self->db->species.' does not have a regulatory build)');
  }
  else{

    foreach my $fset(@regf_fsets){

      if($fset->name =~ /_v[0-9]+$/){
        $self->log("Skipping stable_id test on archived set:\t".$fset->name);
        next;
      }

      #Can't count a NULL field, so have to count regulatory_feature_id!!!

      #getting SR product here!!
      my $sql = "select count(rf.regulatory_feature_id) from regulatory_feature rf, seq_region sr, coord_system cs where rf.stable_id is NULL and rf.seq_region_id = sr.seq_region_id and sr.coord_system_id = cs.coord_system_id and cs.species_id = $species_id and rf.feature_set_id=".$fset->dbID;

      my ($null_sids) = @{$self->db->dbc->db_handle->selectrow_arrayref($sql)};

      if($null_sids){
        $self->report("FAIL: Found a total of $null_sids NULL stable IDs for ".$fset->name);

        my $slice_a = $self->db->get_SliceAdaptor;

        if(! @slices){
          @slices = @{$slice_a->fetch_all('toplevel', 1)};
        }

        foreach my $slice(@slices){
          my $sr_name = $slice->seq_region_name;
          #count regulatory_feature_id rather than stable_id, as counting the NULL column always returns 0
          $sql = 'select count(rf.regulatory_feature_id) from regulatory_feature rf, seq_region sr, coord_system cs where rf.seq_region_id=sr.seq_region_id and sr.name="'.$sr_name.'" and sr.coord_system_id = cs.coord_system_id and cs.species_id = '.$species_id.' and rf.stable_id is NULL and rf.feature_set_id='.$fset->dbID;
          ($null_sids) = @{$self->db->dbc->db_handle->selectrow_arrayref($sql)};

          $self->log($fset->name.":\t$null_sids NULL stable IDs on ".$slice->name) if $null_sids;
        }
      }
      else{
        $self->log($fset->name.":\tNo NULL stable IDs found");
      }
    }
  }

  return;
}


#This is for mart, to enable them to join to the seq_region table without
#creating a product from all the redundant seq_region entries for each schema_build

sub set_current_coord_system{
  my ($self) = @_;

  my $schema_build = $self->db->_get_schema_build($self->db->dnadb);
  $self->log_header("Setting current coord_system on $schema_build");

  my $sql = "update coord_system set is_current=False where schema_build !='$schema_build'";
  $self->db->dbc->do($sql);
  $sql = 'update coord_system set is_current=True where schema_build ="'.$schema_build.'" and attrib like "%default_version%"';
  $self->db->dbc->do($sql);

  return;
}

sub validate_DataSets{
  my $self = shift;

  $self->log_header('Validating DataSets');

  #checks regulatory feature data and supporting sets
  #links between DataSet and FeatureSet, i.e. correct naming, not linking to old set
  #Displayable, DAS_DISPLAYABLE, IMPORTED_ASSM, MART_DISPLAYABLE
  #Naming of result_set should match data/feature_set
  #warn non-attr feature/result sets which are DISPLAYABLE
  #warn about DISPLAYABLE sets which do not have displayable set in analysis_description.web_data

  my $fset_a = $self->db->get_FeatureSetAdaptor;
  my $dset_a = $self->db->get_DataSetAdaptor;
  my ($dset_states, $rset_states, $fset_states) = $self->get_regbuild_set_states($self->db);

  my %rf_fsets;
  my %set_states;
  my $sql;

 RF_FSET: foreach my $rf_fset(@{$fset_a->fetch_all_by_type('regulatory')}){
    my $rf_fset_name = $rf_fset->name;

    $self->log("Validating $rf_fset_name");

    $rf_fsets{$rf_fset_name} = $rf_fset;#Do we only need the name for checking the dsets independently?

    if($rf_fset_name =~ /_v[0-9]+$/){
      $self->report("FAIL:\tFound archived regulatory FeatureSet:\t$rf_fset_name");
      next RF_FSET;
    }

    foreach my $state(@$fset_states){

      if(! $rf_fset->has_status($state)){
        $self->report("WARNING:\tUpdating FeatureSet $rf_fset_name with status $state");

        $sql = 'INSERT into status select '. $rf_fset->dbID.
          ", 'feature_set', status_name_id from status_name where name='$state'";
        $self->db->dbc->db_handle->do($sql);
      }
    }

    #Do we need to warn about other states?

    my $rf_dset = $dset_a->fetch_by_product_FeatureSet($rf_fset);

    if(! $rf_dset){
      $self->report("FAIL:\tNo DataSet for FeatureSet:\t$rf_fset_name");
      next RF_FSET;
    }

    if($rf_fset_name ne $rf_dset->name){
      $self->report("FAIL:\tFound Feature/DataSet name mismatch:\t$rf_fset_name vs ".$rf_dset->name);
      next RF_FSET;
    }

    foreach my $state(@$dset_states){

      if(! $rf_dset->has_status($state)){
        #Can we just update and warn here?
        #Or do this separately in case we want some control over this?
        $self->report("WARNING:\tUpdating DataSet $rf_fset_name with status $state");

        $sql = 'INSERT into status select '.$rf_dset->dbID.
          ", 'data_set', status_name_id from status_name where name='$state'";
        $self->db->dbc->db_handle->do($sql);
      }
    }


    #We should check fset ctype matches all attr_set ctypes?
    #May have problems if we want to merge two lines into the same ctype build

    foreach my $ra_fset(@{$rf_dset->get_supporting_sets}){

      foreach my $state(@$fset_states){

        if(! $ra_fset->has_status($state)){
          #Can we just update and warn here?
          #Or do this separately in case we want some control over this?
          $self->report("WARNING:\tUpdating FeatureSet ".$ra_fset->name." with status $state");

          $sql = 'INSERT into status select '.$ra_fset->dbID.
            ", 'feature_set', status_name_id from status_name where name='$state'";
          $self->db->dbc->db_handle->do($sql);
        }
      }

      my $ra_dset = $dset_a->fetch_by_product_FeatureSet($ra_fset);
      my @ssets   = @{$ra_dset->get_supporting_sets(undef, 'result')};
      my @displayable_sets;

      foreach my $sset(@ssets){

        if($sset->has_status('DISPLAYABLE')){
          push @displayable_sets, $sset;
        }
      }
      #Change this to get all then check status
      #else print update sql

      if(scalar(@displayable_sets) > 1){#There should only be one
        $self->report("FAIL:\tThere should only be one DISPLAYABLE supporting ResultSet for DataSet:\t".$ra_dset->name);
      }
      elsif(scalar(@displayable_sets) == 0){

        my $msg;

        if(scalar(@ssets) == 1){

          #fix here?

          $msg = "Found unique non-DISPLAYABLE ResultSet:\t".$ssets[0]->name.
            "\n\tinsert into status select ".$ssets[0]->dbID.
            ", 'result_set', status_name_id from status_name where name='DISPLAYABLE';";
        }
        else{
          $msg = "Found ".scalar(@ssets)." ResultSets ".join("\t", map($_->name, @ssets));
        }

        $self->report("FAIL:\tThere are no DISPLAYABLE supporting ResultSets for DataSet:\t".
                      $ra_dset->name."\n$msg");

        next; #$ra_fset
      }

      my $ra_rset = $ssets[0];

      foreach my $state(@$rset_states){

        if(! $ra_rset->has_status($state)){
          #Can we just update and warn here?
          #Or do this separately in case we want some control over this?
          $self->report("WARNING:\tUpdating ResultSet ".$ra_rset->name." with status $state");

          $sql = 'INSERT into status select '.$ra_rset->dbID.
            ", 'result_set', status_name_id from status_name where name='$state'";
          $self->db->dbc->db_handle->do($sql);
        }
      }
    }
  }

  return;
} # End of validate_DataSets



sub analyse_and_optimise_tables{
  my $self = shift;

  #myisamchk --analyze. or analyze statement

  if($self->{'skip_analyse'}){
    $self->log_header('Skipping analyse/optimise tables');
    return;
  }

  $self->log_header("Analysing and optimising tables");

  my $sql    = 'show tables;';
  my @tables = @{$self->db->dbc->db_handle->selectall_arrayref($sql)};
  map $_ = "@{$_}", @tables;
  my $analyse_sql  = 'analyze table ';
  my $optimise_sql = 'optimize table ';

  foreach my $table(@tables){
    $self->log("Analysing and optimising $table");

    #Remove analyse as optimise does everything this does
    my @anal_info = @{$self->db->dbc->db_handle->selectall_arrayref($analyse_sql.$table)};

    foreach my $line_ref(@anal_info){
      my $status = $line_ref->[3];
      $self->report("FAIL: analyse $table status $status") if (!($status eq 'OK' || $status eq 'Table is already up to date'));
    }

    my @opt_info = @{$self->db->dbc->db_handle->selectall_arrayref($optimise_sql.$table)};

    foreach my $line_ref(@opt_info){
      my $status = $line_ref->[3];
      $self->report("FAIL: optimise $table status $status") if (!($status eq 'OK' || $status eq 'Table is already up to date'));
    }
  }

  return;
}# end of analyse_and_optimise_tables


sub clean_xrefs{
  my ($self) = @_;

  if($self->{'skip_xrefs'}){
    $self->log_header('Skipping clean_xrefs');
    return;
  }

  $self->log_header("Cleaning unlinked xref records");

  my $sql = 'DELETE x FROM xref x LEFT JOIN object_xref ox ON ox.xref_id = x.xref_id WHERE ox.xref_id IS NULL';
  #Should this also take account of unmapped_objects?
  #No, as unmapped_object doesn't use xref, but probably should

  my $row_cnt = $self->db->dbc->do($sql);

  $self->reset_table_autoinc('xref', 'xref_id', $self->db);
  $row_cnt = 0 if $row_cnt eq '0E0';
  $self->log("Deleted $row_cnt unlinked xref records");

  #Now remove old external_dbs
  $self->log_header("Cleaning unlinked external_db records");

  #Need to account for xref and unmapped_object here
  $sql = 'DELETE edb FROM external_db edb '.
    'LEFT JOIN xref x ON x.external_db_id = edb.external_db_id '.
    'LEFT JOIN unmapped_object uo ON uo.external_db_id=edb.external_db_id '.
    'WHERE x.external_db_id IS NULL and uo.external_db_id is NULL';
  $row_cnt = $self->db->dbc->do($sql);

  $self->reset_table_autoinc('external_db', 'external_db_id', $self->db);
  $row_cnt = 0 if $row_cnt eq '0E0';
  $self->log("Deleted $row_cnt unlinked external_db records");

  #Shouldn't clean orphaned object_xrefs here, as that would mean a rollback has been done underneath the object_xref data
  #or we have xref_id=0!
  #Leave this to an HC?

  return;
}


1;