0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2011 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <ensembl-dev@ebi.ac.uk>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =head1 NAME
|
|
20
|
|
21 Bio::EnsEMBL::Funcgen::Utils::EFGUtils
|
|
22
|
|
23 =head1 DESCRIPTION
|
|
24
|
|
25 This module collates a variety of miscellaneous methods.
|
|
26
|
|
27
|
|
28 =head1 SYNOPSIS
|
|
29
|
|
30 BEGIN
|
|
31 {
|
|
32 unshift(@INC,"/path/of/local/src/modules");
|
|
33 }
|
|
34
|
|
35 use Utils;
|
|
36
|
|
37 &Utils::send_mail($to_address, $title, $message);
|
|
38
|
|
39 =cut
|
|
40
|
|
41
|
|
42 # No API/Object based methods in here
|
|
43
|
|
44 ###############################################################################
|
|
45
|
|
46 package Bio::EnsEMBL::Funcgen::Utils::EFGUtils;
|
|
47 require Exporter;
|
|
48 @ISA = qw(Exporter);
|
|
49 @EXPORT_OK = qw(get_date species_name get_month_number species_chr_num
|
|
50 open_file median mean run_system_cmd backup_file
|
|
51 is_gzipped is_sam is_bed get_file_format strip_param_args
|
|
52 generate_slices_from_names strip_param_flags
|
|
53 get_current_regulatory_input_names add_external_db);
|
|
54
|
|
55 use Bio::EnsEMBL::Utils::Exception qw( throw );
|
|
56 use File::Path qw (mkpath);
|
|
57 use File::Basename qw (dirname);
|
|
58 use strict;
|
|
59 use Time::Local;
|
|
60 use FileHandle;
|
|
61 use Carp;
|
|
62
|
|
63 sub get_date{
|
|
64 my ($format, $file) = @_;
|
|
65
|
|
66 my ($time, $sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst);
|
|
67
|
|
68
|
|
69 throw("File does not exist or is not a regular file:\t$file") if $file && ! -f $file;
|
|
70
|
|
71
|
|
72 ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = (defined $file) ?
|
|
73 localtime((stat($file))[9]) : localtime();
|
|
74
|
|
75 #print " ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst)\n";
|
|
76
|
|
77 if((! defined $format && ! defined $file) || $format eq "date"){
|
|
78 $time = ($year+1900)."-".$mday."-".($mon+1);
|
|
79 }
|
|
80 elsif($format eq "time"){#not working!
|
|
81 $time = "${hour}:${min}:${sec}";
|
|
82 }
|
|
83 elsif($format eq "timedate"){#
|
|
84 $time = localtime();
|
|
85 }
|
|
86 else{#add mysql formats here, datetime etc...
|
|
87 croak("get_date does not handle format:\t$format");
|
|
88 }
|
|
89
|
|
90 return $time;
|
|
91 }
|
|
92
|
|
93
|
|
94 #migrate this data to defs file!!??
|
|
95 #must contain all E! species and any other species which are used in local DB extractions
|
|
96 #NEED TO ADD FLY!!
|
|
97
|
|
98 sub species_name{
|
|
99 my($species) = @_;
|
|
100 my %species_names = (
|
|
101 "HOMO_SAPIENS", "human",
|
|
102 "MUS_MUSCULUS", "mouse",
|
|
103 "RATTUS_NORVEGICUS", "rat",
|
|
104 "CANIS_FAMILIARIS", "dog",
|
|
105 "PAN_TROGOLODYTES", "chimp",
|
|
106 "GALLUS_GALLUS", "chicken",
|
|
107 "SACCHAROMYCES_CEREVISIAE", "yeast",
|
|
108 "HUMAN", "HOMO_SAPIENS",
|
|
109 "MOUSE", "MUS_MUSCULUS",
|
|
110 "RAT","RATTUS_NORVEGICUS",
|
|
111 "DOG", "CANIS_FAMILIARIS",
|
|
112 "CHIMP", "PAN_TROGOLODYTES",
|
|
113 "CHICKEN", "GALLUS_GALLUS",
|
|
114 "YEAST", "SACCHAROMYCES_CEREVISIAE",
|
|
115 );
|
|
116
|
|
117 return $species_names{uc($species)};
|
|
118 }
|
|
119
|
|
120 sub get_month_number{
|
|
121 my($mon) = @_;
|
|
122 my %month_nos =(
|
|
123 "jan", "01",
|
|
124 "feb", "02",
|
|
125 "mar", "03",
|
|
126 "apr", "04",
|
|
127 "may", "05",
|
|
128 "jun", "06",
|
|
129 "jul", "07",
|
|
130 "aug", "08",
|
|
131 "sep", "09",
|
|
132 "oct", "10",
|
|
133 "nov", "11",
|
|
134 "dec", "12",
|
|
135 );
|
|
136 return $month_nos{lc($mon)};
|
|
137 }
|
|
138
|
|
139
|
|
140 sub species_chr_num{
|
|
141 my ($species, $val) = @_;
|
|
142
|
|
143 ($species = lc($species)) =~ s/ /_/;
|
|
144
|
|
145 my %species_chrs = (
|
|
146 homo_sapiens => {(
|
|
147 'x' => 23,
|
|
148 'y' => 24,
|
|
149 'mt' => 25,
|
|
150 )},
|
|
151
|
|
152 mus_musculus => {(
|
|
153 'x' => 20,
|
|
154 'y' => 21,
|
|
155 'mt' => 22,
|
|
156 )},
|
|
157
|
|
158 rattus_norvegicus => {(
|
|
159 'x' => 21,
|
|
160 'y' => 22,
|
|
161 'mt' => 23,
|
|
162 )},
|
|
163 );
|
|
164
|
|
165 die("species not defined in chromosome hash") if(! exists $species_chrs{$species});
|
|
166
|
|
167 return (exists $species_chrs{$species}{lc($val)}) ? $species_chrs{$species}{lc($val)} : $val;
|
|
168 }
|
|
169
|
|
170 #Sort should always be done in the caller if required
|
|
171
|
|
172 sub median{
|
|
173 my ($scores, $sort) = shift;
|
|
174
|
|
175 return undef if (! @$scores);
|
|
176
|
|
177
|
|
178 my ($median);
|
|
179 my $count = scalar(@$scores);
|
|
180 my $index = $count-1;
|
|
181 #need to deal with lines with no results!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
182 #deal with one score fastest
|
|
183 return $scores->[0] if ($count == 1);
|
|
184
|
|
185
|
|
186 if($sort){
|
|
187 #This is going to sort the reference here, so will affect
|
|
188 #The array in the caller
|
|
189 #We need to deref to avoid this
|
|
190 }
|
|
191
|
|
192 #taken from Statistics::Descriptive
|
|
193 #remeber we're dealing with size starting with 1 but indices starting at 0
|
|
194
|
|
195 if ($count % 2) { #odd number of scores
|
|
196 $median = $scores->[($index+1)/2];
|
|
197 }
|
|
198 else { #even, get mean of flanks
|
|
199 $median = ($scores->[($index)/2] + $scores->[($index/2)+1] ) / 2;
|
|
200 }
|
|
201
|
|
202
|
|
203 return $median;
|
|
204 }
|
|
205
|
|
206
|
|
207 sub mean{
|
|
208 my $scores = shift;
|
|
209
|
|
210 my $total = 0;
|
|
211
|
|
212 map $total+= $_, @$scores;
|
|
213 my $mean = $total/(scalar(@$scores));
|
|
214
|
|
215 return $mean;
|
|
216
|
|
217 }
|
|
218
|
|
219 #Should really extend this to detect previous file?
|
|
220 #Or do in caller?
|
|
221
|
|
222 sub open_file{
|
|
223 my ($file, $operator, $file_permissions) = @_;
|
|
224
|
|
225 my $dir_permissions = $file_permissions || 0755;
|
|
226
|
|
227 $operator ||= '<';
|
|
228
|
|
229 if($operator !~ /%/){
|
|
230 $operator = "$operator $file";
|
|
231 }
|
|
232 else{
|
|
233 #We have some piping to do
|
|
234 $operator = sprintf($operator, $file);
|
|
235 }
|
|
236
|
|
237 #Get dir here and create if not exists
|
|
238 my $dir = dirname($file);
|
|
239 mkpath($dir, {verbose => 1, mode => $dir_permissions}) if(! -d $dir);
|
|
240 my $fh = new FileHandle "$operator";
|
|
241
|
|
242
|
|
243 #This does not catch incorrectly defined named pipes
|
|
244
|
|
245 if(! defined $fh){
|
|
246 croak("Failed to open $operator");
|
|
247 }
|
|
248
|
|
249
|
|
250 #Have to chmod here as umask will over-ride permissions passed to FileHandle
|
|
251 if(defined $file_permissions){
|
|
252
|
|
253 #Catch non-numeric here as chmod still returns true
|
|
254 if($file_permissions =~ /[^0-9]/){
|
|
255 croak("Failed to change $file permissions using:\t$file_permissions");
|
|
256 }
|
|
257
|
|
258 #chmod requires a literal octal number e.g. 0775 not '0775'
|
|
259 #should catch numbers as strings here, but perl makes this very hard to test
|
|
260 #Can't even system this as if we build the cmd line with an octal it will be converted to a decimal
|
|
261 #These is still no way of testing for non-octal number or string
|
|
262 #eval/sprintf will also not fail if there are non-octal digits i.e. 1999
|
|
263
|
|
264 #eval will treat octal number and string as true octal number
|
|
265 #else will pass non-octal string/number which we can't catch
|
|
266 chmod(eval($file_permissions), $file);
|
|
267 }
|
|
268
|
|
269 return $fh;
|
|
270 }
|
|
271
|
|
272
|
|
273
|
|
274 ################################################################################
|
|
275
|
|
276 =head2 run_system_cmd
|
|
277
|
|
278 Description : Method to control the execution of the standard system() command
|
|
279
|
|
280 ReturnType : none
|
|
281
|
|
282 Example : $Helper->debug(2,"dir=$dir file=$file");
|
|
283
|
|
284 Exceptions : throws exception if system command returns none zero
|
|
285
|
|
286 =cut
|
|
287
|
|
288 ################################################################################
|
|
289
|
|
290 sub run_system_cmd{
|
|
291 my ($command, $no_exit) = @_;
|
|
292
|
|
293 my $redirect = '';
|
|
294
|
|
295 #$self->debug(3, "system($command)");
|
|
296
|
|
297 # decide where the command line output should be redirected
|
|
298
|
|
299 #This should account for redirects
|
|
300
|
|
301 #if ($self->{_debug_level} >= 3){
|
|
302
|
|
303 # if (defined $self->{_debug_file}){
|
|
304 # $redirect = " >>".$self->{_debug_file}." 2>&1";
|
|
305 # }
|
|
306 # else{
|
|
307 # $redirect = "";
|
|
308 # }
|
|
309 #}
|
|
310 #else{
|
|
311 #$redirect = " > /dev/null 2>&1";
|
|
312 #}
|
|
313
|
|
314 # execute the passed system command
|
|
315 my $status = system("$command $redirect");
|
|
316 my $exit_code = $status >> 8;
|
|
317
|
|
318
|
|
319 if ($status == -1) {
|
|
320 warn "Failed to execute: $!\n";
|
|
321 }
|
|
322 elsif ($status & 127) {
|
|
323 warn sprintf("Child died with signal %d, %s coredump\nError:\t$!",($status & 127),($status & 128) ? 'with' : 'without');
|
|
324 }
|
|
325 elsif($status != 0) {
|
|
326 warn sprintf("Child exited with value %d\nError:\t$!\n", $exit_code); #get the true exit code
|
|
327 }
|
|
328
|
|
329 #We're not catchign error message here!
|
|
330
|
|
331 if ($exit_code != 0){
|
|
332
|
|
333 if (! $no_exit){
|
|
334 throw("System command failed:\t$command\n");
|
|
335 }
|
|
336 else{
|
|
337 warn("System command returned non-zero exit code:\t$command\n");
|
|
338 }
|
|
339 }
|
|
340
|
|
341 #reverse boolean logic for perl...can't do this anymore due to tab2mage successful non-zero exit codes :/
|
|
342
|
|
343 return $exit_code;
|
|
344 }
|
|
345
|
|
346
|
|
347 sub backup_file{
|
|
348 my $file_path = shift;
|
|
349
|
|
350 throw("Must define a file path to backup") if(! $file_path);
|
|
351
|
|
352 if (-f $file_path) {
|
|
353 #$self->log("Backing up:\t$file_path");
|
|
354 system ("mv ${file_path} ${file_path}.".`date '+%T'`) == 0 || return 0;
|
|
355 }
|
|
356
|
|
357 return 1;
|
|
358
|
|
359 }
|
|
360
|
|
361
|
|
362 sub get_file_format{
|
|
363 my $file = shift;
|
|
364
|
|
365 my $format = &is_bed($file);
|
|
366
|
|
367 if(! $format){
|
|
368 $format = &is_sam($file);
|
|
369
|
|
370 #Add more testes here
|
|
371 }
|
|
372
|
|
373
|
|
374 return $format;
|
|
375 }
|
|
376
|
|
377 sub is_gzipped {
|
|
378 my ($file, $fail_if_compressed) = @_;
|
|
379
|
|
380 throw ("File does not exist:\t$file") if ! -e $file;
|
|
381
|
|
382 open(FILE, "file -L $file |")
|
|
383 or throw("Can't execute command 'file' on '$file'");
|
|
384 my $file_info = <FILE>;
|
|
385 close FILE;
|
|
386
|
|
387 my $gzip = 0;
|
|
388
|
|
389 if($file_info =~ /compressed data/){
|
|
390
|
|
391 if($file_info =~ /gzip/){
|
|
392 $gzip = 1;
|
|
393 }
|
|
394 else{
|
|
395 throw("File is compressed but not with gzip, please unzip or gzip:\t$file_info");
|
|
396 }
|
|
397 }
|
|
398
|
|
399 return $gzip;
|
|
400 }
|
|
401
|
|
402 #Change these to also return the gz status
|
|
403
|
|
404 sub is_sam{
|
|
405 my $file = shift;
|
|
406
|
|
407 warn "Only checking file suffix for is_sam";
|
|
408 #Could check for header here altho this is not mandatory!
|
|
409 #Can we use web format guessing code?
|
|
410
|
|
411 my $gz = (&is_gzipped($file, 1)) ? '.gz' : '';
|
|
412
|
|
413 return ($file =~ /.sam${gz}/) ? 'sam' : 0;
|
|
414 }
|
|
415
|
|
416 #need is bam here too!
|
|
417
|
|
418 sub is_bed {
|
|
419 my $file = shift;
|
|
420
|
|
421 #Use open_file here!
|
|
422
|
|
423 if(&is_gzipped($file, 1)){
|
|
424
|
|
425 open(FILE, "zcat $file 2>&1 |") or throw("Can't open file via zcat:\t$file");
|
|
426 }
|
|
427 else{
|
|
428 open(FILE, $file) or throw("Can't open file:\t$file");
|
|
429 }
|
|
430
|
|
431 my @line;
|
|
432 #$verbose =1;
|
|
433
|
|
434
|
|
435 while (<FILE>) {
|
|
436 chomp;
|
|
437 @line = split("\t", $_);
|
|
438 last;
|
|
439 }
|
|
440 close FILE;
|
|
441
|
|
442 if (scalar @line < 6) {
|
|
443 warn "Infile '$file' does not have 6 or more columns. We expect bed format:\t".
|
|
444 "CHROM START END NAME SCORE STRAND.\n";
|
|
445 return 0;
|
|
446 #} elsif ($line[0] !~ m/^((chr)?[MTXYNT_\d]+)$/) {
|
|
447 # warn ("1st column must contain name of seq_region (e.g. chr1 or 1) in '$file'");
|
|
448 # return 0;
|
|
449 #Commented this out for now due to HSCHR_RANDOM seqs
|
|
450 #How does the webcode handle this?
|
|
451 }
|
|
452 elsif ($line[1] !~ m/^\d+$/ && $line[2] =~ m/^\d+$/) {
|
|
453 warn "2nd and 3rd column must contain start and end respectively in '$file'\n";
|
|
454 return 0;
|
|
455 }
|
|
456 elsif ($line[5] !~ m/^[+-\.]$/) {
|
|
457 warn "6th column must define strand (either +, - or .) in '$file'\n";
|
|
458 return 0;
|
|
459 }
|
|
460
|
|
461 return 'bed';
|
|
462 }
|
|
463
|
|
464
|
|
465 #These subs are useful for implementing
|
|
466 #a farm mode in a run script, where a script can
|
|
467 #submit itself to the farm as slice based jobs
|
|
468
|
|
469 #strip cmd line params and associated arguments from a list
|
|
470 #should not be used to remove flag options i.e. no following args
|
|
471 #as this may cause removal of any following @ARGV;
|
|
472 #Can this be used on flattened args hash?
|
|
473
|
|
474 sub strip_param_args{
|
|
475 my ($args, @strip_params) = @_;
|
|
476
|
|
477 my $param_name;
|
|
478 my $seen_opt = 0;
|
|
479
|
|
480 foreach my $i(0..$#{$args}){
|
|
481
|
|
482 if($args->[$i] =~ /^[-]+/){
|
|
483 $seen_opt = 0;#Reset seen opt if we seen a new one
|
|
484
|
|
485 ($param_name = $args->[$i]) =~ s/^[-]+//;
|
|
486
|
|
487 if(grep/^${param_name}$/, @strip_params){
|
|
488 $seen_opt = 1;
|
|
489 }
|
|
490 }
|
|
491
|
|
492 #$args->[$i] = '' if $args->[$i] =~ /^[-]+farm/;#Only remove current flag
|
|
493 #$seen_opt = 1 if $args->[$i] =~ /^[-]+skip_slices/;
|
|
494 #$seen_opt = 1 if $args->[$i] =~ /^[-]+slice/;#Don't have full param name incase we have just specified -slice
|
|
495
|
|
496 $args->[$i] = '' if $seen_opt;#Remove option and args following option
|
|
497 }
|
|
498
|
|
499 return $args;
|
|
500 }
|
|
501
|
|
502
|
|
503 sub strip_param_flags{
|
|
504 my ($args, @strip_params) = @_;
|
|
505
|
|
506 my @args = @$args;
|
|
507
|
|
508 foreach my $flag(@strip_params){
|
|
509 @args = grep(!/[-]+${flag}$/, @args);
|
|
510 }
|
|
511
|
|
512 return \@args;
|
|
513 }
|
|
514
|
|
515 #Generates slices from names or optionally alll default top level nonref
|
|
516
|
|
517 sub generate_slices_from_names{
|
|
518 my ($slice_adaptor, $slice_names, $skip_slices, $highestlevel, $non_ref, $inc_dups, $assembly) = @_;
|
|
519
|
|
520 #Test if $assembly is old?
|
|
521
|
|
522
|
|
523
|
|
524 my (@slices, $slice, $sr_name);
|
|
525
|
|
526 if(@$slice_names){
|
|
527
|
|
528 foreach my $name(@$slice_names){
|
|
529 $slice = $slice_adaptor->fetch_by_region(undef, $name, undef, undef, undef, $assembly);
|
|
530
|
|
531 #WHy is this failing for hap regions?
|
|
532
|
|
533 if(! $slice){
|
|
534
|
|
535 #Need to eval this as it will break with incorrect formating
|
|
536
|
|
537 eval { $slice = $slice_adaptor->fetch_by_name($name) };
|
|
538
|
|
539 if(! $slice){
|
|
540 throw("Could not fetch slice by region or name:\t".$name);
|
|
541 }
|
|
542 }
|
|
543
|
|
544 $sr_name = $slice->seq_region_name;
|
|
545
|
|
546 next if(grep/^${sr_name}$/, @$skip_slices);
|
|
547 push @slices, $slice;
|
|
548 }
|
|
549 }
|
|
550 elsif($highestlevel){
|
|
551
|
|
552 my $level = 'toplevel';
|
|
553
|
|
554 if($assembly){
|
|
555 $level = 'chromosome';
|
|
556 warn "Cannot get toplevel for old assembly version $assembly, defaulting to 'chromosome' level";
|
|
557 #Would ignore old assembly and just fetch current assembly otherwise as there is no toplevel for old assemblies
|
|
558 #No need for projection on non-ref unassembled seqs as these will/should be identical
|
|
559 #Only need need to project assembled seq i.e. haps(lrgs?).
|
|
560 #Only rollback toplevel data when cleaning after projection, otherwise we may lose some data.
|
|
561 #Change default delete to use all toplevel ref seqs (and non-ref with cs version e.g. haps but not lrgs)
|
|
562 }
|
|
563
|
|
564 my @tmp_slices = @{$slice_adaptor->fetch_all($level, $assembly, $non_ref, $inc_dups)};
|
|
565
|
|
566 if(@$skip_slices){
|
|
567
|
|
568 foreach $slice(@tmp_slices){
|
|
569 $sr_name = $slice->seq_region_name;
|
|
570 push @slices, $slice if ! grep/^${sr_name}$/, @$skip_slices;
|
|
571 }
|
|
572 }
|
|
573 else{
|
|
574 @slices = @tmp_slices;
|
|
575 }
|
|
576 }
|
|
577 else{
|
|
578 throw('You must either pass an arrayref of slice names or specify the toplevel flag');
|
|
579 }
|
|
580
|
|
581
|
|
582 if(! @slices){
|
|
583 throw("You have specified slice_names and skip_slices paramters which have generated no slices.\nslice_names:\t".join(' ',@$slice_names)."\nskip_slices:\t".join(' ', @$skip_slices));
|
|
584 }
|
|
585
|
|
586 return \@slices;
|
|
587 }
|
|
588
|
|
589
|
|
590 # Tracking DB methods
|
|
591 # Move to DBAdaptor? Can we add this as a separate package in the same module?
|
|
592
|
|
593 sub get_current_regulatory_input_names{
|
|
594 my ($tdb, $efg_db, $focus) = @_;
|
|
595
|
|
596 #Validate is production?
|
|
597 my $sql;
|
|
598
|
|
599
|
|
600
|
|
601 if($focus){
|
|
602 $focus = 'Focus';
|
|
603 $sql = 'SELECT efgdb_set_name from dataset where is_focus=true and is_current=true and species="'.$efg_db->species.'"';
|
|
604 }
|
|
605 else{
|
|
606 $focus = 'Non-focus';
|
|
607 #0 rather than false so we don't get NULLs
|
|
608 $sql = 'SELECT efgdb_set_name from dataset where is_focus=0 and is_current=true and species="'.$efg_db->species.'"';
|
|
609 }
|
|
610
|
|
611
|
|
612 #Currently efgdb_set_name can either be data_set or feature_set name!
|
|
613 #Need to standardise this
|
|
614
|
|
615 my @prd_names = @{$tdb->db_handle->selectcol_arrayref($sql)};
|
|
616 my @names;
|
|
617 my @failed_sets;
|
|
618
|
|
619 foreach my $prd_name(@prd_names){
|
|
620
|
|
621 $sql = "SELECT name from feature_set where name like '${prd_name}%'";
|
|
622 my @tmp_names = @{$efg_db->dbc->db_handle->selectcol_arrayref($sql)};
|
|
623
|
|
624 #This is causing problems with multiple feature sets with differing analyses
|
|
625
|
|
626 #Do this via InputSets(using query extension?) instead of using like?
|
|
627
|
|
628 #This is very hacky right now to get it to work
|
|
629 #Need to standardise and review tracking db data.
|
|
630
|
|
631 if(scalar(@tmp_names) > 1){
|
|
632
|
|
633 $sql = "SELECT name from feature_set where name ='${prd_name}_ccat_histone'";
|
|
634 @tmp_names = @{$efg_db->dbc->db_handle->selectcol_arrayref($sql)};
|
|
635
|
|
636 if(scalar(@tmp_names) == 1){
|
|
637 push @names, $tmp_names[0];
|
|
638 }else{
|
|
639 push @failed_sets, $prd_name;
|
|
640 }
|
|
641 }
|
|
642 elsif(scalar(@tmp_names) == 0){
|
|
643 push @failed_sets, $prd_name;
|
|
644 }
|
|
645 else{
|
|
646 push @names, $tmp_names[0];
|
|
647 }
|
|
648
|
|
649 }
|
|
650
|
|
651 if(@failed_sets){
|
|
652 throw("Failed to find unique $focus FeatureSets for production dataset names:\n\t".
|
|
653 join("\n\t", @failed_sets)."\n");
|
|
654 }
|
|
655
|
|
656 return @names;
|
|
657 }
|
|
658
|
|
659 #Handy function to add an external_db entry when needed
|
|
660 sub add_external_db{
|
|
661 my ($efg_db, $db_name,$db_release,$db_display_name) = @_;
|
|
662 my $sql = "select external_db_id from external_db where db_name='$db_name' and db_release='$db_release'";
|
|
663 my ($db_id) = $efg_db->dbc->db_handle->selectrow_array($sql);
|
|
664 if($db_id){
|
|
665 warn "External DB $db_name $db_release already exists in db with db_id $db_id\n";
|
|
666 } else {
|
|
667 #TODO check if it there was a failure
|
|
668 $efg_db->dbc->do("insert into external_db (db_name, db_release, status, dbprimary_acc_linkable, priority, db_display_name, type) values('$db_name', '$db_release', 'KNOWNXREF', '1', '5', '$db_display_name', 'MISC')");
|
|
669 }
|
|
670
|
|
671 }
|
|
672
|
|
673 1;
|