0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Utils::ConversionSupport - Utility module for Vega release and
|
|
24 schema conversion scripts
|
|
25
|
|
26 =head1 SYNOPSIS
|
|
27
|
|
28 my $serverroot = '/path/to/ensembl';
|
|
29 my $support = new Bio::EnsEMBL::Utils::ConversionSupport($serverroot);
|
|
30
|
|
31 # parse common options
|
|
32 $support->parse_common_options;
|
|
33
|
|
34 # parse extra options for your script
|
|
35 $support->parse_extra_options( 'string_opt=s', 'numeric_opt=n' );
|
|
36
|
|
37 # ask user if he wants to run script with these parameters
|
|
38 $support->confirm_params;
|
|
39
|
|
40 # see individual method documentation for more stuff
|
|
41
|
|
42 =head1 DESCRIPTION
|
|
43
|
|
44 This module is a collection of common methods and provides helper
|
|
45 functions for the Vega release and schema conversion scripts. Amongst
|
|
46 others, it reads options from a config file, parses commandline options
|
|
47 and does logging.
|
|
48
|
|
49 =head1 METHODS
|
|
50
|
|
51 =cut
|
|
52
|
|
53 package Bio::EnsEMBL::Utils::ConversionSupport;
|
|
54
|
|
55 use strict;
|
|
56 use warnings;
|
|
57 no warnings 'uninitialized';
|
|
58
|
|
59 use Getopt::Long;
|
|
60 use Text::Wrap;
|
|
61 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
|
|
62 use FindBin qw($Bin $Script);
|
|
63 use POSIX qw(strftime);
|
|
64 use Cwd qw(abs_path);
|
|
65 use DBI;
|
|
66 use Data::Dumper;
|
|
67 use Fcntl qw(:flock SEEK_END);
|
|
68
|
|
69 my $species_c = 1; #counter to be used for each database connection made
|
|
70
|
|
71 =head2 new
|
|
72
|
|
73 Arg[1] : String $serverroot - root directory of your ensembl sandbox
|
|
74 Example : my $support = new Bio::EnsEMBL::Utils::ConversionSupport(
|
|
75 '/path/to/ensembl');
|
|
76 Description : constructor
|
|
77 Return type : Bio::EnsEMBL::Utils::ConversionSupport object
|
|
78 Exceptions : thrown if no serverroot is provided
|
|
79 Caller : general
|
|
80
|
|
81 =cut
|
|
82
|
|
83 sub new {
|
|
84 my $class = shift;
|
|
85 (my $serverroot = shift) or throw("You must supply a serverroot.");
|
|
86 my $self = {
|
|
87 '_serverroot' => $serverroot,
|
|
88 '_param' => { interactive => 1 },
|
|
89 '_warnings' => 0,
|
|
90 };
|
|
91 bless ($self, $class);
|
|
92 return $self;
|
|
93 }
|
|
94
|
|
95 =head2 parse_common_options
|
|
96
|
|
97 Example : $support->parse_common_options;
|
|
98 Description : This method reads options from a configuration file and parses
|
|
99 some commandline options that are common to all scripts (like
|
|
100 db connection settings, help, dry-run). Commandline options
|
|
101 will override config file settings.
|
|
102
|
|
103 All options will be accessible via $self->param('name').
|
|
104 Return type : true on success
|
|
105 Exceptions : thrown if configuration file can't be opened
|
|
106 Caller : general
|
|
107
|
|
108 =cut
|
|
109
|
|
110 sub parse_common_options {
|
|
111 my $self = shift;
|
|
112
|
|
113 # read commandline options
|
|
114 my %h;
|
|
115 Getopt::Long::Configure("pass_through");
|
|
116 &GetOptions( \%h,
|
|
117 'dbname|db_name=s',
|
|
118 'host|dbhost|db_host=s',
|
|
119 'port|dbport|db_port=n',
|
|
120 'user|dbuser|db_user=s',
|
|
121 'pass|dbpass|db_pass=s',
|
|
122 'conffile|conf=s',
|
|
123 'logfile|log=s',
|
|
124 'nolog',
|
|
125 'logpath=s',
|
|
126 'log_base_path=s',
|
|
127 'logappend|log_append',
|
|
128 'verbose|v',
|
|
129 'interactive|i=s',
|
|
130 'dry_run|dry|n',
|
|
131 'help|h|?',
|
|
132 );
|
|
133
|
|
134 # reads config file
|
|
135 my $conffile = $h{'conffile'} || $self->serverroot . "/sanger-plugins/vega/conf/ini-files/Conversion.ini";
|
|
136 $conffile = abs_path($conffile);
|
|
137 if (-e $conffile) {
|
|
138 open(CONF, $conffile) or throw(
|
|
139 "Unable to open configuration file $conffile for reading: $!");
|
|
140 my $serverroot = $self->serverroot;
|
|
141 while (<CONF>) {
|
|
142 chomp;
|
|
143
|
|
144 # remove comments
|
|
145 s/^[#;].*//;
|
|
146 s/\s+[;].*$//;
|
|
147
|
|
148 # read options into internal parameter datastructure, removing whitespace
|
|
149 next unless (/(\w\S*)\s*=\s*(\S*)\s*/);
|
|
150 my $name = $1;
|
|
151 my $val = $2;
|
|
152 if ($val =~ /\$SERVERROOT/) {
|
|
153 $val =~ s/\$SERVERROOT/$serverroot/g;
|
|
154 $val = abs_path($val);
|
|
155 }
|
|
156 $self->param($name, $val);
|
|
157 }
|
|
158 $self->param('conffile', $conffile);
|
|
159 }
|
|
160 elsif ($conffile) {
|
|
161 warning("Unable to open configuration file $conffile for reading: $!");
|
|
162 }
|
|
163
|
|
164 # override configured parameter with commandline options
|
|
165 map { $self->param($_, $h{$_}) } keys %h;
|
|
166
|
|
167 return (1) if $self->param('nolog');
|
|
168
|
|
169 # if logpath & logfile are not set, set them here to /ensemblweb/vega_dev/shared/logs/conversion/DBNAME/SCRIPNAME_NN.log
|
|
170 if (! defined($self->param('log_base_path'))) {
|
|
171 $self->param('log_base_path','/ensemblweb/shared/logs/conversion/');
|
|
172 }
|
|
173 my $dbname = $self->param('dbname');
|
|
174 $dbname =~ s/^vega_//;
|
|
175 if (not (defined($self->param('logpath')) )){
|
|
176 $self->param('logpath', $self->param('log_base_path')."/".$dbname."/" );
|
|
177 }
|
|
178 if ( not defined $self->param('logfile') ){
|
|
179 my $log = $Script;
|
|
180 $log =~ s/.pl$//g;
|
|
181 my $counter;
|
|
182 for ($counter=1 ; (-e $self->param('logpath')."/".$log."_".sprintf("%03d", $counter).".log"); $counter++){
|
|
183 # warn $self->param('logpath')."/".$log."_".$counter.".log";
|
|
184 }
|
|
185 $self->param('logfile', $log."_".sprintf("%03d", $counter).".log");
|
|
186 }
|
|
187 return(1);
|
|
188 }
|
|
189
|
|
190 =head2 parse_extra_options
|
|
191
|
|
192 Arg[1-N] : option descriptors that will be passed on to Getopt::Long
|
|
193 Example : $support->parse_extra_options('string_opt=s', 'numeric_opt=n');
|
|
194 Description : Parse extra commandline options by passing them on to
|
|
195 Getopt::Long and storing parameters in $self->param('name).
|
|
196 Return type : true on success
|
|
197 Exceptions : none (caugth by $self->error)
|
|
198 Caller : general
|
|
199
|
|
200 =cut
|
|
201
|
|
202 sub parse_extra_options {
|
|
203 my ($self, @params) = @_;
|
|
204 Getopt::Long::Configure("no_pass_through");
|
|
205 eval {
|
|
206 # catch warnings to pass to $self->error
|
|
207 local $SIG{__WARN__} = sub { die @_; };
|
|
208 &GetOptions(\%{ $self->{'_param'} }, @params);
|
|
209 };
|
|
210 $self->error($@) if $@;
|
|
211 return(1);
|
|
212 }
|
|
213
|
|
214 =head2 allowed_params
|
|
215
|
|
216 Arg[1-N] : (optional) List of allowed parameters to set
|
|
217 Example : my @allowed = $self->allowed_params(qw(param1 param2));
|
|
218 Description : Getter/setter for allowed parameters. This is used by
|
|
219 $self->confirm_params() to avoid cluttering of output with
|
|
220 conffile entries not relevant for a given script. You can use
|
|
221 $self->get_common_params() as a shortcut to set them.
|
|
222 Return type : Array - list of allowed parameters
|
|
223 Exceptions : none
|
|
224 Caller : general
|
|
225
|
|
226 =cut
|
|
227
|
|
228 sub allowed_params {
|
|
229 my $self = shift;
|
|
230
|
|
231 # setter
|
|
232 if (@_) {
|
|
233 @{ $self->{'_allowed_params'} } = @_;
|
|
234 }
|
|
235
|
|
236 # getter
|
|
237 if (ref($self->{'_allowed_params'}) eq 'ARRAY') {
|
|
238 return @{ $self->{'_allowed_params'} };
|
|
239 } else {
|
|
240 return ();
|
|
241 }
|
|
242 }
|
|
243
|
|
244 =head2 get_common_params
|
|
245
|
|
246 Example : my @allowed_params = $self->get_common_params, 'extra_param';
|
|
247 Description : Returns a list of commonly used parameters in the conversion
|
|
248 scripts. Shortcut for setting allowed parameters with
|
|
249 $self->allowed_params().
|
|
250 Return type : Array - list of common parameters
|
|
251 Exceptions : none
|
|
252 Caller : general
|
|
253
|
|
254 =cut
|
|
255
|
|
256 sub get_common_params {
|
|
257 return qw(
|
|
258 conffile
|
|
259 dbname
|
|
260 host
|
|
261 port
|
|
262 user
|
|
263 pass
|
|
264 nolog
|
|
265 logpath
|
|
266 log_base_path
|
|
267 logfile
|
|
268 logappend
|
|
269 verbose
|
|
270 interactive
|
|
271 dry_run
|
|
272 );
|
|
273 }
|
|
274
|
|
275 =head2 get_loutre_params
|
|
276
|
|
277 Arg : (optional) return a list to parse or not
|
|
278 Example : $support->parse_extra_options($support->get_loutre_params('parse'))
|
|
279 Description : Returns a list of commonly used loutre db parameters - parse option is
|
|
280 simply used to distinguish between reporting and parsing parameters
|
|
281 Return type : Array - list of common parameters
|
|
282 Exceptions : none
|
|
283 Caller : general
|
|
284
|
|
285 =cut
|
|
286
|
|
287 sub get_loutre_params {
|
|
288 my ($self,$p) = @_;
|
|
289 if ($p) {
|
|
290 return qw(
|
|
291 loutrehost=s
|
|
292 loutreport=s
|
|
293 loutreuser=s
|
|
294 loutrepass:s
|
|
295 loutredbname=s
|
|
296 );
|
|
297 }
|
|
298 else {
|
|
299 return qw(
|
|
300 loutrehost
|
|
301 loutreport
|
|
302 loutreuser
|
|
303 loutrepass
|
|
304 loutredbname
|
|
305 );
|
|
306 }
|
|
307 }
|
|
308
|
|
309 =head2 remove_vega_params
|
|
310
|
|
311 Example : $support->remove_vega_params;
|
|
312 Description : Removes Vega db conection parameters. Usefull to avoid clutter in log files when
|
|
313 working exclusively with loutre
|
|
314 Return type : none
|
|
315 Exceptions : none
|
|
316 Caller : general
|
|
317
|
|
318 =cut
|
|
319
|
|
320 sub remove_vega_params {
|
|
321 my $self = shift;
|
|
322 foreach my $param (qw(dbname host port user pass)) {
|
|
323 $self->{'_param'}{$param} = undef;
|
|
324 }
|
|
325 }
|
|
326
|
|
327 =head2 confirm_params
|
|
328
|
|
329 Example : $support->confirm_params;
|
|
330 Description : Prints a table of parameters that were collected from config
|
|
331 file and commandline and asks user to confirm if he wants
|
|
332 to proceed.
|
|
333 Return type : true on success
|
|
334 Exceptions : none
|
|
335 Caller : general
|
|
336
|
|
337 =cut
|
|
338
|
|
339 sub confirm_params {
|
|
340 my $self = shift;
|
|
341
|
|
342 # print parameter table
|
|
343 print "Running script with these parameters:\n\n";
|
|
344 print $self->list_all_params;
|
|
345
|
|
346 if ($self->param('host') eq 'ensweb-1-10') {
|
|
347 # ask user if he wants to proceed
|
|
348 exit unless $self->user_proceed("**************\n\n You're working on ensdb-1-10! Is that correct and you want to continue ?\n\n**************");
|
|
349 }
|
|
350 else {
|
|
351 # ask user if he wants to proceed
|
|
352 exit unless $self->user_proceed("Continue?");
|
|
353 }
|
|
354 return(1);
|
|
355 }
|
|
356
|
|
357 =head2 list_all_params
|
|
358
|
|
359 Example : print LOG $support->list_all_params;
|
|
360 Description : prints a table of the parameters used in the script
|
|
361 Return type : String - the table to print
|
|
362 Exceptions : none
|
|
363 Caller : general
|
|
364
|
|
365 =cut
|
|
366
|
|
367 sub list_all_params {
|
|
368 my $self = shift;
|
|
369 my $txt = sprintf " %-21s%-90s\n", qw(PARAMETER VALUE);
|
|
370 $txt .= " " . "-"x121 . "\n";
|
|
371 $Text::Wrap::colums = 130;
|
|
372 my @params = $self->allowed_params;
|
|
373 foreach my $key (@params) {
|
|
374 my @vals = $self->param($key);
|
|
375 if (@vals) {
|
|
376 $txt .= Text::Wrap::wrap( sprintf(' %-21s', $key),
|
|
377 ' 'x24,
|
|
378 join(", ", @vals)
|
|
379 ) . "\n";
|
|
380 }
|
|
381 }
|
|
382 $txt .= "\n";
|
|
383 return $txt;
|
|
384 }
|
|
385
|
|
386 =head2 create_commandline_options
|
|
387
|
|
388 Arg[1] : Hashref $settings - hashref describing what to do
|
|
389 Allowed keys:
|
|
390 allowed_params => 0|1 # use all allowed parameters
|
|
391 exclude => [] # listref of parameters to exclude
|
|
392 replace => {param => newval} # replace value of param with
|
|
393 # newval
|
|
394 Example : $support->create_commandline_options({
|
|
395 allowed_params => 1,
|
|
396 exclude => ['verbose'],
|
|
397 replace => { 'dbname' => 'homo_sapiens_vega_33_35e' }
|
|
398 });
|
|
399 Description : Creates a commandline options string that can be passed to any
|
|
400 other script using ConversionSupport.
|
|
401 Return type : String - commandline options string
|
|
402 Exceptions : none
|
|
403 Caller : general
|
|
404
|
|
405 =cut
|
|
406
|
|
407 sub create_commandline_options {
|
|
408 my ($self, $settings, $param_hash) = @_;
|
|
409 my %param_hash = $param_hash ? %$param_hash : ();
|
|
410
|
|
411 # get all allowed parameters
|
|
412 if ($settings->{'allowed_params'}) {
|
|
413 # exclude params explicitly stated
|
|
414 my %exclude = map { $_ => 1 } @{ $settings->{'exclude'} || [] };
|
|
415 foreach my $param ($self->allowed_params) {
|
|
416 unless ($exclude{$param}) {
|
|
417 my ($first, @rest) = $self->param($param);
|
|
418 next unless (defined($first));
|
|
419
|
|
420 if (@rest) {
|
|
421 $first = join(",", $first, @rest);
|
|
422 }
|
|
423 $param_hash{$param} = $first;
|
|
424 }
|
|
425 }
|
|
426 }
|
|
427
|
|
428 # replace values
|
|
429 foreach my $key (keys %{ $settings->{'replace'} || {} }) {
|
|
430 $param_hash{$key} = $settings->{'replace'}->{$key};
|
|
431 }
|
|
432
|
|
433 # create the commandline options string
|
|
434 my $options_string;
|
|
435 foreach my $param (keys %param_hash) {
|
|
436 $options_string .= sprintf("--%s %s ", $param, $param_hash{$param});
|
|
437 }
|
|
438 return $options_string;
|
|
439 }
|
|
440
|
|
441 =head2 check_required_params
|
|
442
|
|
443 Arg[1-N] : List @params - parameters to check
|
|
444 Example : $self->check_required_params(qw(dbname host port));
|
|
445 Description : Checks $self->param to make sure the requested parameters
|
|
446 have been set. Dies if parameters are missing.
|
|
447 Return type : true on success
|
|
448 Exceptions : none
|
|
449 Caller : general
|
|
450
|
|
451 =cut
|
|
452
|
|
453 sub check_required_params {
|
|
454 my ($self, @params) = @_;
|
|
455 my @missing = ();
|
|
456 foreach my $param (@params) {
|
|
457 push @missing, $param unless defined $self->param($param);
|
|
458 }
|
|
459 if (@missing) {
|
|
460 throw("Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n");
|
|
461 }
|
|
462 return(1);
|
|
463 }
|
|
464
|
|
465 =head2 user_proceed
|
|
466
|
|
467 Arg[1] : (optional) String $text - notification text to present to user
|
|
468 Example : # run a code snipped conditionally
|
|
469 if ($support->user_proceed("Run the next code snipped?")) {
|
|
470 # run some code
|
|
471 }
|
|
472
|
|
473 # exit if requested by user
|
|
474 exit unless ($support->user_proceed("Want to continue?"));
|
|
475 Description : If running interactively, the user is asked if he wants to
|
|
476 perform a script action. If he doesn't, this section is skipped
|
|
477 and the script proceeds with the code. When running
|
|
478 non-interactively, the section is run by default.
|
|
479 Return type : TRUE to proceed, FALSE to skip.
|
|
480 Exceptions : none
|
|
481 Caller : general
|
|
482
|
|
483 =cut
|
|
484
|
|
485 sub user_proceed {
|
|
486 my ($self, $text) = @_;
|
|
487
|
|
488 if ($self->param('interactive')) {
|
|
489 print "$text\n" if $text;
|
|
490 print "[y/N] ";
|
|
491 my $input = lc(<>);
|
|
492 chomp $input;
|
|
493 unless ($input eq 'y') {
|
|
494 print "Skipping.\n";
|
|
495 return(0);
|
|
496 }
|
|
497 }
|
|
498
|
|
499 return(1);
|
|
500 }
|
|
501
|
|
502 =head2 user_confirm
|
|
503
|
|
504 Description : DEPRECATED - please use user_proceed() instead
|
|
505
|
|
506 =cut
|
|
507
|
|
508 sub user_confirm {
|
|
509 my $self = shift;
|
|
510 exit unless $self->user_proceed("Continue?");
|
|
511 }
|
|
512
|
|
513 =head2 read_user_input
|
|
514
|
|
515 Arg[1] : (optional) String $text - notification text to present to user
|
|
516 Example : my $ret = $support->read_user_input("Choose a number [1/2/3]");
|
|
517 if ($ret == 1) {
|
|
518 # do something
|
|
519 } elsif ($ret == 2) {
|
|
520 # do something else
|
|
521 }
|
|
522 Description : If running interactively, the user is asked for input.
|
|
523 Return type : String - user's input
|
|
524 Exceptions : none
|
|
525 Caller : general
|
|
526
|
|
527 =cut
|
|
528
|
|
529 sub read_user_input {
|
|
530 my ($self, $text) = @_;
|
|
531
|
|
532 if ($self->param('interactive')) {
|
|
533 print "$text\n" if $text;
|
|
534 my $input = <>;
|
|
535 chomp $input;
|
|
536 return $input;
|
|
537 }
|
|
538 }
|
|
539
|
|
540 =head2 comma_to_list
|
|
541
|
|
542 Arg[1-N] : list of parameter names to parse
|
|
543 Example : $support->comma_to_list('chromosomes');
|
|
544 Description : Transparently converts comma-separated lists into arrays (to
|
|
545 allow different styles of commandline options, see perldoc
|
|
546 Getopt::Long for details). Parameters are converted in place
|
|
547 (accessible through $self->param('name')).
|
|
548 Return type : true on success
|
|
549 Exceptions : none
|
|
550 Caller : general
|
|
551
|
|
552 =cut
|
|
553
|
|
554 sub comma_to_list {
|
|
555 my $self = shift;
|
|
556 foreach my $param (@_) {
|
|
557 $self->param($param,
|
|
558 split (/,/, join (',', $self->param($param))));
|
|
559 }
|
|
560 return(1);
|
|
561 }
|
|
562
|
|
563 =head2 list_or_file
|
|
564
|
|
565 Arg[1] : Name of parameter to parse
|
|
566 Example : $support->list_or_file('gene');
|
|
567 Description : Determines whether a parameter holds a list or it is a filename
|
|
568 to read the list entries from.
|
|
569 Return type : true on success
|
|
570 Exceptions : thrown if list file can't be opened
|
|
571 Caller : general
|
|
572
|
|
573 =cut
|
|
574
|
|
575 sub list_or_file {
|
|
576 my ($self, $param) = @_;
|
|
577 my @vals = $self->param($param);
|
|
578 return unless (@vals);
|
|
579
|
|
580 my $firstval = $vals[0];
|
|
581 if (scalar(@vals) == 1 && -e $firstval) {
|
|
582 # we didn't get a list of values, but a file to read values from
|
|
583 @vals = ();
|
|
584 open(IN, $firstval) or throw("Cannot open $firstval for reading: $!");
|
|
585 while(<IN>){
|
|
586 chomp;
|
|
587 push(@vals, $_);
|
|
588 }
|
|
589 close(IN);
|
|
590 $self->param($param, @vals);
|
|
591 }
|
|
592 $self->comma_to_list($param);
|
|
593 return(1);
|
|
594 }
|
|
595
|
|
596 =head2 param
|
|
597
|
|
598 Arg[1] : Parameter name
|
|
599 Arg[2-N] : (optional) List of values to set
|
|
600 Example : my $dbname = $support->param('dbname');
|
|
601 $support->param('port', 3306);
|
|
602 $support->param('chromosomes', 1, 6, 'X');
|
|
603 Description : Getter/setter for parameters. Accepts single-value params and
|
|
604 list params.
|
|
605 Return type : Scalar value for single-value parameters, array of values for
|
|
606 list parameters
|
|
607 Exceptions : thrown if no parameter name is supplied
|
|
608 Caller : general
|
|
609
|
|
610 =cut
|
|
611
|
|
612 sub param {
|
|
613 my $self = shift;
|
|
614 my $name = shift or throw("You must supply a parameter name");
|
|
615
|
|
616 # setter
|
|
617 if (@_) {
|
|
618 if (scalar(@_) == 1) {
|
|
619 # single value
|
|
620 $self->{'_param'}->{$name} = shift;
|
|
621 } else {
|
|
622 # list of values
|
|
623 undef $self->{'_param'}->{$name};
|
|
624 @{ $self->{'_param'}->{$name} } = @_;
|
|
625 }
|
|
626 }
|
|
627
|
|
628 # getter
|
|
629 if (ref($self->{'_param'}->{$name}) eq 'ARRAY') {
|
|
630 # list parameter
|
|
631 return @{ $self->{'_param'}->{$name} };
|
|
632 } elsif (defined($self->{'_param'}->{$name})) {
|
|
633 # single-value parameter
|
|
634 return $self->{'_param'}->{$name};
|
|
635 } else {
|
|
636 return ();
|
|
637 }
|
|
638 }
|
|
639
|
|
640 =head2 error
|
|
641
|
|
642 Arg[1] : (optional) String - error message
|
|
643 Example : $support->error("An error occurred: $@");
|
|
644 exit(0) if $support->error;
|
|
645 Description : Getter/setter for error messages
|
|
646 Return type : String - error message
|
|
647 Exceptions : none
|
|
648 Caller : general
|
|
649
|
|
650 =cut
|
|
651
|
|
652 sub error {
|
|
653 my $self = shift;
|
|
654 $self->{'_error'} = shift if (@_);
|
|
655 return $self->{'_error'};
|
|
656 }
|
|
657
|
|
658 =head2 warnings
|
|
659
|
|
660 Example : print LOG "There were ".$support->warnings." warnings.\n";
|
|
661 Description : Returns the number of warnings encountered while running the
|
|
662 script (the warning counter is increased by $self->log_warning).
|
|
663 Return type : Int - number of warnings
|
|
664 Exceptions : none
|
|
665 Caller : general
|
|
666
|
|
667 =cut
|
|
668
|
|
669 sub warnings {
|
|
670 my $self = shift;
|
|
671 return $self->{'_warnings'};
|
|
672 }
|
|
673
|
|
674 =head2 serverroot
|
|
675
|
|
676 Arg[1] : (optional) String - root directory of your ensembl sandbox
|
|
677 Example : my $serverroot = $support->serverroot;
|
|
678 Description : Getter/setter for the root directory of your ensembl sandbox.
|
|
679 This is set when ConversionSupport object is created, so
|
|
680 usually only used as a getter.
|
|
681 Return type : String - the server root directory
|
|
682 Exceptions : none
|
|
683 Caller : general
|
|
684
|
|
685 =cut
|
|
686
|
|
687 sub serverroot {
|
|
688 my $self = shift;
|
|
689 $self->{'_serverroot'} = shift if (@_);
|
|
690 return $self->{'_serverroot'};
|
|
691 }
|
|
692
|
|
693 =head2 get_database
|
|
694
|
|
695 Arg[1] : String $database - the type of database to connect to
|
|
696 (eg core, otter)
|
|
697 Arg[2] : (optional) String $prefix - the prefix used for retrieving the
|
|
698 connection settings from the configuration
|
|
699 Example : my $db = $support->get_database('core');
|
|
700 Description : Connects to the database specified.
|
|
701 Return type : DBAdaptor of the appropriate type
|
|
702 Exceptions : thrown if asking for unknown database
|
|
703 Caller : general
|
|
704
|
|
705 =cut
|
|
706
|
|
707 sub get_database {
|
|
708 my $self = shift;
|
|
709 my $database = shift or throw("You must provide a database");
|
|
710 my $prefix = shift || '';
|
|
711 $self->check_required_params(
|
|
712 "${prefix}host",
|
|
713 "${prefix}port",
|
|
714 "${prefix}user",
|
|
715 "${prefix}dbname",
|
|
716 );
|
|
717 my %adaptors = (
|
|
718 core => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
|
|
719 ensembl => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
|
|
720 evega => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
|
|
721 otter => 'Bio::Otter::DBSQL::DBAdaptor',
|
|
722 vega => 'Bio::Otter::DBSQL::DBAdaptor',
|
|
723 compara => 'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor',
|
|
724 loutre => 'Bio::Vega::DBSQL::DBAdaptor',
|
|
725 );
|
|
726 throw("Unknown database: $database") unless $adaptors{$database};
|
|
727
|
|
728 $self->dynamic_use($adaptors{$database});
|
|
729 my $species = 'species' . $species_c;
|
|
730 my $dba = $adaptors{$database}->new(
|
|
731 -host => $self->param("${prefix}host"),
|
|
732 -port => $self->param("${prefix}port"),
|
|
733 -user => $self->param("${prefix}user"),
|
|
734 -pass => $self->param("${prefix}pass") || '',
|
|
735 -dbname => $self->param("${prefix}dbname"),
|
|
736 -group => $database,
|
|
737 -species => $species,
|
|
738 );
|
|
739 #can use this approach to get dna from another db
|
|
740 # my $dna_db = $adaptors{$database}->new(
|
|
741 # -host => 'otterlive',
|
|
742 # -port => '3301',
|
|
743 # -user => $self->param("${prefix}user"),
|
|
744 # -pass => $self->param("${prefix}pass"),
|
|
745 # -dbname => 'loutre_human',
|
|
746 # );
|
|
747 # $dba->dnadb($dna_db);
|
|
748
|
|
749 # otherwise explicitely set the dnadb to itself - by default the Registry assumes
|
|
750 # a group 'core' for this now
|
|
751 $dba->dnadb($dba);
|
|
752
|
|
753 $species_c++;
|
|
754
|
|
755 $self->{'_dba'}->{$database} = $dba;
|
|
756 $self->{'_dba'}->{'default'} = $dba unless $self->{'_dba'}->{'default'};
|
|
757 return $self->{'_dba'}->{$database};
|
|
758 }
|
|
759
|
|
760
|
|
761 =head2 get_dbconnection
|
|
762
|
|
763 Arg[1] : (optional) String $prefix - the prefix used for retrieving the
|
|
764 connection settings from the configuration
|
|
765 Example : my $dbh = $self->get_dbconnection;
|
|
766 Description : Connects to the database server specified. You don't have to
|
|
767 specify a database name (this is useful for running commands
|
|
768 like $dbh->do('show databases')).
|
|
769 Return type : DBI database handle
|
|
770 Exceptions : thrown if connection fails
|
|
771 Caller : general
|
|
772 Status : At Risk
|
|
773
|
|
774 =cut
|
|
775
|
|
776 sub get_dbconnection {
|
|
777 my $self = shift;
|
|
778 my $prefix = shift;
|
|
779
|
|
780 $self->check_required_params(
|
|
781 "${prefix}host",
|
|
782 "${prefix}port",
|
|
783 "${prefix}user",
|
|
784 );
|
|
785
|
|
786 my $dsn = "DBI:" . ($self->param('driver')||'mysql') .
|
|
787 ":host=" . $self->param("${prefix}host") .
|
|
788 ";port=" . $self->param("${prefix}port");
|
|
789
|
|
790 if ($self->param("${prefix}dbname")) {
|
|
791 $dsn .= ";dbname=".$self->param("${prefix}dbname");
|
|
792 }
|
|
793
|
|
794 # warn $dsn;
|
|
795
|
|
796 my $dbh;
|
|
797 eval{
|
|
798 $dbh = DBI->connect($dsn, $self->param("${prefix}user"),
|
|
799 $self->param("${prefix}pass"), {'RaiseError' => 1, 'PrintError' => 0});
|
|
800 };
|
|
801
|
|
802 if (!$dbh || $@ || !$dbh->ping) {
|
|
803 $self->log_error("Could not connect to db server as user ".
|
|
804 $self->param("${prefix}user") .
|
|
805 " using [$dsn] as a locator:\n" . $DBI::errstr . $@);
|
|
806 }
|
|
807
|
|
808 $self->{'_dbh'} = $dbh;
|
|
809 return $self->{'_dbh'};
|
|
810
|
|
811 }
|
|
812
|
|
813
|
|
814 =head2 dba
|
|
815
|
|
816 Arg[1] : (optional) String $database - type of db apaptor to retrieve
|
|
817 Example : my $dba = $support->dba;
|
|
818 Description : Getter for database adaptor. Returns default (i.e. created
|
|
819 first) db adaptor if no argument is provided.
|
|
820 Return type : Bio::EnsEMBL::DBSQL::DBAdaptor or Bio::Otter::DBSQL::DBAdaptor
|
|
821 Exceptions : none
|
|
822 Caller : general
|
|
823
|
|
824 =cut
|
|
825
|
|
826 sub dba {
|
|
827 my ($self, $database) = shift;
|
|
828 return $self->{'_dba'}->{$database} || $self->{'_dba'}->{'default'};
|
|
829 }
|
|
830
|
|
831 =head2 dynamic_use
|
|
832
|
|
833 Arg [1] : String $classname - The name of the class to require/import
|
|
834 Example : $self->dynamic_use('Bio::EnsEMBL::DBSQL::DBAdaptor');
|
|
835 Description: Requires and imports the methods for the classname provided,
|
|
836 checks the symbol table so that it doesnot re-require modules
|
|
837 that have already been required.
|
|
838 Returntype : true on success
|
|
839 Exceptions : Warns to standard error if module fails to compile
|
|
840 Caller : internal
|
|
841
|
|
842 =cut
|
|
843
|
|
844 sub dynamic_use {
|
|
845 my ($self, $classname) = @_;
|
|
846 my ($parent_namespace, $module) = $classname =~/^(.*::)(.*)$/ ? ($1,$2) : ('::', $classname);
|
|
847
|
|
848 no strict 'refs';
|
|
849 # return if module has already been imported
|
|
850 return 1 if $parent_namespace->{$module.'::'} && %{ $parent_namespace->{$module.'::'}||{} };
|
|
851
|
|
852 eval "require $classname";
|
|
853 throw("Failed to require $classname: $@") if ($@);
|
|
854 $classname->import();
|
|
855
|
|
856 return 1;
|
|
857 }
|
|
858
|
|
859 =head2 get_chrlength
|
|
860
|
|
861 Arg[1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
|
|
862 Arg[2] : (optional) String $version - coord_system version
|
|
863 Arg[3] : (optional) String $type - type of region eg chromsome (defaults to 'toplevel')
|
|
864 Arg[4] : (optional) Boolean - return non reference slies as well (required for haplotypes eq 6-COX)
|
|
865 Arg[5] : (optional) Override chromosome parameter filtering with this array reference. Empty denotes all.
|
|
866 Example : my $chr_length = $support->get_chrlength($dba);
|
|
867 Description : Get all chromosomes and their length from the database. Return
|
|
868 chr_name/length for the chromosomes the user requested (or all
|
|
869 chromosomes by default)
|
|
870 Return type : Hashref - chromosome_name => length
|
|
871 Exceptions : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
|
|
872 Caller : general
|
|
873
|
|
874 =cut
|
|
875
|
|
876 sub get_chrlength {
|
|
877 my ($self, $dba, $version,$type,$include_non_reference,$chroms) = @_;
|
|
878 $dba ||= $self->dba;
|
|
879 $type ||= 'toplevel';
|
|
880 throw("get_chrlength should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n")
|
|
881 unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));
|
|
882
|
|
883 my $sa = $dba->get_SliceAdaptor;
|
|
884
|
|
885 my @chromosomes = map { $_->seq_region_name }
|
|
886 @{ $sa->fetch_all($type,$version,$include_non_reference) };
|
|
887 my %chr = map { $_ => $sa->fetch_by_region($type, $_, undef, undef, undef, $version)->length } @chromosomes;
|
|
888
|
|
889 my @wanted = $self->param('chromosomes');
|
|
890 @wanted = @$chroms if defined $chroms and ref($chroms) eq 'ARRAY';
|
|
891
|
|
892 if (@wanted) {
|
|
893 # check if user supplied invalid chromosome names
|
|
894 foreach my $chr (@wanted) {
|
|
895 my $found = 0;
|
|
896 foreach my $chr_from_db (keys %chr) {
|
|
897 if ($chr_from_db eq $chr) {
|
|
898 $found = 1;
|
|
899 last;
|
|
900 }
|
|
901 }
|
|
902 unless ($found) {
|
|
903 warning("Didn't find chromosome $chr in database " .
|
|
904 $self->param('dbname'));
|
|
905 }
|
|
906 }
|
|
907
|
|
908 # filter to requested chromosomes only
|
|
909 HASH:
|
|
910 foreach my $chr_from_db (keys %chr) {
|
|
911 foreach my $chr (@wanted) {
|
|
912 if ($chr_from_db eq $chr) {
|
|
913 next HASH;
|
|
914 }
|
|
915 }
|
|
916 delete($chr{$chr_from_db});
|
|
917 }
|
|
918 }
|
|
919
|
|
920 return \%chr;
|
|
921 }
|
|
922
|
|
923 =head2 get_ensembl_chr_mapping
|
|
924
|
|
925 Arg[1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
|
|
926 Arg[2] : (optional) String $version - coord_system version
|
|
927 Example : my $ensembl_mapping = $support->get_ensembl_chr_mapping($dba);
|
|
928 Description : Gets a mapping between Vega chromosome names and their
|
|
929 equivalent Ensembl chromosomes. Works with non-reference chromosomes
|
|
930 Return type : Hashref - Vega name => Ensembl name
|
|
931 Exceptions : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
|
|
932 Caller : general
|
|
933
|
|
934 =cut
|
|
935
|
|
936 sub get_ensembl_chr_mapping {
|
|
937 my ($self, $dba, $version) = @_;
|
|
938 $dba ||= $self->dba;
|
|
939 throw("get_ensembl_chr_mapping should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n") unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));
|
|
940
|
|
941 my $sa = $dba->get_SliceAdaptor;
|
|
942 my @chromosomes = map { $_->seq_region_name }
|
|
943 @{ $sa->fetch_all('chromosome', $version, 1) };
|
|
944
|
|
945 my %chrs;
|
|
946 foreach my $chr (@chromosomes) {
|
|
947 my $sr = $sa->fetch_by_region('chromosome', $chr, undef, undef, undef, $version);
|
|
948 my ($ensembl_name_attr) = @{ $sr->get_all_Attributes('ensembl_name') };
|
|
949 if ($ensembl_name_attr) {
|
|
950 $chrs{$chr} = $ensembl_name_attr->value;
|
|
951 } else {
|
|
952 $chrs{$chr} = $chr;
|
|
953 }
|
|
954 }
|
|
955 return \%chrs;
|
|
956 }
|
|
957
|
|
958 =head2 get_taxonomy_id
|
|
959
|
|
960 Arg[1] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
|
|
961 Example : my $sid = $support->get_taxonony_id($dba);
|
|
962 Description : Retrieves the taxononmy ID from the meta table
|
|
963 Return type : Int - the taxonomy ID
|
|
964 Exceptions : thrown if no taxonomy ID is found in the database
|
|
965 Caller : general
|
|
966
|
|
967 =cut
|
|
968
|
|
969 sub get_taxonomy_id {
|
|
970 my ($self, $dba) = @_;
|
|
971 $dba ||= $self->dba;
|
|
972 my $sql = 'SELECT meta_value FROM meta WHERE meta_key = "species.taxonomy_id"';
|
|
973 my $sth = $dba->dbc->db_handle->prepare($sql);
|
|
974 $sth->execute;
|
|
975 my ($tid) = $sth->fetchrow_array;
|
|
976 $sth->finish;
|
|
977 $self->throw("Could not determine taxonomy_id from database.") unless $tid;
|
|
978 return $tid;
|
|
979 }
|
|
980
|
|
981 =head2 get_species_scientific_name
|
|
982
|
|
983 Arg[1] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
|
|
984 Example : my $species = $support->get_species_scientific_name($dba);
|
|
985 Description : Retrieves the species scientific name (Genus species) from the
|
|
986 meta table
|
|
987 Return type : String - species scientific name
|
|
988 Exceptions : thrown if species name can not be determined from db
|
|
989 Caller : general
|
|
990
|
|
991 =cut
|
|
992
|
|
993 sub get_species_scientific_name {
|
|
994 my ($self, $dba) = @_;
|
|
995 $dba ||= $self->dba;
|
|
996 my $sql = "SELECT meta_value FROM meta WHERE meta_key = \'species.scientific_name\'";
|
|
997 my $sth = $dba->dbc->db_handle->prepare($sql);
|
|
998 $sth->execute;
|
|
999 my @sp;
|
|
1000 while (my @row = $sth->fetchrow_array) {
|
|
1001 push @sp, $row[0];
|
|
1002 }
|
|
1003 if (! @sp || @sp > 1) {
|
|
1004 $self->throw("Could not retrieve a single species scientific name from database.");
|
|
1005 }
|
|
1006 $sth->finish;
|
|
1007 my $species = $sp[0];
|
|
1008 $species =~ s/ /_/g;
|
|
1009 return $species;
|
|
1010 }
|
|
1011
|
|
1012 =head2 species
|
|
1013
|
|
1014 Arg[1] : (optional) String $species - species name to set
|
|
1015 Example : my $species = $support->species;
|
|
1016 my $url = "http://vega.sanger.ac.uk/$species/";
|
|
1017 Description : Getter/setter for species name (Genus_species). If not set, it's
|
|
1018 determined from database's meta table
|
|
1019 Return type : String - species name
|
|
1020 Exceptions : none
|
|
1021 Caller : general
|
|
1022
|
|
1023 =cut
|
|
1024
|
|
1025 sub species {
|
|
1026 my $self = shift;
|
|
1027 $self->{'_species'} = shift if (@_);
|
|
1028 # get species name from database if not set
|
|
1029 unless ($self->{'_species'}) {
|
|
1030 $self->{'_species'} = $self->get_species_scientific_name;
|
|
1031 }
|
|
1032 return $self->{'_species'};
|
|
1033 }
|
|
1034
|
|
1035 =head2 sort_chromosomes
|
|
1036
|
|
1037 Arg[1] : (optional) Hashref $chr_hashref - Hashref with chr_name as keys
|
|
1038 Example : my $chr = { '6-COX' => 1, '1' => 1, 'X' => 1 };
|
|
1039 my @sorted = $support->sort_chromosomes($chr);
|
|
1040 Description : Sorts chromosomes in an intuitive way (numerically, then
|
|
1041 alphabetically). If no chromosome hashref is passed, it's
|
|
1042 retrieve by calling $self->get_chrlength()
|
|
1043 Return type : List - sorted chromosome names
|
|
1044 Exceptions : thrown if no hashref is provided
|
|
1045 Caller : general
|
|
1046
|
|
1047 =cut
|
|
1048
|
|
1049 sub sort_chromosomes {
|
|
1050 my ($self, $chr_hashref) = @_;
|
|
1051 $chr_hashref = $self->get_chrlength unless ($chr_hashref);
|
|
1052 throw("You have to pass a hashref of your chromosomes")
|
|
1053 unless ($chr_hashref and ref($chr_hashref) eq 'HASH');
|
|
1054 return (sort _by_chr_num keys %$chr_hashref);
|
|
1055 }
|
|
1056
|
|
1057 =head2 _by_chr_num
|
|
1058
|
|
1059 Example : my @sorted = sort _by_chr_num qw(X, 6-COX, 14, 7);
|
|
1060 Description : Subroutine to use in sort for sorting chromosomes. Sorts
|
|
1061 numerically, then alphabetically
|
|
1062 Return type : values to be used by sort
|
|
1063 Exceptions : none
|
|
1064 Caller : internal ($self->sort_chromosomes)
|
|
1065
|
|
1066 =cut
|
|
1067
|
|
1068 sub _by_chr_num {
|
|
1069 my @awords = split /-/, $a;
|
|
1070 my @bwords = split /-/, $b;
|
|
1071
|
|
1072 my $anum = $awords[0];
|
|
1073 my $bnum = $bwords[0];
|
|
1074
|
|
1075 if ($anum !~ /^[0-9]*$/) {
|
|
1076 if ($bnum !~ /^[0-9]*$/) {
|
|
1077 return $anum cmp $bnum;
|
|
1078 } else {
|
|
1079 return 1;
|
|
1080 }
|
|
1081 }
|
|
1082 if ($bnum !~ /^[0-9]*$/) {
|
|
1083 return -1;
|
|
1084 }
|
|
1085
|
|
1086 if ($anum <=> $bnum) {
|
|
1087 return $anum <=> $bnum;
|
|
1088 } else {
|
|
1089 if ($#awords == 0) {
|
|
1090 return -1;
|
|
1091 } elsif ($#bwords == 0) {
|
|
1092 return 1;
|
|
1093 } else {
|
|
1094 return $awords[1] cmp $bwords[1];
|
|
1095 }
|
|
1096 }
|
|
1097 }
|
|
1098
|
|
1099 =head2 split_chromosomes_by_size
|
|
1100
|
|
1101 Arg[1] : (optional) Int $cutoff - the cutoff in bp between small and
|
|
1102 large chromosomes
|
|
1103 Arg[2] : (optional) Boolean to include duplicate regions, ie PAR or not
|
|
1104 (default is no)
|
|
1105 Arg[3] : (optional) Coordsystem version to retrieve
|
|
1106
|
|
1107 Example : my $chr_slices = $support->split_chromosomes_by_size;
|
|
1108 foreach my $block_size (keys %{ $chr_slices }) {
|
|
1109 print "Chromosomes with blocksize $block_size: ";
|
|
1110 print join(", ", map { $_->seq_region_name }
|
|
1111 @{ $chr_slices->{$block_size} });
|
|
1112 }
|
|
1113 Description : Determines block sizes for storing DensityFeatures on
|
|
1114 chromosomes, and return slices for each chromosome. The block
|
|
1115 size is determined so that you have 150 bins for the smallest
|
|
1116 chromosome over 5 Mb in length. For chromosomes smaller than 5
|
|
1117 Mb, an additional smaller block size is used to yield 150 bins
|
|
1118 for the overall smallest chromosome. This will result in
|
|
1119 reasonable resolution for small chromosomes and high
|
|
1120 performance for big ones. Does not return non-reference seq_regions
|
|
1121 Return type : Hashref (key: block size; value: Arrayref of chromosome
|
|
1122 Bio::EnsEMBL::Slices)
|
|
1123 Exceptions : none
|
|
1124 Caller : density scripts
|
|
1125
|
|
1126 =cut
|
|
1127
|
|
1128 sub split_chromosomes_by_size {
|
|
1129 my $self = shift;
|
|
1130 my $cutoff = shift || 5000000;
|
|
1131 my $dup = shift || 0;
|
|
1132 my $cs_version = shift;
|
|
1133 my $include_non_reference = 1; #get non reference slices
|
|
1134 my $slice_adaptor = $self->dba->get_SliceAdaptor;
|
|
1135 my $top_slices;
|
|
1136 if ($self->param('chromosomes')) {
|
|
1137 foreach my $chr ($self->param('chromosomes')) {
|
|
1138 push @{ $top_slices }, $slice_adaptor->fetch_by_region('chromosome', $chr);
|
|
1139 }
|
|
1140 } else {
|
|
1141 $top_slices = $slice_adaptor->fetch_all('chromosome',$cs_version,$include_non_reference,$dup);
|
|
1142 }
|
|
1143
|
|
1144 # filter out patches, if present
|
|
1145 $top_slices = [ grep { $_->is_reference or $self->is_haplotype($_,$self->dba) } @$top_slices ];
|
|
1146
|
|
1147 my ($big_chr, $small_chr, $min_big_chr, $min_small_chr);
|
|
1148 foreach my $slice (@{ $top_slices }) {
|
|
1149 next if ($slice->length eq 10000); #hack for chrY pseudoslice
|
|
1150 if ($slice->length < $cutoff) {
|
|
1151 if (! $min_small_chr or ($min_small_chr > $slice->length)) {
|
|
1152 $min_small_chr = $slice->length;
|
|
1153 }
|
|
1154 # push small chromosomes onto $small_chr
|
|
1155 push @{ $small_chr }, $slice;
|
|
1156 }
|
|
1157 elsif (! $min_big_chr or ($min_big_chr > $slice->length) ){
|
|
1158 $min_big_chr = $slice->length;
|
|
1159 }
|
|
1160 # push _all_ chromosomes onto $big_chr
|
|
1161 push @{ $big_chr }, $slice;
|
|
1162 }
|
|
1163 my $chr_slices;
|
|
1164 $chr_slices->{int($min_big_chr/150)} = $big_chr if $min_big_chr;
|
|
1165 $chr_slices->{int($min_small_chr/150)} = $small_chr if $min_small_chr;
|
|
1166 return $chr_slices;
|
|
1167 }
|
|
1168
|
|
1169 =head2 log
|
|
1170
|
|
1171 Arg[1] : String $txt - the text to log
|
|
1172 Arg[2] : Int $indent - indentation level for log message
|
|
1173 Example : my $log = $support->log_filehandle;
|
|
1174 $support->log('Log foo.\n', 1);
|
|
1175 Description : Logs a message to the filehandle initialised by calling
|
|
1176 $self->log_filehandle(). You can supply an indentation level
|
|
1177 to get nice hierarchical log messages.
|
|
1178 Return type : true on success
|
|
1179 Exceptions : thrown when no filehandle can be obtained
|
|
1180 Caller : general
|
|
1181
|
|
1182 =cut
|
|
1183
|
|
1184 sub log {
|
|
1185 my ($self, $txt, $indent) = @_;
|
|
1186 $indent ||= 0;
|
|
1187
|
|
1188 # strip off leading linebreaks so that indenting doesn't break
|
|
1189 $txt =~ s/^(\n*)//;
|
|
1190
|
|
1191 $txt = $1." "x$indent . $txt;
|
|
1192 my $fh = $self->{'_log_filehandle'};
|
|
1193 throw("Unable to obtain log filehandle") unless $fh;
|
|
1194 print $fh "$txt";
|
|
1195 return(1);
|
|
1196 }
|
|
1197
|
|
1198 =head2 lock_log
|
|
1199
|
|
1200 Description : Use flock-style locks to lock log and fastforward to end.
|
|
1201 Useful if log is being written to by multiple processes.
|
|
1202 =cut
|
|
1203
|
|
1204 sub lock_log {
|
|
1205 my ($self) = @_;
|
|
1206
|
|
1207 my $fh = $self->{'_log_filehandle'};
|
|
1208 return if -t $fh or -p $fh; # Shouldn't lock such things
|
|
1209 flock($self->{'_log_filehandle'},LOCK_EX) || return 0;
|
|
1210 seek($self->{'_log_filehandle'},0,SEEK_END); # fail ok, prob not reg file
|
|
1211 return 1;
|
|
1212 }
|
|
1213
|
|
1214 =head2 unlock_log
|
|
1215
|
|
1216 Description : Unlock log previously locked by lock_log.
|
|
1217
|
|
1218 =cut
|
|
1219
|
|
1220 sub unlock_log {
|
|
1221 my ($self) = @_;
|
|
1222
|
|
1223 my $fh = $self->{'_log_filehandle'};
|
|
1224 return if -t $fh or -p $fh; # We don't lock such things
|
|
1225 # flush is implicit in flock
|
|
1226 flock($self->{'_log_filehandle'},LOCK_UN) || return 0;
|
|
1227 return 1;
|
|
1228 }
|
|
1229
|
|
1230 =head2 log_warning
|
|
1231
|
|
1232 Arg[1] : String $txt - the warning text to log
|
|
1233 Arg[2] : Int $indent - indentation level for log message
|
|
1234 Arg[3] : Bool - add a line break before warning if true
|
|
1235 Example : my $log = $support->log_filehandle;
|
|
1236 $support->log_warning('Log foo.\n', 1);
|
|
1237 Description : Logs a message via $self->log and increases the warning counter.
|
|
1238 Return type : true on success
|
|
1239 Exceptions : none
|
|
1240 Caller : general
|
|
1241
|
|
1242 =cut
|
|
1243
|
|
1244 sub log_warning {
|
|
1245 my ($self, $txt, $indent, $break) = @_;
|
|
1246 $txt = "WARNING: " . $txt;
|
|
1247 $txt = "\n$txt" if ($break);
|
|
1248 $self->log($txt, $indent);
|
|
1249 $self->{'_warnings'}++;
|
|
1250 return(1);
|
|
1251 }
|
|
1252
|
|
1253 =head2 log_error
|
|
1254
|
|
1255 Arg[1] : String $txt - the error text to log
|
|
1256 Arg[2] : Int $indent - indentation level for log message
|
|
1257 Example : my $log = $support->log_filehandle;
|
|
1258 $support->log_error('Log foo.\n', 1);
|
|
1259 Description : Logs a message via $self->log and exits the script.
|
|
1260 Return type : none
|
|
1261 Exceptions : none
|
|
1262 Caller : general
|
|
1263
|
|
1264 =cut
|
|
1265
|
|
1266 sub log_error {
|
|
1267 my ($self, $txt, $indent) = @_;
|
|
1268 $txt = "ERROR: ".$txt;
|
|
1269 $self->log($txt, $indent);
|
|
1270 $self->log("Exiting.\n");
|
|
1271 exit;
|
|
1272 }
|
|
1273
|
|
1274 =head2 log_verbose
|
|
1275
|
|
1276 Arg[1] : String $txt - the warning text to log
|
|
1277 Arg[2] : Int $indent - indentation level for log message
|
|
1278 Example : my $log = $support->log_filehandle;
|
|
1279 $support->log_verbose('Log this verbose message.\n', 1);
|
|
1280 Description : Logs a message via $self->log if --verbose option was used
|
|
1281 Return type : TRUE on success, FALSE if not verbose
|
|
1282 Exceptions : none
|
|
1283 Caller : general
|
|
1284
|
|
1285 =cut
|
|
1286
|
|
1287 sub log_verbose {
|
|
1288 my ($self, $txt, $indent) = @_;
|
|
1289 return(0) unless $self->param('verbose');
|
|
1290 $self->log($txt, $indent);
|
|
1291 return(1);
|
|
1292 }
|
|
1293
|
|
1294 =head2 log_stamped
|
|
1295
|
|
1296 Arg[1] : String $txt - the warning text to log
|
|
1297 Arg[2] : Int $indent - indentation level for log message
|
|
1298 Example : my $log = $support->log_filehandle;
|
|
1299 $support->log_stamped('Log this stamped message.\n', 1);
|
|
1300 Description : Appends timestamp and memory usage to a message and logs it via
|
|
1301 $self->log
|
|
1302 Return type : TRUE on success
|
|
1303 Exceptions : none
|
|
1304 Caller : general
|
|
1305
|
|
1306 =cut
|
|
1307
|
|
1308 sub log_stamped {
|
|
1309 my ($self, $txt, $indent) = @_;
|
|
1310 # append timestamp and memory usage to log text
|
|
1311 $txt =~ s/(\n*)$//;
|
|
1312 $txt .= " ".$self->date_and_mem.$1;
|
|
1313 $self->log($txt, $indent);
|
|
1314 return(1);
|
|
1315 }
|
|
1316
|
|
1317 =head2 log_filehandle
|
|
1318
|
|
1319 Arg[1] : (optional) String $mode - file access mode
|
|
1320 Example : my $log = $support->log_filehandle;
|
|
1321 # print to the filehandle
|
|
1322 print $log 'Lets start logging...\n';
|
|
1323 # log via the wrapper $self->log()
|
|
1324 $support->log('Another log message.\n');
|
|
1325 Description : Returns a filehandle for logging (STDERR by default, logfile if
|
|
1326 set from config or commandline). You can use the filehandle
|
|
1327 directly to print to, or use the smart wrapper $self->log().
|
|
1328 Logging mode (truncate or append) can be set by passing the
|
|
1329 mode as an argument to log_filehandle(), or with the
|
|
1330 --logappend commandline option (default: truncate)
|
|
1331 Return type : Filehandle - the filehandle to log to
|
|
1332 Exceptions : thrown if logfile can't be opened
|
|
1333 Caller : general
|
|
1334
|
|
1335 =cut
|
|
1336
|
|
1337 sub log_filehandle {
|
|
1338 my ($self, $mode, $date) = @_;
|
|
1339 $mode ||= '>';
|
|
1340 $mode = '>>' if ($self->param('logappend'));
|
|
1341 my $fh = \*STDERR;
|
|
1342 if (my $logfile = $self->param('logfile')) {
|
|
1343 $logfile .= "_$date" if $date;
|
|
1344 if (my $logpath = $self->param('logpath')) {
|
|
1345 unless (-e $logpath) {
|
|
1346 system("mkdir $logpath") == 0 or
|
|
1347 $self->log_error("Can't create log dir $logpath: $!\n");
|
|
1348 }
|
|
1349 $logfile = "$logpath/$logfile";
|
|
1350 }
|
|
1351 open($fh, "$mode", $logfile) or throw(
|
|
1352 "Unable to open $logfile for writing: $!");
|
|
1353 }
|
|
1354 $self->{'_log_filehandle'} = $fh;
|
|
1355 return $self->{'_log_filehandle'};
|
|
1356 }
|
|
1357
|
|
1358 =head2 filehandle
|
|
1359
|
|
1360 Arg[1] : String $mode - file access mode
|
|
1361 Arg[2] : String $file - input or output file
|
|
1362 Example : my $fh = $support->filehandle('>>', '/path/to/file');
|
|
1363 # print to the filehandle
|
|
1364 print $fh 'Your text goes here...\n';
|
|
1365 Description : Returns a filehandle (*STDOUT for writing, *STDIN for reading
|
|
1366 by default) to print to or read from.
|
|
1367 Return type : Filehandle - the filehandle
|
|
1368 Exceptions : thrown if file can't be opened
|
|
1369 Caller : general
|
|
1370
|
|
1371 =cut
|
|
1372
|
|
1373 sub filehandle {
|
|
1374 my ($self, $mode, $file) = @_;
|
|
1375 $mode ||= ">";
|
|
1376 my $fh;
|
|
1377 if ($file) {
|
|
1378 open($fh, "$mode", $file) or throw(
|
|
1379 "Unable to open $file for writing: $!");
|
|
1380 } elsif ($mode =~ />/) {
|
|
1381 $fh = \*STDOUT;
|
|
1382 } elsif ($mode =~ /</) {
|
|
1383 $fh = \*STDIN;
|
|
1384 }
|
|
1385 return $fh;
|
|
1386 }
|
|
1387
|
|
1388 =head2 init_log_date
|
|
1389
|
|
1390 Example : $support->init_log_date;
|
|
1391 Description : Opens a filehandle to a logfile with the date in the file name
|
|
1392 Return type : Filehandle - the log filehandle
|
|
1393 Exceptions : none
|
|
1394 Caller : general
|
|
1395
|
|
1396 =cut
|
|
1397
|
|
1398 sub init_log_date {
|
|
1399 my $self = shift;
|
|
1400 my $date = $self->date;
|
|
1401 return $self->init_log($date);
|
|
1402 }
|
|
1403
|
|
1404 =head2 init_log
|
|
1405
|
|
1406 Example : $support->init_log;
|
|
1407 Description : Opens a filehandle to the logfile and prints some header
|
|
1408 information to this file. This includes script name, date, user
|
|
1409 running the script and parameters the script will be running
|
|
1410 with.
|
|
1411 Return type : Filehandle - the log filehandle
|
|
1412 Exceptions : none
|
|
1413 Caller : general
|
|
1414
|
|
1415 =cut
|
|
1416
|
|
1417 sub init_log {
|
|
1418 my $self = shift;
|
|
1419 my $date = shift;
|
|
1420
|
|
1421 # get a log filehandle
|
|
1422 my $log = $self->log_filehandle(undef,$date);
|
|
1423
|
|
1424 # print script name, date, user who is running it
|
|
1425 my $hostname = `hostname`;
|
|
1426 chomp $hostname;
|
|
1427 my $script = "$hostname:$Bin/$Script";
|
|
1428 my $user = `whoami`;
|
|
1429 chomp $user;
|
|
1430 $self->log("Script: $script\nDate: ".$self->date_and_time."\nUser: $user\n");
|
|
1431
|
|
1432 # print parameters the script is running with
|
|
1433 $self->log("Parameters:\n\n");
|
|
1434 $self->log($self->list_all_params);
|
|
1435
|
|
1436 # remember start time
|
|
1437 $self->{'_start_time'} = time;
|
|
1438
|
|
1439 return $log;
|
|
1440 }
|
|
1441
|
|
1442 =head2 finish_log
|
|
1443
|
|
1444 Example : $support->finish_log;
|
|
1445 Description : Writes footer information to a logfile. This includes the
|
|
1446 number of logged warnings, timestamp and memory footprint.
|
|
1447 Return type : TRUE on success
|
|
1448 Exceptions : none
|
|
1449 Caller : general
|
|
1450
|
|
1451 =cut
|
|
1452
|
|
1453 sub finish_log {
|
|
1454 my $self = shift;
|
|
1455 $self->log("\nAll done. ".$self->warnings." warnings. ");
|
|
1456 if ($self->{'_start_time'}) {
|
|
1457 $self->log("Runtime ");
|
|
1458 my $diff = time - $self->{'_start_time'};
|
|
1459 my $sec = $diff % 60;
|
|
1460 $diff = ($diff - $sec) / 60;
|
|
1461 my $min = $diff % 60;
|
|
1462 my $hours = ($diff - $min) / 60;
|
|
1463 $self->log("${hours}h ${min}min ${sec}sec ");
|
|
1464 }
|
|
1465 $self->log($self->date_and_mem."\n\n");
|
|
1466 return(1);
|
|
1467 }
|
|
1468
|
|
1469 =head2 date_and_mem
|
|
1470
|
|
1471 Example : print LOG "Time, memory usage: ".$support->date_and_mem."\n";
|
|
1472 Description : Prints a timestamp and the memory usage of your script.
|
|
1473 Return type : String - timestamp and memory usage
|
|
1474 Exceptions : none
|
|
1475 Caller : general
|
|
1476
|
|
1477 =cut
|
|
1478
|
|
1479 sub date_and_mem {
|
|
1480 my $date = strftime "%Y-%m-%d %T", localtime;
|
|
1481 my $mem = `ps -p $$ -o vsz |tail -1`;
|
|
1482 chomp $mem;
|
|
1483 return "[$date, mem $mem]";
|
|
1484 }
|
|
1485
|
|
1486 =head2 date
|
|
1487
|
|
1488 Example : print "Date: " . $support->date . "\n";
|
|
1489 Description : Prints a nicely formatted datetamp (YYYY-DD-MM)
|
|
1490 Return type : String - the timestamp
|
|
1491 Exceptions : none
|
|
1492 Caller : general
|
|
1493
|
|
1494 =cut
|
|
1495
|
|
1496 sub date {
|
|
1497 return strftime "%Y-%m-%d", localtime;
|
|
1498 }
|
|
1499
|
|
1500 =head2 date_and_time
|
|
1501
|
|
1502 Example : print "Date: " . $support->date . "\n";
|
|
1503 Description : Prints a nicely formatted timestamp (YYYY-DD-MM hh:mm:ss)
|
|
1504 Return type : String - the timestamp
|
|
1505 Exceptions : none
|
|
1506 Caller : general
|
|
1507
|
|
1508 =cut
|
|
1509
|
|
1510 sub date_and_time {
|
|
1511 return strftime "%Y-%m-%d %T", localtime;
|
|
1512 }
|
|
1513
|
|
1514 =head2 format_time
|
|
1515
|
|
1516 Example : print $support->format_time($gene->modifed_date) . "\n";
|
|
1517 Description : Prints timestamps from the database
|
|
1518 Return type : String - nicely formatted time stamp
|
|
1519 Exceptions : none
|
|
1520 Caller : general
|
|
1521
|
|
1522 =cut
|
|
1523
|
|
1524
|
|
1525 sub date_format {
|
|
1526 my( $self, $time, $format ) = @_;
|
|
1527 my( $d,$m,$y) = (localtime($time))[3,4,5];
|
|
1528 my %S = ('d'=>sprintf('%02d',$d),'m'=>sprintf('%02d',$m+1),'y'=>$y+1900);
|
|
1529 (my $res = $format ) =~s/%(\w)/$S{$1}/ge;
|
|
1530 return $res;
|
|
1531 }
|
|
1532
|
|
1533
|
|
1534 =head2 mem
|
|
1535
|
|
1536 Example : print "Memory usage: " . $support->mem . "\n";
|
|
1537 Description : Prints the memory used by your script. Not sure about platform
|
|
1538 dependence of this call ...
|
|
1539 Return type : String - memory usage
|
|
1540 Exceptions : none
|
|
1541 Caller : general
|
|
1542
|
|
1543 =cut
|
|
1544
|
|
1545 sub mem {
|
|
1546 my $mem = `ps -p $$ -o vsz |tail -1`;
|
|
1547 chomp $mem;
|
|
1548 return $mem;
|
|
1549 }
|
|
1550
|
|
1551 =head2 commify
|
|
1552
|
|
1553 Arg[1] : Int $num - a number to commify
|
|
1554 Example : print "An easy to read number: ".$self->commify(100000000);
|
|
1555 # will print 100,000,000
|
|
1556 Description : put commas into a number to make it easier to read
|
|
1557 Return type : a string representing the commified number
|
|
1558 Exceptions : none
|
|
1559 Caller : general
|
|
1560 Status : stable
|
|
1561
|
|
1562 =cut
|
|
1563
|
|
1564 sub commify {
|
|
1565 my $self = shift;
|
|
1566 my $num = shift;
|
|
1567
|
|
1568 $num = reverse($num);
|
|
1569 $num =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
|
|
1570
|
|
1571 return scalar reverse $num;
|
|
1572 }
|
|
1573
|
|
1574 =head2 fetch_non_hidden_slices
|
|
1575
|
|
1576 Arg[1] : B::E::SliceAdaptor
|
|
1577 Arg[2] : B::E::AttributeAdaptor
|
|
1578 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
|
|
1579 Arg[4] : string $coord_system_version (optional) - 'otter' by default
|
|
1580 Example : $chroms = $support->fetch_non_hidden_slice($sa,$aa);
|
|
1581 Description : retrieve all slices from a loutre database that don't have a hidden attribute.
|
|
1582 Doesn't retrieve non-reference slices
|
|
1583 Return type : arrayref
|
|
1584 Caller : general
|
|
1585 Status : stable
|
|
1586
|
|
1587 =cut
|
|
1588
|
|
1589 sub fetch_non_hidden_slices {
|
|
1590 my $self = shift;
|
|
1591 my $aa = shift or throw("You must supply an attribute adaptor");
|
|
1592 my $sa = shift or throw("You must supply a slice adaptor");
|
|
1593 my $cs = shift || 'chromosome';
|
|
1594 my $cv = shift || 'Otter';
|
|
1595 my $visible_chroms;
|
|
1596 foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) {
|
|
1597 my $chrom_name = $chrom->name;
|
|
1598 my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden');
|
|
1599 if ( scalar(@$attribs) > 1 ) {
|
|
1600 $self->log_warning("More than one hidden attribute for chromosome $chrom_name\n");
|
|
1601 }
|
|
1602 elsif ($attribs->[0]->value == 0) {
|
|
1603 push @$visible_chroms, $chrom;
|
|
1604 }
|
|
1605 elsif ($attribs->[0]->value == 1) {
|
|
1606 $self->log_verbose("chromosome $chrom_name is hidden\n");
|
|
1607 }
|
|
1608 else {
|
|
1609 $self->log_warning("No hidden attribute for chromosome $chrom_name\n");
|
|
1610 }
|
|
1611 }
|
|
1612 return $visible_chroms;
|
|
1613 }
|
|
1614
|
|
1615 =head2 get_non_hidden_slice_names
|
|
1616
|
|
1617 Arg[1] : B::E::SliceAdaptor
|
|
1618 Arg[2] : B::E::AttributeAdaptor
|
|
1619 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
|
|
1620 Arg[4] : string $coord_system_version (optional) - 'otter' by default
|
|
1621 Example : $chrom_names = $support->get_non_hidden_slice_names($sa,$aa);
|
|
1622 Description : retrieve names of all slices from a loutre database that don't have a hidden attribute.
|
|
1623 Doesn't retrieve non-reference slices
|
|
1624 Return type : arrayref of names of all non-hidden slices
|
|
1625 Caller : general
|
|
1626 Status : stable
|
|
1627
|
|
1628 =cut
|
|
1629
|
|
1630 sub get_non_hidden_slice_names {
|
|
1631 my $self = shift;
|
|
1632 my $aa = shift or throw("You must supply an attribute adaptor");
|
|
1633 my $sa = shift or throw("You must supply a slice adaptor");
|
|
1634 my $cs = shift || 'chromosome';
|
|
1635 my $cv = shift || 'Otter';
|
|
1636 my $visible_chrom_names;
|
|
1637 foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) {
|
|
1638 my $chrom_name = $chrom->seq_region_name;
|
|
1639 my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden');
|
|
1640 if ( scalar(@$attribs) > 1 ) {
|
|
1641 $self->log_warning("More than one hidden attribute for chromosome $chrom_name\n");
|
|
1642 }
|
|
1643 elsif ($attribs->[0]->value == 0) {
|
|
1644 push @$visible_chrom_names, $chrom_name;
|
|
1645 }
|
|
1646 elsif ($attribs->[0]->value == 1) {
|
|
1647 $self->log_verbose("chromosome $chrom_name is hidden\n");
|
|
1648 }
|
|
1649 else {
|
|
1650 $self->log_warning("No hidden attribute for chromosome $chrom_name\n");
|
|
1651 }
|
|
1652 }
|
|
1653 return $visible_chrom_names;
|
|
1654 }
|
|
1655
|
|
1656
|
|
1657 =head2 get_wanted_chromosomes
|
|
1658
|
|
1659 Arg[1] : B::E::SliceAdaptor
|
|
1660 Arg[2] : B::E::AttributeAdaptor
|
|
1661 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
|
|
1662 Arg[4] : string $coord_system_version (optional) - 'otter' by default
|
|
1663 Example : $chr_names = $support->get_wanted_chromosomes($laa,$lsa);
|
|
1664 Description : retrieve names of slices from a lutra database that are ready for dumping to Vega.
|
|
1665 Deals with list of names to ignore (ignore_chr = LIST)
|
|
1666 Return type : arrayref of slices
|
|
1667 Caller : general
|
|
1668 Status : stable
|
|
1669
|
|
1670 =cut
|
|
1671
|
|
1672 sub get_wanted_chromosomes {
|
|
1673 my $self = shift;
|
|
1674 my $aa = shift or throw("You must supply an attribute adaptor");
|
|
1675 my $sa = shift or throw("You must supply a slice adaptor");
|
|
1676 my $cs = shift || 'chromosome';
|
|
1677 my $cv = shift || 'Otter';
|
|
1678 my $export_mode = $self->param('release_type');
|
|
1679 my $release = $self->param('vega_release');
|
|
1680 my $names;
|
|
1681 my $chroms = $self->fetch_non_hidden_slices($aa,$sa,$cs,$cv);
|
|
1682 CHROM:
|
|
1683 foreach my $chrom (@$chroms) {
|
|
1684 my $attribs = $aa->fetch_all_by_Slice($chrom);
|
|
1685 my $vals = $self->get_attrib_values($attribs,'vega_export_mod');
|
|
1686 if (scalar(@$vals > 1)) {
|
|
1687 $self->log_warning ("Multiple attribs for \'vega_export_mod\', please fix before continuing");
|
|
1688 exit;
|
|
1689 }
|
|
1690 next CHROM if (! grep { $_ eq $export_mode} @$vals);
|
|
1691 $vals = $self->get_attrib_values($attribs,'vega_release',$release);
|
|
1692 if (scalar(@$vals > 1)) {
|
|
1693 $self->log_warning ("Multiple attribs for \'vega_release\' value = $release , please fix before continuing");
|
|
1694 exit;
|
|
1695 }
|
|
1696 next CHROM if (! grep { $_ eq $release} @$vals);
|
|
1697 my $name = $chrom->seq_region_name;
|
|
1698 if (my @ignored = $self->param('ignore_chr')) {
|
|
1699 next CHROM if (grep {$_ eq $name} @ignored);
|
|
1700 }
|
|
1701 push @{$names}, $name;
|
|
1702 }
|
|
1703 return $names;
|
|
1704 }
|
|
1705
|
|
1706 =head2 is_haplotype
|
|
1707
|
|
1708 Arg[1] : B::E::Slice
|
|
1709 Arg[2]: : B::E::DBAdaptor (optional, if you don't supply one then the *first* one you generated is returned, which may or may not be what you want!)
|
|
1710 Description : Is the slice a Vega haplotype? At the moment this is
|
|
1711 implemented by testing for presence of vega_ref_chrom but non_ref
|
|
1712 which is correct in practice, but really misses the prupose of
|
|
1713 vega_ref_chrom, so this might bite us if that changes.
|
|
1714 Return type : boolean
|
|
1715
|
|
1716 =cut
|
|
1717
|
|
1718 sub is_haplotype {
|
|
1719 my ($self,$slice,$dba) = @_;
|
|
1720
|
|
1721 $dba ||= $self->dba;
|
|
1722 my $aa = $dba->get_adaptor('Attribute');
|
|
1723
|
|
1724 my $attribs = $aa->fetch_all_by_Slice($slice);
|
|
1725 return (@{$self->get_attrib_values($attribs,'vega_ref_chrom')} and
|
|
1726 @{$self->get_attrib_values($attribs,'non_ref',1)});
|
|
1727 }
|
|
1728
|
|
1729 =head2 get_unique_genes
|
|
1730
|
|
1731 Arg[1] : B::E::Slice
|
|
1732 Arg[2] : B::E::DBAdaptor (optional, if you don't supply one then the *first* one you generated is returned, which may or may not be what you want!)
|
|
1733 Example : $genes = $support->get_unique_genes($slice,$dba);
|
|
1734 Description : Retrieve genes that are only on the slice itself - used for human where assembly patches
|
|
1735 are in the assembly_exception table. Needs the PATCHes to have 'non_ref' seq_region_attributes.
|
|
1736 Return type : arrayref of genes
|
|
1737 Caller : general
|
|
1738 Status : stable
|
|
1739
|
|
1740 =cut
|
|
1741
|
|
1742 sub get_unique_genes {
|
|
1743 my $self = shift;
|
|
1744 my ($slice,$dba) = @_;
|
|
1745 $slice or throw("You must supply a slice");
|
|
1746 $dba ||= $self->dba;
|
|
1747
|
|
1748 my $sa = $dba->get_adaptor('Slice');
|
|
1749 my $ga = $dba->get_adaptor('Gene');
|
|
1750 my $patch = 0;
|
|
1751 my $genes = [];
|
|
1752 if ( ! $slice->is_reference() and ! $self->is_haplotype($slice,$dba) ) {
|
|
1753 # if ( 0 ) {
|
|
1754 $patch = 1;
|
|
1755 my $slices = $sa->fetch_by_region_unique( $slice->coord_system_name(),$slice->seq_region_name(),undef,undef,undef,$slice->coord_system()->version() );
|
|
1756 foreach my $slice ( @{$slices} ) {
|
|
1757 push @$genes,@{$ga->fetch_all_by_Slice($slice)};
|
|
1758 # my $start = $slice->start;
|
|
1759 }
|
|
1760 }
|
|
1761 else {
|
|
1762 $genes = $ga->fetch_all_by_Slice($slice);
|
|
1763 }
|
|
1764 return ($genes, $patch);
|
|
1765 }
|
|
1766
|
|
1767
|
|
1768
|
|
1769 =head2 get_attrib_values
|
|
1770
|
|
1771 Arg[1] : Arrayref of B::E::Attributes
|
|
1772 Arg[2] : 'code' to search for
|
|
1773 Arg[3] : 'value' to search for (optional)
|
|
1774 Example : my $c = $self->get_attrib_values($attribs,'name'));
|
|
1775 Description : (i) In the absence of an attribute value argument, examines an arrayref
|
|
1776 of B::E::Attributes for a particular attribute type, returning the values
|
|
1777 for each attribute of that type. Can therefore be used to test for the
|
|
1778 number of attributes of that type.
|
|
1779 (ii) In the presence of the optional value argument it returns all
|
|
1780 attributes with that value ie can be used to test for the presence of an
|
|
1781 attribute with that particular value.
|
|
1782 Return type : arrayref of values for that attribute
|
|
1783 Caller : general
|
|
1784 Status : stable
|
|
1785
|
|
1786 =cut
|
|
1787
|
|
1788 sub get_attrib_values {
|
|
1789 my $self = shift;
|
|
1790 my $attribs = shift;
|
|
1791 my $code = shift;
|
|
1792 my $value = shift;
|
|
1793 if (my @atts = grep {$_->code eq $code } @$attribs) {
|
|
1794 my $r = [];
|
|
1795 if ($value) {
|
|
1796 if (my @values = grep {$_->value eq $value} @atts) {
|
|
1797 foreach (@values) {
|
|
1798 push @$r, $_->value;
|
|
1799 }
|
|
1800 return $r;
|
|
1801 }
|
|
1802 else {
|
|
1803 return [];
|
|
1804 }
|
|
1805 }
|
|
1806 else {
|
|
1807 foreach (@atts) {
|
|
1808 push @$r, $_->value;
|
|
1809 }
|
|
1810 return $r;
|
|
1811 }
|
|
1812 }
|
|
1813 else {
|
|
1814 return [];
|
|
1815 }
|
|
1816 }
|
|
1817
|
|
1818 =head2 fix_attrib_value
|
|
1819
|
|
1820 Arg[1] : Arrayref of existing B::E::Attributes
|
|
1821 Arg[2] : dbID of object
|
|
1822 Arg[3] : name of object (just for reporting)
|
|
1823 Arg[4] : attrib_type.code
|
|
1824 Arg[5] : attrib_type.value
|
|
1825 Arg[6] : interactive ? (0 by default)
|
|
1826 Arg[7] : table
|
|
1827 Example : $support->fix_attrib_value($attribs,$chr_id,$chr_name,'vega_export_mod','N',1);
|
|
1828 Description : adds a new attribute to an object, or updates an existing attribute with a new value
|
|
1829 Can be run in interactive or non-interactive mode (default)
|
|
1830 Return type : arrayref of results
|
|
1831 Caller : general
|
|
1832 Status : only ever tested with seq_region_attributes to date
|
|
1833
|
|
1834 =cut
|
|
1835
|
|
1836 sub fix_attrib_value {
|
|
1837 my $self = shift;
|
|
1838 my $attribs = shift;
|
|
1839 my $id = shift;
|
|
1840 my $name = shift;
|
|
1841 my $code = shift;
|
|
1842 my $value = shift;
|
|
1843 my $interact = shift || 0;
|
|
1844 my $table = shift || 'seq_region_attrib';
|
|
1845
|
|
1846 #transiently set interactive parameter to zero
|
|
1847 my $int_before;
|
|
1848 if (! $interact) {
|
|
1849 $int_before = $self->param('interactive');
|
|
1850 $self->param('interactive',0);
|
|
1851 }
|
|
1852
|
|
1853 #get any existing value(s) for this attribute
|
|
1854 my $existings = $self->get_attrib_values($attribs,$code);
|
|
1855
|
|
1856 #add a new attribute if there is none...
|
|
1857 if (! @$existings ) {
|
|
1858 if ($self->user_proceed("Do you want to set $name attrib (code = $code) to value $value ?")) {
|
|
1859 my $r = $self->store_new_attribute($id,$code,$value);
|
|
1860
|
|
1861 #reset interactive parameter
|
|
1862 $self->param('interactive',$int_before) if (! $interact);
|
|
1863 return $r;
|
|
1864 }
|
|
1865 }
|
|
1866 #...warn and exit if you're trying to update more than one value for the same attribute...
|
|
1867 elsif (scalar @$existings > 1) {
|
|
1868 $self->log_warning("You shouldn't be trying to update multiple attributes with the same code at once ($name:$code,$value), looks like you have duplicate entries in the (seq_region_)attrib table\n");
|
|
1869 exit;
|
|
1870 }
|
|
1871
|
|
1872 #...or update an attribute with new values...
|
|
1873 else {
|
|
1874 my $existing = $existings->[0];
|
|
1875 if ($existing ne $value) {
|
|
1876 if ($self->user_proceed("Do you want to reset $name attrib (code = $code) from $existing to $value ?")) {
|
|
1877 my $r = $self->update_attribute($id,$code,$value);
|
|
1878 $self->param('interactive',$int_before) if (! $interact);
|
|
1879 push @$r, $existing;
|
|
1880 return $r;
|
|
1881 }
|
|
1882 }
|
|
1883 #...or make no change
|
|
1884 else {
|
|
1885 $self->param('interactive',$int_before) if (! $interact);
|
|
1886 return [];
|
|
1887 }
|
|
1888 }
|
|
1889 }
|
|
1890
|
|
1891 =head2 _get_attrib_id
|
|
1892
|
|
1893 Arg[1] : attrib_type.code
|
|
1894 Arg[2] : database handle
|
|
1895 Example : $self->_get_attrib_id('name',$dbh)
|
|
1896 Description : get attrib_type.attrib_type_id from a attrib_type.code
|
|
1897 Return type : attrib_type.attrib_type_id
|
|
1898 Caller : internal
|
|
1899 Status : stable
|
|
1900
|
|
1901 =cut
|
|
1902
|
|
1903 sub _get_attrib_id {
|
|
1904 my $self = shift;
|
|
1905 my $attrib_code = shift;
|
|
1906 my $dbh = shift;
|
|
1907 my ($attrib_id) = $dbh->selectrow_array(
|
|
1908 qq(select attrib_type_id
|
|
1909 from attrib_type
|
|
1910 where code = ?),
|
|
1911 {},
|
|
1912 ($attrib_code)
|
|
1913 );
|
|
1914 if (! $attrib_id) {
|
|
1915 $self->log_warning("There is no attrib_type_id for code $attrib_code, please patch the attrib_table\n");
|
|
1916 exit;
|
|
1917 }
|
|
1918 else {
|
|
1919 return $attrib_id;
|
|
1920 }
|
|
1921 }
|
|
1922
|
|
1923 =head2 store_new_attribute
|
|
1924
|
|
1925 Arg[1] : seq_region.seq_region_id
|
|
1926 Arg[2] : attrib_type.code
|
|
1927 Arg[3] : attrib_type.value
|
|
1928 ARG[4] : table to update (seq_region_attribute by default)
|
|
1929 Example : $support->store_new_attribute(23,name,5);
|
|
1930 Description : uses MySQL to store an entry (code and value) in an attribute table
|
|
1931 (seq_region_attrib by default)
|
|
1932 Return type : array_ref
|
|
1933 Caller : general
|
|
1934 Status : stable
|
|
1935
|
|
1936 =cut
|
|
1937
|
|
1938 sub store_new_attribute {
|
|
1939 my $self = shift;
|
|
1940 my $sr_id = shift;
|
|
1941 my $attrib_code = shift;
|
|
1942 my $attrib_value = shift || '';
|
|
1943 my $table = shift || 'seq_region_attrib';
|
|
1944
|
|
1945 #get database handle
|
|
1946 my $dbh = $self->get_dbconnection('loutre');
|
|
1947 #get attrib_type_id for this particular attribute
|
|
1948 my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh);
|
|
1949 #store
|
|
1950 my $r = $dbh->do(
|
|
1951 qq(insert into $table
|
|
1952 values (?,?,?)),
|
|
1953 {},
|
|
1954 ($sr_id,$attrib_id,$attrib_value)
|
|
1955 );
|
|
1956 return ['Stored',$r];
|
|
1957 }
|
|
1958
|
|
1959 =head2 update_attribute
|
|
1960
|
|
1961 Arg[1] : seq_region.seq_region_id
|
|
1962 Arg[2] : attrib_type.code
|
|
1963 Arg[3] : attrib_type.value
|
|
1964 ARG[4] : table to update (seq_region_attribute by default)
|
|
1965 Example : $support->update_attribute(23,name,5);
|
|
1966 Description : uses MySQL to update an attribute table (seq_region_attrib by default)
|
|
1967 Return type : array_ref
|
|
1968 Caller : general
|
|
1969 Status : stable
|
|
1970
|
|
1971 =cut
|
|
1972
|
|
1973 sub update_attribute {
|
|
1974 my $self = shift;
|
|
1975 my $sr_id = shift;
|
|
1976 my $attrib_code = shift;
|
|
1977 my $attrib_value = shift;
|
|
1978 my $table = shift || 'seq_region_attrib';
|
|
1979 my $dbh = $self->get_dbconnection('loutre');
|
|
1980 my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh);
|
|
1981 #update
|
|
1982 my $r = $dbh->do(
|
|
1983 qq(update $table
|
|
1984 set value = ?
|
|
1985 where seq_region_id = $sr_id
|
|
1986 and attrib_type_id = $attrib_id),
|
|
1987 {},
|
|
1988 ($attrib_value)
|
|
1989 );
|
|
1990 return ['Updated',$r];
|
|
1991 }
|
|
1992
|
|
1993
|
|
1994 =head2 remove_duplicate_attribs
|
|
1995
|
|
1996 Arg[1] : db handle
|
|
1997 Arg[2] : table
|
|
1998 Example : $support->remove_duplicate_attribs($dbh,'gene');
|
|
1999 Description : uses MySQL to remove duplicate entries from an attribute table
|
|
2000 Return type : none
|
|
2001 Caller : general
|
|
2002 Status : stable
|
|
2003
|
|
2004 =cut
|
|
2005
|
|
2006 sub remove_duplicate_attribs {
|
|
2007 my $self = shift;
|
|
2008 my $dbh = shift;
|
|
2009 my $table = shift;
|
|
2010 $dbh->do(qq(create table nondup_${table}_attrib like ${table}_attrib));
|
|
2011 $dbh->do(qq(insert into nondup_${table}_attrib (select ${table}_id, attrib_type_id, value from ${table}_attrib group by ${table}_id, attrib_type_id, value)));
|
|
2012 $dbh->do(qq(delete from ${table}_attrib));
|
|
2013 $dbh->do(qq(insert into ${table}_attrib (select ${table}_id, attrib_type_id, value from nondup_${table}_attrib)));
|
|
2014 $dbh->do(qq(drop table nondup_${table}_attrib));
|
|
2015 }
|
|
2016
|
|
2017
|
|
2018 1;
|