comparison variant_effect_predictor/Bio/EnsEMBL/Utils/ConversionSupport.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Utils::ConversionSupport - Utility module for Vega release and
24 schema conversion scripts
25
26 =head1 SYNOPSIS
27
28 my $serverroot = '/path/to/ensembl';
29 my $support = new Bio::EnsEMBL::Utils::ConversionSupport($serverroot);
30
31 # parse common options
32 $support->parse_common_options;
33
34 # parse extra options for your script
35 $support->parse_extra_options( 'string_opt=s', 'numeric_opt=n' );
36
37 # ask user if he wants to run script with these parameters
38 $support->confirm_params;
39
40 # see individual method documentation for more stuff
41
42 =head1 DESCRIPTION
43
44 This module is a collection of common methods and provides helper
45 functions for the Vega release and schema conversion scripts. Amongst
46 others, it reads options from a config file, parses commandline options
47 and does logging.
48
49 =head1 METHODS
50
51 =cut
52
53 package Bio::EnsEMBL::Utils::ConversionSupport;
54
55 use strict;
56 use warnings;
57 no warnings 'uninitialized';
58
59 use Getopt::Long;
60 use Text::Wrap;
61 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
62 use FindBin qw($Bin $Script);
63 use POSIX qw(strftime);
64 use Cwd qw(abs_path);
65 use DBI;
66 use Data::Dumper;
67 use Fcntl qw(:flock SEEK_END);
68
69 my $species_c = 1; #counter to be used for each database connection made
70
71 =head2 new
72
73 Arg[1] : String $serverroot - root directory of your ensembl sandbox
74 Example : my $support = new Bio::EnsEMBL::Utils::ConversionSupport(
75 '/path/to/ensembl');
76 Description : constructor
77 Return type : Bio::EnsEMBL::Utils::ConversionSupport object
78 Exceptions : thrown if no serverroot is provided
79 Caller : general
80
81 =cut
82
83 sub new {
84 my $class = shift;
85 (my $serverroot = shift) or throw("You must supply a serverroot.");
86 my $self = {
87 '_serverroot' => $serverroot,
88 '_param' => { interactive => 1 },
89 '_warnings' => 0,
90 };
91 bless ($self, $class);
92 return $self;
93 }
94
95 =head2 parse_common_options
96
97 Example : $support->parse_common_options;
98 Description : This method reads options from a configuration file and parses
99 some commandline options that are common to all scripts (like
100 db connection settings, help, dry-run). Commandline options
101 will override config file settings.
102
103 All options will be accessible via $self->param('name').
104 Return type : true on success
105 Exceptions : thrown if configuration file can't be opened
106 Caller : general
107
108 =cut
109
110 sub parse_common_options {
111 my $self = shift;
112
113 # read commandline options
114 my %h;
115 Getopt::Long::Configure("pass_through");
116 &GetOptions( \%h,
117 'dbname|db_name=s',
118 'host|dbhost|db_host=s',
119 'port|dbport|db_port=n',
120 'user|dbuser|db_user=s',
121 'pass|dbpass|db_pass=s',
122 'conffile|conf=s',
123 'logfile|log=s',
124 'nolog',
125 'logpath=s',
126 'log_base_path=s',
127 'logappend|log_append',
128 'verbose|v',
129 'interactive|i=s',
130 'dry_run|dry|n',
131 'help|h|?',
132 );
133
134 # reads config file
135 my $conffile = $h{'conffile'} || $self->serverroot . "/sanger-plugins/vega/conf/ini-files/Conversion.ini";
136 $conffile = abs_path($conffile);
137 if (-e $conffile) {
138 open(CONF, $conffile) or throw(
139 "Unable to open configuration file $conffile for reading: $!");
140 my $serverroot = $self->serverroot;
141 while (<CONF>) {
142 chomp;
143
144 # remove comments
145 s/^[#;].*//;
146 s/\s+[;].*$//;
147
148 # read options into internal parameter datastructure, removing whitespace
149 next unless (/(\w\S*)\s*=\s*(\S*)\s*/);
150 my $name = $1;
151 my $val = $2;
152 if ($val =~ /\$SERVERROOT/) {
153 $val =~ s/\$SERVERROOT/$serverroot/g;
154 $val = abs_path($val);
155 }
156 $self->param($name, $val);
157 }
158 $self->param('conffile', $conffile);
159 }
160 elsif ($conffile) {
161 warning("Unable to open configuration file $conffile for reading: $!");
162 }
163
164 # override configured parameter with commandline options
165 map { $self->param($_, $h{$_}) } keys %h;
166
167 return (1) if $self->param('nolog');
168
169 # if logpath & logfile are not set, set them here to /ensemblweb/vega_dev/shared/logs/conversion/DBNAME/SCRIPNAME_NN.log
170 if (! defined($self->param('log_base_path'))) {
171 $self->param('log_base_path','/ensemblweb/shared/logs/conversion/');
172 }
173 my $dbname = $self->param('dbname');
174 $dbname =~ s/^vega_//;
175 if (not (defined($self->param('logpath')) )){
176 $self->param('logpath', $self->param('log_base_path')."/".$dbname."/" );
177 }
178 if ( not defined $self->param('logfile') ){
179 my $log = $Script;
180 $log =~ s/.pl$//g;
181 my $counter;
182 for ($counter=1 ; (-e $self->param('logpath')."/".$log."_".sprintf("%03d", $counter).".log"); $counter++){
183 # warn $self->param('logpath')."/".$log."_".$counter.".log";
184 }
185 $self->param('logfile', $log."_".sprintf("%03d", $counter).".log");
186 }
187 return(1);
188 }
189
190 =head2 parse_extra_options
191
192 Arg[1-N] : option descriptors that will be passed on to Getopt::Long
193 Example : $support->parse_extra_options('string_opt=s', 'numeric_opt=n');
194 Description : Parse extra commandline options by passing them on to
195 Getopt::Long and storing parameters in $self->param('name).
196 Return type : true on success
197 Exceptions : none (caugth by $self->error)
198 Caller : general
199
200 =cut
201
202 sub parse_extra_options {
203 my ($self, @params) = @_;
204 Getopt::Long::Configure("no_pass_through");
205 eval {
206 # catch warnings to pass to $self->error
207 local $SIG{__WARN__} = sub { die @_; };
208 &GetOptions(\%{ $self->{'_param'} }, @params);
209 };
210 $self->error($@) if $@;
211 return(1);
212 }
213
214 =head2 allowed_params
215
216 Arg[1-N] : (optional) List of allowed parameters to set
217 Example : my @allowed = $self->allowed_params(qw(param1 param2));
218 Description : Getter/setter for allowed parameters. This is used by
219 $self->confirm_params() to avoid cluttering of output with
220 conffile entries not relevant for a given script. You can use
221 $self->get_common_params() as a shortcut to set them.
222 Return type : Array - list of allowed parameters
223 Exceptions : none
224 Caller : general
225
226 =cut
227
228 sub allowed_params {
229 my $self = shift;
230
231 # setter
232 if (@_) {
233 @{ $self->{'_allowed_params'} } = @_;
234 }
235
236 # getter
237 if (ref($self->{'_allowed_params'}) eq 'ARRAY') {
238 return @{ $self->{'_allowed_params'} };
239 } else {
240 return ();
241 }
242 }
243
244 =head2 get_common_params
245
246 Example : my @allowed_params = $self->get_common_params, 'extra_param';
247 Description : Returns a list of commonly used parameters in the conversion
248 scripts. Shortcut for setting allowed parameters with
249 $self->allowed_params().
250 Return type : Array - list of common parameters
251 Exceptions : none
252 Caller : general
253
254 =cut
255
256 sub get_common_params {
257 return qw(
258 conffile
259 dbname
260 host
261 port
262 user
263 pass
264 nolog
265 logpath
266 log_base_path
267 logfile
268 logappend
269 verbose
270 interactive
271 dry_run
272 );
273 }
274
275 =head2 get_loutre_params
276
277 Arg : (optional) return a list to parse or not
278 Example : $support->parse_extra_options($support->get_loutre_params('parse'))
279 Description : Returns a list of commonly used loutre db parameters - parse option is
280 simply used to distinguish between reporting and parsing parameters
281 Return type : Array - list of common parameters
282 Exceptions : none
283 Caller : general
284
285 =cut
286
287 sub get_loutre_params {
288 my ($self,$p) = @_;
289 if ($p) {
290 return qw(
291 loutrehost=s
292 loutreport=s
293 loutreuser=s
294 loutrepass:s
295 loutredbname=s
296 );
297 }
298 else {
299 return qw(
300 loutrehost
301 loutreport
302 loutreuser
303 loutrepass
304 loutredbname
305 );
306 }
307 }
308
309 =head2 remove_vega_params
310
311 Example : $support->remove_vega_params;
312 Description : Removes Vega db conection parameters. Usefull to avoid clutter in log files when
313 working exclusively with loutre
314 Return type : none
315 Exceptions : none
316 Caller : general
317
318 =cut
319
320 sub remove_vega_params {
321 my $self = shift;
322 foreach my $param (qw(dbname host port user pass)) {
323 $self->{'_param'}{$param} = undef;
324 }
325 }
326
327 =head2 confirm_params
328
329 Example : $support->confirm_params;
330 Description : Prints a table of parameters that were collected from config
331 file and commandline and asks user to confirm if he wants
332 to proceed.
333 Return type : true on success
334 Exceptions : none
335 Caller : general
336
337 =cut
338
339 sub confirm_params {
340 my $self = shift;
341
342 # print parameter table
343 print "Running script with these parameters:\n\n";
344 print $self->list_all_params;
345
346 if ($self->param('host') eq 'ensweb-1-10') {
347 # ask user if he wants to proceed
348 exit unless $self->user_proceed("**************\n\n You're working on ensdb-1-10! Is that correct and you want to continue ?\n\n**************");
349 }
350 else {
351 # ask user if he wants to proceed
352 exit unless $self->user_proceed("Continue?");
353 }
354 return(1);
355 }
356
357 =head2 list_all_params
358
359 Example : print LOG $support->list_all_params;
360 Description : prints a table of the parameters used in the script
361 Return type : String - the table to print
362 Exceptions : none
363 Caller : general
364
365 =cut
366
367 sub list_all_params {
368 my $self = shift;
369 my $txt = sprintf " %-21s%-90s\n", qw(PARAMETER VALUE);
370 $txt .= " " . "-"x121 . "\n";
371 $Text::Wrap::colums = 130;
372 my @params = $self->allowed_params;
373 foreach my $key (@params) {
374 my @vals = $self->param($key);
375 if (@vals) {
376 $txt .= Text::Wrap::wrap( sprintf(' %-21s', $key),
377 ' 'x24,
378 join(", ", @vals)
379 ) . "\n";
380 }
381 }
382 $txt .= "\n";
383 return $txt;
384 }
385
386 =head2 create_commandline_options
387
388 Arg[1] : Hashref $settings - hashref describing what to do
389 Allowed keys:
390 allowed_params => 0|1 # use all allowed parameters
391 exclude => [] # listref of parameters to exclude
392 replace => {param => newval} # replace value of param with
393 # newval
394 Example : $support->create_commandline_options({
395 allowed_params => 1,
396 exclude => ['verbose'],
397 replace => { 'dbname' => 'homo_sapiens_vega_33_35e' }
398 });
399 Description : Creates a commandline options string that can be passed to any
400 other script using ConversionSupport.
401 Return type : String - commandline options string
402 Exceptions : none
403 Caller : general
404
405 =cut
406
407 sub create_commandline_options {
408 my ($self, $settings, $param_hash) = @_;
409 my %param_hash = $param_hash ? %$param_hash : ();
410
411 # get all allowed parameters
412 if ($settings->{'allowed_params'}) {
413 # exclude params explicitly stated
414 my %exclude = map { $_ => 1 } @{ $settings->{'exclude'} || [] };
415 foreach my $param ($self->allowed_params) {
416 unless ($exclude{$param}) {
417 my ($first, @rest) = $self->param($param);
418 next unless (defined($first));
419
420 if (@rest) {
421 $first = join(",", $first, @rest);
422 }
423 $param_hash{$param} = $first;
424 }
425 }
426 }
427
428 # replace values
429 foreach my $key (keys %{ $settings->{'replace'} || {} }) {
430 $param_hash{$key} = $settings->{'replace'}->{$key};
431 }
432
433 # create the commandline options string
434 my $options_string;
435 foreach my $param (keys %param_hash) {
436 $options_string .= sprintf("--%s %s ", $param, $param_hash{$param});
437 }
438 return $options_string;
439 }
440
441 =head2 check_required_params
442
443 Arg[1-N] : List @params - parameters to check
444 Example : $self->check_required_params(qw(dbname host port));
445 Description : Checks $self->param to make sure the requested parameters
446 have been set. Dies if parameters are missing.
447 Return type : true on success
448 Exceptions : none
449 Caller : general
450
451 =cut
452
453 sub check_required_params {
454 my ($self, @params) = @_;
455 my @missing = ();
456 foreach my $param (@params) {
457 push @missing, $param unless defined $self->param($param);
458 }
459 if (@missing) {
460 throw("Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n");
461 }
462 return(1);
463 }
464
465 =head2 user_proceed
466
467 Arg[1] : (optional) String $text - notification text to present to user
468 Example : # run a code snipped conditionally
469 if ($support->user_proceed("Run the next code snipped?")) {
470 # run some code
471 }
472
473 # exit if requested by user
474 exit unless ($support->user_proceed("Want to continue?"));
475 Description : If running interactively, the user is asked if he wants to
476 perform a script action. If he doesn't, this section is skipped
477 and the script proceeds with the code. When running
478 non-interactively, the section is run by default.
479 Return type : TRUE to proceed, FALSE to skip.
480 Exceptions : none
481 Caller : general
482
483 =cut
484
485 sub user_proceed {
486 my ($self, $text) = @_;
487
488 if ($self->param('interactive')) {
489 print "$text\n" if $text;
490 print "[y/N] ";
491 my $input = lc(<>);
492 chomp $input;
493 unless ($input eq 'y') {
494 print "Skipping.\n";
495 return(0);
496 }
497 }
498
499 return(1);
500 }
501
502 =head2 user_confirm
503
504 Description : DEPRECATED - please use user_proceed() instead
505
506 =cut
507
508 sub user_confirm {
509 my $self = shift;
510 exit unless $self->user_proceed("Continue?");
511 }
512
513 =head2 read_user_input
514
515 Arg[1] : (optional) String $text - notification text to present to user
516 Example : my $ret = $support->read_user_input("Choose a number [1/2/3]");
517 if ($ret == 1) {
518 # do something
519 } elsif ($ret == 2) {
520 # do something else
521 }
522 Description : If running interactively, the user is asked for input.
523 Return type : String - user's input
524 Exceptions : none
525 Caller : general
526
527 =cut
528
529 sub read_user_input {
530 my ($self, $text) = @_;
531
532 if ($self->param('interactive')) {
533 print "$text\n" if $text;
534 my $input = <>;
535 chomp $input;
536 return $input;
537 }
538 }
539
540 =head2 comma_to_list
541
542 Arg[1-N] : list of parameter names to parse
543 Example : $support->comma_to_list('chromosomes');
544 Description : Transparently converts comma-separated lists into arrays (to
545 allow different styles of commandline options, see perldoc
546 Getopt::Long for details). Parameters are converted in place
547 (accessible through $self->param('name')).
548 Return type : true on success
549 Exceptions : none
550 Caller : general
551
552 =cut
553
554 sub comma_to_list {
555 my $self = shift;
556 foreach my $param (@_) {
557 $self->param($param,
558 split (/,/, join (',', $self->param($param))));
559 }
560 return(1);
561 }
562
563 =head2 list_or_file
564
565 Arg[1] : Name of parameter to parse
566 Example : $support->list_or_file('gene');
567 Description : Determines whether a parameter holds a list or it is a filename
568 to read the list entries from.
569 Return type : true on success
570 Exceptions : thrown if list file can't be opened
571 Caller : general
572
573 =cut
574
575 sub list_or_file {
576 my ($self, $param) = @_;
577 my @vals = $self->param($param);
578 return unless (@vals);
579
580 my $firstval = $vals[0];
581 if (scalar(@vals) == 1 && -e $firstval) {
582 # we didn't get a list of values, but a file to read values from
583 @vals = ();
584 open(IN, $firstval) or throw("Cannot open $firstval for reading: $!");
585 while(<IN>){
586 chomp;
587 push(@vals, $_);
588 }
589 close(IN);
590 $self->param($param, @vals);
591 }
592 $self->comma_to_list($param);
593 return(1);
594 }
595
596 =head2 param
597
598 Arg[1] : Parameter name
599 Arg[2-N] : (optional) List of values to set
600 Example : my $dbname = $support->param('dbname');
601 $support->param('port', 3306);
602 $support->param('chromosomes', 1, 6, 'X');
603 Description : Getter/setter for parameters. Accepts single-value params and
604 list params.
605 Return type : Scalar value for single-value parameters, array of values for
606 list parameters
607 Exceptions : thrown if no parameter name is supplied
608 Caller : general
609
610 =cut
611
612 sub param {
613 my $self = shift;
614 my $name = shift or throw("You must supply a parameter name");
615
616 # setter
617 if (@_) {
618 if (scalar(@_) == 1) {
619 # single value
620 $self->{'_param'}->{$name} = shift;
621 } else {
622 # list of values
623 undef $self->{'_param'}->{$name};
624 @{ $self->{'_param'}->{$name} } = @_;
625 }
626 }
627
628 # getter
629 if (ref($self->{'_param'}->{$name}) eq 'ARRAY') {
630 # list parameter
631 return @{ $self->{'_param'}->{$name} };
632 } elsif (defined($self->{'_param'}->{$name})) {
633 # single-value parameter
634 return $self->{'_param'}->{$name};
635 } else {
636 return ();
637 }
638 }
639
640 =head2 error
641
642 Arg[1] : (optional) String - error message
643 Example : $support->error("An error occurred: $@");
644 exit(0) if $support->error;
645 Description : Getter/setter for error messages
646 Return type : String - error message
647 Exceptions : none
648 Caller : general
649
650 =cut
651
652 sub error {
653 my $self = shift;
654 $self->{'_error'} = shift if (@_);
655 return $self->{'_error'};
656 }
657
658 =head2 warnings
659
660 Example : print LOG "There were ".$support->warnings." warnings.\n";
661 Description : Returns the number of warnings encountered while running the
662 script (the warning counter is increased by $self->log_warning).
663 Return type : Int - number of warnings
664 Exceptions : none
665 Caller : general
666
667 =cut
668
669 sub warnings {
670 my $self = shift;
671 return $self->{'_warnings'};
672 }
673
674 =head2 serverroot
675
676 Arg[1] : (optional) String - root directory of your ensembl sandbox
677 Example : my $serverroot = $support->serverroot;
678 Description : Getter/setter for the root directory of your ensembl sandbox.
679 This is set when ConversionSupport object is created, so
680 usually only used as a getter.
681 Return type : String - the server root directory
682 Exceptions : none
683 Caller : general
684
685 =cut
686
687 sub serverroot {
688 my $self = shift;
689 $self->{'_serverroot'} = shift if (@_);
690 return $self->{'_serverroot'};
691 }
692
693 =head2 get_database
694
695 Arg[1] : String $database - the type of database to connect to
696 (eg core, otter)
697 Arg[2] : (optional) String $prefix - the prefix used for retrieving the
698 connection settings from the configuration
699 Example : my $db = $support->get_database('core');
700 Description : Connects to the database specified.
701 Return type : DBAdaptor of the appropriate type
702 Exceptions : thrown if asking for unknown database
703 Caller : general
704
705 =cut
706
707 sub get_database {
708 my $self = shift;
709 my $database = shift or throw("You must provide a database");
710 my $prefix = shift || '';
711 $self->check_required_params(
712 "${prefix}host",
713 "${prefix}port",
714 "${prefix}user",
715 "${prefix}dbname",
716 );
717 my %adaptors = (
718 core => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
719 ensembl => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
720 evega => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
721 otter => 'Bio::Otter::DBSQL::DBAdaptor',
722 vega => 'Bio::Otter::DBSQL::DBAdaptor',
723 compara => 'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor',
724 loutre => 'Bio::Vega::DBSQL::DBAdaptor',
725 );
726 throw("Unknown database: $database") unless $adaptors{$database};
727
728 $self->dynamic_use($adaptors{$database});
729 my $species = 'species' . $species_c;
730 my $dba = $adaptors{$database}->new(
731 -host => $self->param("${prefix}host"),
732 -port => $self->param("${prefix}port"),
733 -user => $self->param("${prefix}user"),
734 -pass => $self->param("${prefix}pass") || '',
735 -dbname => $self->param("${prefix}dbname"),
736 -group => $database,
737 -species => $species,
738 );
739 #can use this approach to get dna from another db
740 # my $dna_db = $adaptors{$database}->new(
741 # -host => 'otterlive',
742 # -port => '3301',
743 # -user => $self->param("${prefix}user"),
744 # -pass => $self->param("${prefix}pass"),
745 # -dbname => 'loutre_human',
746 # );
747 # $dba->dnadb($dna_db);
748
749 # otherwise explicitely set the dnadb to itself - by default the Registry assumes
750 # a group 'core' for this now
751 $dba->dnadb($dba);
752
753 $species_c++;
754
755 $self->{'_dba'}->{$database} = $dba;
756 $self->{'_dba'}->{'default'} = $dba unless $self->{'_dba'}->{'default'};
757 return $self->{'_dba'}->{$database};
758 }
759
760
761 =head2 get_dbconnection
762
763 Arg[1] : (optional) String $prefix - the prefix used for retrieving the
764 connection settings from the configuration
765 Example : my $dbh = $self->get_dbconnection;
766 Description : Connects to the database server specified. You don't have to
767 specify a database name (this is useful for running commands
768 like $dbh->do('show databases')).
769 Return type : DBI database handle
770 Exceptions : thrown if connection fails
771 Caller : general
772 Status : At Risk
773
774 =cut
775
776 sub get_dbconnection {
777 my $self = shift;
778 my $prefix = shift;
779
780 $self->check_required_params(
781 "${prefix}host",
782 "${prefix}port",
783 "${prefix}user",
784 );
785
786 my $dsn = "DBI:" . ($self->param('driver')||'mysql') .
787 ":host=" . $self->param("${prefix}host") .
788 ";port=" . $self->param("${prefix}port");
789
790 if ($self->param("${prefix}dbname")) {
791 $dsn .= ";dbname=".$self->param("${prefix}dbname");
792 }
793
794 # warn $dsn;
795
796 my $dbh;
797 eval{
798 $dbh = DBI->connect($dsn, $self->param("${prefix}user"),
799 $self->param("${prefix}pass"), {'RaiseError' => 1, 'PrintError' => 0});
800 };
801
802 if (!$dbh || $@ || !$dbh->ping) {
803 $self->log_error("Could not connect to db server as user ".
804 $self->param("${prefix}user") .
805 " using [$dsn] as a locator:\n" . $DBI::errstr . $@);
806 }
807
808 $self->{'_dbh'} = $dbh;
809 return $self->{'_dbh'};
810
811 }
812
813
814 =head2 dba
815
816 Arg[1] : (optional) String $database - type of db apaptor to retrieve
817 Example : my $dba = $support->dba;
818 Description : Getter for database adaptor. Returns default (i.e. created
819 first) db adaptor if no argument is provided.
820 Return type : Bio::EnsEMBL::DBSQL::DBAdaptor or Bio::Otter::DBSQL::DBAdaptor
821 Exceptions : none
822 Caller : general
823
824 =cut
825
826 sub dba {
827 my ($self, $database) = shift;
828 return $self->{'_dba'}->{$database} || $self->{'_dba'}->{'default'};
829 }
830
831 =head2 dynamic_use
832
833 Arg [1] : String $classname - The name of the class to require/import
834 Example : $self->dynamic_use('Bio::EnsEMBL::DBSQL::DBAdaptor');
835 Description: Requires and imports the methods for the classname provided,
836 checks the symbol table so that it doesnot re-require modules
837 that have already been required.
838 Returntype : true on success
839 Exceptions : Warns to standard error if module fails to compile
840 Caller : internal
841
842 =cut
843
844 sub dynamic_use {
845 my ($self, $classname) = @_;
846 my ($parent_namespace, $module) = $classname =~/^(.*::)(.*)$/ ? ($1,$2) : ('::', $classname);
847
848 no strict 'refs';
849 # return if module has already been imported
850 return 1 if $parent_namespace->{$module.'::'} && %{ $parent_namespace->{$module.'::'}||{} };
851
852 eval "require $classname";
853 throw("Failed to require $classname: $@") if ($@);
854 $classname->import();
855
856 return 1;
857 }
858
859 =head2 get_chrlength
860
861 Arg[1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
862 Arg[2] : (optional) String $version - coord_system version
863 Arg[3] : (optional) String $type - type of region eg chromsome (defaults to 'toplevel')
864 Arg[4] : (optional) Boolean - return non reference slies as well (required for haplotypes eq 6-COX)
865 Arg[5] : (optional) Override chromosome parameter filtering with this array reference. Empty denotes all.
866 Example : my $chr_length = $support->get_chrlength($dba);
867 Description : Get all chromosomes and their length from the database. Return
868 chr_name/length for the chromosomes the user requested (or all
869 chromosomes by default)
870 Return type : Hashref - chromosome_name => length
871 Exceptions : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
872 Caller : general
873
874 =cut
875
876 sub get_chrlength {
877 my ($self, $dba, $version,$type,$include_non_reference,$chroms) = @_;
878 $dba ||= $self->dba;
879 $type ||= 'toplevel';
880 throw("get_chrlength should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n")
881 unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));
882
883 my $sa = $dba->get_SliceAdaptor;
884
885 my @chromosomes = map { $_->seq_region_name }
886 @{ $sa->fetch_all($type,$version,$include_non_reference) };
887 my %chr = map { $_ => $sa->fetch_by_region($type, $_, undef, undef, undef, $version)->length } @chromosomes;
888
889 my @wanted = $self->param('chromosomes');
890 @wanted = @$chroms if defined $chroms and ref($chroms) eq 'ARRAY';
891
892 if (@wanted) {
893 # check if user supplied invalid chromosome names
894 foreach my $chr (@wanted) {
895 my $found = 0;
896 foreach my $chr_from_db (keys %chr) {
897 if ($chr_from_db eq $chr) {
898 $found = 1;
899 last;
900 }
901 }
902 unless ($found) {
903 warning("Didn't find chromosome $chr in database " .
904 $self->param('dbname'));
905 }
906 }
907
908 # filter to requested chromosomes only
909 HASH:
910 foreach my $chr_from_db (keys %chr) {
911 foreach my $chr (@wanted) {
912 if ($chr_from_db eq $chr) {
913 next HASH;
914 }
915 }
916 delete($chr{$chr_from_db});
917 }
918 }
919
920 return \%chr;
921 }
922
923 =head2 get_ensembl_chr_mapping
924
925 Arg[1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
926 Arg[2] : (optional) String $version - coord_system version
927 Example : my $ensembl_mapping = $support->get_ensembl_chr_mapping($dba);
928 Description : Gets a mapping between Vega chromosome names and their
929 equivalent Ensembl chromosomes. Works with non-reference chromosomes
930 Return type : Hashref - Vega name => Ensembl name
931 Exceptions : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
932 Caller : general
933
934 =cut
935
936 sub get_ensembl_chr_mapping {
937 my ($self, $dba, $version) = @_;
938 $dba ||= $self->dba;
939 throw("get_ensembl_chr_mapping should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n") unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));
940
941 my $sa = $dba->get_SliceAdaptor;
942 my @chromosomes = map { $_->seq_region_name }
943 @{ $sa->fetch_all('chromosome', $version, 1) };
944
945 my %chrs;
946 foreach my $chr (@chromosomes) {
947 my $sr = $sa->fetch_by_region('chromosome', $chr, undef, undef, undef, $version);
948 my ($ensembl_name_attr) = @{ $sr->get_all_Attributes('ensembl_name') };
949 if ($ensembl_name_attr) {
950 $chrs{$chr} = $ensembl_name_attr->value;
951 } else {
952 $chrs{$chr} = $chr;
953 }
954 }
955 return \%chrs;
956 }
957
958 =head2 get_taxonomy_id
959
960 Arg[1] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
961 Example : my $sid = $support->get_taxonony_id($dba);
962 Description : Retrieves the taxononmy ID from the meta table
963 Return type : Int - the taxonomy ID
964 Exceptions : thrown if no taxonomy ID is found in the database
965 Caller : general
966
967 =cut
968
969 sub get_taxonomy_id {
970 my ($self, $dba) = @_;
971 $dba ||= $self->dba;
972 my $sql = 'SELECT meta_value FROM meta WHERE meta_key = "species.taxonomy_id"';
973 my $sth = $dba->dbc->db_handle->prepare($sql);
974 $sth->execute;
975 my ($tid) = $sth->fetchrow_array;
976 $sth->finish;
977 $self->throw("Could not determine taxonomy_id from database.") unless $tid;
978 return $tid;
979 }
980
981 =head2 get_species_scientific_name
982
983 Arg[1] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
984 Example : my $species = $support->get_species_scientific_name($dba);
985 Description : Retrieves the species scientific name (Genus species) from the
986 meta table
987 Return type : String - species scientific name
988 Exceptions : thrown if species name can not be determined from db
989 Caller : general
990
991 =cut
992
993 sub get_species_scientific_name {
994 my ($self, $dba) = @_;
995 $dba ||= $self->dba;
996 my $sql = "SELECT meta_value FROM meta WHERE meta_key = \'species.scientific_name\'";
997 my $sth = $dba->dbc->db_handle->prepare($sql);
998 $sth->execute;
999 my @sp;
1000 while (my @row = $sth->fetchrow_array) {
1001 push @sp, $row[0];
1002 }
1003 if (! @sp || @sp > 1) {
1004 $self->throw("Could not retrieve a single species scientific name from database.");
1005 }
1006 $sth->finish;
1007 my $species = $sp[0];
1008 $species =~ s/ /_/g;
1009 return $species;
1010 }
1011
1012 =head2 species
1013
1014 Arg[1] : (optional) String $species - species name to set
1015 Example : my $species = $support->species;
1016 my $url = "http://vega.sanger.ac.uk/$species/";
1017 Description : Getter/setter for species name (Genus_species). If not set, it's
1018 determined from database's meta table
1019 Return type : String - species name
1020 Exceptions : none
1021 Caller : general
1022
1023 =cut
1024
1025 sub species {
1026 my $self = shift;
1027 $self->{'_species'} = shift if (@_);
1028 # get species name from database if not set
1029 unless ($self->{'_species'}) {
1030 $self->{'_species'} = $self->get_species_scientific_name;
1031 }
1032 return $self->{'_species'};
1033 }
1034
1035 =head2 sort_chromosomes
1036
1037 Arg[1] : (optional) Hashref $chr_hashref - Hashref with chr_name as keys
1038 Example : my $chr = { '6-COX' => 1, '1' => 1, 'X' => 1 };
1039 my @sorted = $support->sort_chromosomes($chr);
1040 Description : Sorts chromosomes in an intuitive way (numerically, then
1041 alphabetically). If no chromosome hashref is passed, it's
1042 retrieve by calling $self->get_chrlength()
1043 Return type : List - sorted chromosome names
1044 Exceptions : thrown if no hashref is provided
1045 Caller : general
1046
1047 =cut
1048
1049 sub sort_chromosomes {
1050 my ($self, $chr_hashref) = @_;
1051 $chr_hashref = $self->get_chrlength unless ($chr_hashref);
1052 throw("You have to pass a hashref of your chromosomes")
1053 unless ($chr_hashref and ref($chr_hashref) eq 'HASH');
1054 return (sort _by_chr_num keys %$chr_hashref);
1055 }
1056
1057 =head2 _by_chr_num
1058
1059 Example : my @sorted = sort _by_chr_num qw(X, 6-COX, 14, 7);
1060 Description : Subroutine to use in sort for sorting chromosomes. Sorts
1061 numerically, then alphabetically
1062 Return type : values to be used by sort
1063 Exceptions : none
1064 Caller : internal ($self->sort_chromosomes)
1065
1066 =cut
1067
1068 sub _by_chr_num {
1069 my @awords = split /-/, $a;
1070 my @bwords = split /-/, $b;
1071
1072 my $anum = $awords[0];
1073 my $bnum = $bwords[0];
1074
1075 if ($anum !~ /^[0-9]*$/) {
1076 if ($bnum !~ /^[0-9]*$/) {
1077 return $anum cmp $bnum;
1078 } else {
1079 return 1;
1080 }
1081 }
1082 if ($bnum !~ /^[0-9]*$/) {
1083 return -1;
1084 }
1085
1086 if ($anum <=> $bnum) {
1087 return $anum <=> $bnum;
1088 } else {
1089 if ($#awords == 0) {
1090 return -1;
1091 } elsif ($#bwords == 0) {
1092 return 1;
1093 } else {
1094 return $awords[1] cmp $bwords[1];
1095 }
1096 }
1097 }
1098
1099 =head2 split_chromosomes_by_size
1100
1101 Arg[1] : (optional) Int $cutoff - the cutoff in bp between small and
1102 large chromosomes
1103 Arg[2] : (optional) Boolean to include duplicate regions, ie PAR or not
1104 (default is no)
1105 Arg[3] : (optional) Coordsystem version to retrieve
1106
1107 Example : my $chr_slices = $support->split_chromosomes_by_size;
1108 foreach my $block_size (keys %{ $chr_slices }) {
1109 print "Chromosomes with blocksize $block_size: ";
1110 print join(", ", map { $_->seq_region_name }
1111 @{ $chr_slices->{$block_size} });
1112 }
1113 Description : Determines block sizes for storing DensityFeatures on
1114 chromosomes, and return slices for each chromosome. The block
1115 size is determined so that you have 150 bins for the smallest
1116 chromosome over 5 Mb in length. For chromosomes smaller than 5
1117 Mb, an additional smaller block size is used to yield 150 bins
1118 for the overall smallest chromosome. This will result in
1119 reasonable resolution for small chromosomes and high
1120 performance for big ones. Does not return non-reference seq_regions
1121 Return type : Hashref (key: block size; value: Arrayref of chromosome
1122 Bio::EnsEMBL::Slices)
1123 Exceptions : none
1124 Caller : density scripts
1125
1126 =cut
1127
1128 sub split_chromosomes_by_size {
1129 my $self = shift;
1130 my $cutoff = shift || 5000000;
1131 my $dup = shift || 0;
1132 my $cs_version = shift;
1133 my $include_non_reference = 1; #get non reference slices
1134 my $slice_adaptor = $self->dba->get_SliceAdaptor;
1135 my $top_slices;
1136 if ($self->param('chromosomes')) {
1137 foreach my $chr ($self->param('chromosomes')) {
1138 push @{ $top_slices }, $slice_adaptor->fetch_by_region('chromosome', $chr);
1139 }
1140 } else {
1141 $top_slices = $slice_adaptor->fetch_all('chromosome',$cs_version,$include_non_reference,$dup);
1142 }
1143
1144 # filter out patches, if present
1145 $top_slices = [ grep { $_->is_reference or $self->is_haplotype($_,$self->dba) } @$top_slices ];
1146
1147 my ($big_chr, $small_chr, $min_big_chr, $min_small_chr);
1148 foreach my $slice (@{ $top_slices }) {
1149 next if ($slice->length eq 10000); #hack for chrY pseudoslice
1150 if ($slice->length < $cutoff) {
1151 if (! $min_small_chr or ($min_small_chr > $slice->length)) {
1152 $min_small_chr = $slice->length;
1153 }
1154 # push small chromosomes onto $small_chr
1155 push @{ $small_chr }, $slice;
1156 }
1157 elsif (! $min_big_chr or ($min_big_chr > $slice->length) ){
1158 $min_big_chr = $slice->length;
1159 }
1160 # push _all_ chromosomes onto $big_chr
1161 push @{ $big_chr }, $slice;
1162 }
1163 my $chr_slices;
1164 $chr_slices->{int($min_big_chr/150)} = $big_chr if $min_big_chr;
1165 $chr_slices->{int($min_small_chr/150)} = $small_chr if $min_small_chr;
1166 return $chr_slices;
1167 }
1168
1169 =head2 log
1170
1171 Arg[1] : String $txt - the text to log
1172 Arg[2] : Int $indent - indentation level for log message
1173 Example : my $log = $support->log_filehandle;
1174 $support->log('Log foo.\n', 1);
1175 Description : Logs a message to the filehandle initialised by calling
1176 $self->log_filehandle(). You can supply an indentation level
1177 to get nice hierarchical log messages.
1178 Return type : true on success
1179 Exceptions : thrown when no filehandle can be obtained
1180 Caller : general
1181
1182 =cut
1183
1184 sub log {
1185 my ($self, $txt, $indent) = @_;
1186 $indent ||= 0;
1187
1188 # strip off leading linebreaks so that indenting doesn't break
1189 $txt =~ s/^(\n*)//;
1190
1191 $txt = $1." "x$indent . $txt;
1192 my $fh = $self->{'_log_filehandle'};
1193 throw("Unable to obtain log filehandle") unless $fh;
1194 print $fh "$txt";
1195 return(1);
1196 }
1197
1198 =head2 lock_log
1199
1200 Description : Use flock-style locks to lock log and fastforward to end.
1201 Useful if log is being written to by multiple processes.
1202 =cut
1203
1204 sub lock_log {
1205 my ($self) = @_;
1206
1207 my $fh = $self->{'_log_filehandle'};
1208 return if -t $fh or -p $fh; # Shouldn't lock such things
1209 flock($self->{'_log_filehandle'},LOCK_EX) || return 0;
1210 seek($self->{'_log_filehandle'},0,SEEK_END); # fail ok, prob not reg file
1211 return 1;
1212 }
1213
1214 =head2 unlock_log
1215
1216 Description : Unlock log previously locked by lock_log.
1217
1218 =cut
1219
1220 sub unlock_log {
1221 my ($self) = @_;
1222
1223 my $fh = $self->{'_log_filehandle'};
1224 return if -t $fh or -p $fh; # We don't lock such things
1225 # flush is implicit in flock
1226 flock($self->{'_log_filehandle'},LOCK_UN) || return 0;
1227 return 1;
1228 }
1229
1230 =head2 log_warning
1231
1232 Arg[1] : String $txt - the warning text to log
1233 Arg[2] : Int $indent - indentation level for log message
1234 Arg[3] : Bool - add a line break before warning if true
1235 Example : my $log = $support->log_filehandle;
1236 $support->log_warning('Log foo.\n', 1);
1237 Description : Logs a message via $self->log and increases the warning counter.
1238 Return type : true on success
1239 Exceptions : none
1240 Caller : general
1241
1242 =cut
1243
1244 sub log_warning {
1245 my ($self, $txt, $indent, $break) = @_;
1246 $txt = "WARNING: " . $txt;
1247 $txt = "\n$txt" if ($break);
1248 $self->log($txt, $indent);
1249 $self->{'_warnings'}++;
1250 return(1);
1251 }
1252
1253 =head2 log_error
1254
1255 Arg[1] : String $txt - the error text to log
1256 Arg[2] : Int $indent - indentation level for log message
1257 Example : my $log = $support->log_filehandle;
1258 $support->log_error('Log foo.\n', 1);
1259 Description : Logs a message via $self->log and exits the script.
1260 Return type : none
1261 Exceptions : none
1262 Caller : general
1263
1264 =cut
1265
1266 sub log_error {
1267 my ($self, $txt, $indent) = @_;
1268 $txt = "ERROR: ".$txt;
1269 $self->log($txt, $indent);
1270 $self->log("Exiting.\n");
1271 exit;
1272 }
1273
1274 =head2 log_verbose
1275
1276 Arg[1] : String $txt - the warning text to log
1277 Arg[2] : Int $indent - indentation level for log message
1278 Example : my $log = $support->log_filehandle;
1279 $support->log_verbose('Log this verbose message.\n', 1);
1280 Description : Logs a message via $self->log if --verbose option was used
1281 Return type : TRUE on success, FALSE if not verbose
1282 Exceptions : none
1283 Caller : general
1284
1285 =cut
1286
1287 sub log_verbose {
1288 my ($self, $txt, $indent) = @_;
1289 return(0) unless $self->param('verbose');
1290 $self->log($txt, $indent);
1291 return(1);
1292 }
1293
1294 =head2 log_stamped
1295
1296 Arg[1] : String $txt - the warning text to log
1297 Arg[2] : Int $indent - indentation level for log message
1298 Example : my $log = $support->log_filehandle;
1299 $support->log_stamped('Log this stamped message.\n', 1);
1300 Description : Appends timestamp and memory usage to a message and logs it via
1301 $self->log
1302 Return type : TRUE on success
1303 Exceptions : none
1304 Caller : general
1305
1306 =cut
1307
1308 sub log_stamped {
1309 my ($self, $txt, $indent) = @_;
1310 # append timestamp and memory usage to log text
1311 $txt =~ s/(\n*)$//;
1312 $txt .= " ".$self->date_and_mem.$1;
1313 $self->log($txt, $indent);
1314 return(1);
1315 }
1316
1317 =head2 log_filehandle
1318
1319 Arg[1] : (optional) String $mode - file access mode
1320 Example : my $log = $support->log_filehandle;
1321 # print to the filehandle
1322 print $log 'Lets start logging...\n';
1323 # log via the wrapper $self->log()
1324 $support->log('Another log message.\n');
1325 Description : Returns a filehandle for logging (STDERR by default, logfile if
1326 set from config or commandline). You can use the filehandle
1327 directly to print to, or use the smart wrapper $self->log().
1328 Logging mode (truncate or append) can be set by passing the
1329 mode as an argument to log_filehandle(), or with the
1330 --logappend commandline option (default: truncate)
1331 Return type : Filehandle - the filehandle to log to
1332 Exceptions : thrown if logfile can't be opened
1333 Caller : general
1334
1335 =cut
1336
1337 sub log_filehandle {
1338 my ($self, $mode, $date) = @_;
1339 $mode ||= '>';
1340 $mode = '>>' if ($self->param('logappend'));
1341 my $fh = \*STDERR;
1342 if (my $logfile = $self->param('logfile')) {
1343 $logfile .= "_$date" if $date;
1344 if (my $logpath = $self->param('logpath')) {
1345 unless (-e $logpath) {
1346 system("mkdir $logpath") == 0 or
1347 $self->log_error("Can't create log dir $logpath: $!\n");
1348 }
1349 $logfile = "$logpath/$logfile";
1350 }
1351 open($fh, "$mode", $logfile) or throw(
1352 "Unable to open $logfile for writing: $!");
1353 }
1354 $self->{'_log_filehandle'} = $fh;
1355 return $self->{'_log_filehandle'};
1356 }
1357
1358 =head2 filehandle
1359
1360 Arg[1] : String $mode - file access mode
1361 Arg[2] : String $file - input or output file
1362 Example : my $fh = $support->filehandle('>>', '/path/to/file');
1363 # print to the filehandle
1364 print $fh 'Your text goes here...\n';
1365 Description : Returns a filehandle (*STDOUT for writing, *STDIN for reading
1366 by default) to print to or read from.
1367 Return type : Filehandle - the filehandle
1368 Exceptions : thrown if file can't be opened
1369 Caller : general
1370
1371 =cut
1372
1373 sub filehandle {
1374 my ($self, $mode, $file) = @_;
1375 $mode ||= ">";
1376 my $fh;
1377 if ($file) {
1378 open($fh, "$mode", $file) or throw(
1379 "Unable to open $file for writing: $!");
1380 } elsif ($mode =~ />/) {
1381 $fh = \*STDOUT;
1382 } elsif ($mode =~ /</) {
1383 $fh = \*STDIN;
1384 }
1385 return $fh;
1386 }
1387
1388 =head2 init_log_date
1389
1390 Example : $support->init_log_date;
1391 Description : Opens a filehandle to a logfile with the date in the file name
1392 Return type : Filehandle - the log filehandle
1393 Exceptions : none
1394 Caller : general
1395
1396 =cut
1397
1398 sub init_log_date {
1399 my $self = shift;
1400 my $date = $self->date;
1401 return $self->init_log($date);
1402 }
1403
1404 =head2 init_log
1405
1406 Example : $support->init_log;
1407 Description : Opens a filehandle to the logfile and prints some header
1408 information to this file. This includes script name, date, user
1409 running the script and parameters the script will be running
1410 with.
1411 Return type : Filehandle - the log filehandle
1412 Exceptions : none
1413 Caller : general
1414
1415 =cut
1416
1417 sub init_log {
1418 my $self = shift;
1419 my $date = shift;
1420
1421 # get a log filehandle
1422 my $log = $self->log_filehandle(undef,$date);
1423
1424 # print script name, date, user who is running it
1425 my $hostname = `hostname`;
1426 chomp $hostname;
1427 my $script = "$hostname:$Bin/$Script";
1428 my $user = `whoami`;
1429 chomp $user;
1430 $self->log("Script: $script\nDate: ".$self->date_and_time."\nUser: $user\n");
1431
1432 # print parameters the script is running with
1433 $self->log("Parameters:\n\n");
1434 $self->log($self->list_all_params);
1435
1436 # remember start time
1437 $self->{'_start_time'} = time;
1438
1439 return $log;
1440 }
1441
1442 =head2 finish_log
1443
1444 Example : $support->finish_log;
1445 Description : Writes footer information to a logfile. This includes the
1446 number of logged warnings, timestamp and memory footprint.
1447 Return type : TRUE on success
1448 Exceptions : none
1449 Caller : general
1450
1451 =cut
1452
1453 sub finish_log {
1454 my $self = shift;
1455 $self->log("\nAll done. ".$self->warnings." warnings. ");
1456 if ($self->{'_start_time'}) {
1457 $self->log("Runtime ");
1458 my $diff = time - $self->{'_start_time'};
1459 my $sec = $diff % 60;
1460 $diff = ($diff - $sec) / 60;
1461 my $min = $diff % 60;
1462 my $hours = ($diff - $min) / 60;
1463 $self->log("${hours}h ${min}min ${sec}sec ");
1464 }
1465 $self->log($self->date_and_mem."\n\n");
1466 return(1);
1467 }
1468
1469 =head2 date_and_mem
1470
1471 Example : print LOG "Time, memory usage: ".$support->date_and_mem."\n";
1472 Description : Prints a timestamp and the memory usage of your script.
1473 Return type : String - timestamp and memory usage
1474 Exceptions : none
1475 Caller : general
1476
1477 =cut
1478
1479 sub date_and_mem {
1480 my $date = strftime "%Y-%m-%d %T", localtime;
1481 my $mem = `ps -p $$ -o vsz |tail -1`;
1482 chomp $mem;
1483 return "[$date, mem $mem]";
1484 }
1485
1486 =head2 date
1487
1488 Example : print "Date: " . $support->date . "\n";
1489 Description : Prints a nicely formatted datetamp (YYYY-DD-MM)
1490 Return type : String - the timestamp
1491 Exceptions : none
1492 Caller : general
1493
1494 =cut
1495
1496 sub date {
1497 return strftime "%Y-%m-%d", localtime;
1498 }
1499
1500 =head2 date_and_time
1501
1502 Example : print "Date: " . $support->date . "\n";
1503 Description : Prints a nicely formatted timestamp (YYYY-DD-MM hh:mm:ss)
1504 Return type : String - the timestamp
1505 Exceptions : none
1506 Caller : general
1507
1508 =cut
1509
1510 sub date_and_time {
1511 return strftime "%Y-%m-%d %T", localtime;
1512 }
1513
1514 =head2 format_time
1515
1516 Example : print $support->format_time($gene->modifed_date) . "\n";
1517 Description : Prints timestamps from the database
1518 Return type : String - nicely formatted time stamp
1519 Exceptions : none
1520 Caller : general
1521
1522 =cut
1523
1524
1525 sub date_format {
1526 my( $self, $time, $format ) = @_;
1527 my( $d,$m,$y) = (localtime($time))[3,4,5];
1528 my %S = ('d'=>sprintf('%02d',$d),'m'=>sprintf('%02d',$m+1),'y'=>$y+1900);
1529 (my $res = $format ) =~s/%(\w)/$S{$1}/ge;
1530 return $res;
1531 }
1532
1533
1534 =head2 mem
1535
1536 Example : print "Memory usage: " . $support->mem . "\n";
1537 Description : Prints the memory used by your script. Not sure about platform
1538 dependence of this call ...
1539 Return type : String - memory usage
1540 Exceptions : none
1541 Caller : general
1542
1543 =cut
1544
1545 sub mem {
1546 my $mem = `ps -p $$ -o vsz |tail -1`;
1547 chomp $mem;
1548 return $mem;
1549 }
1550
1551 =head2 commify
1552
1553 Arg[1] : Int $num - a number to commify
1554 Example : print "An easy to read number: ".$self->commify(100000000);
1555 # will print 100,000,000
1556 Description : put commas into a number to make it easier to read
1557 Return type : a string representing the commified number
1558 Exceptions : none
1559 Caller : general
1560 Status : stable
1561
1562 =cut
1563
1564 sub commify {
1565 my $self = shift;
1566 my $num = shift;
1567
1568 $num = reverse($num);
1569 $num =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
1570
1571 return scalar reverse $num;
1572 }
1573
1574 =head2 fetch_non_hidden_slices
1575
1576 Arg[1] : B::E::SliceAdaptor
1577 Arg[2] : B::E::AttributeAdaptor
1578 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
1579 Arg[4] : string $coord_system_version (optional) - 'otter' by default
1580 Example : $chroms = $support->fetch_non_hidden_slice($sa,$aa);
1581 Description : retrieve all slices from a loutre database that don't have a hidden attribute.
1582 Doesn't retrieve non-reference slices
1583 Return type : arrayref
1584 Caller : general
1585 Status : stable
1586
1587 =cut
1588
1589 sub fetch_non_hidden_slices {
1590 my $self = shift;
1591 my $aa = shift or throw("You must supply an attribute adaptor");
1592 my $sa = shift or throw("You must supply a slice adaptor");
1593 my $cs = shift || 'chromosome';
1594 my $cv = shift || 'Otter';
1595 my $visible_chroms;
1596 foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) {
1597 my $chrom_name = $chrom->name;
1598 my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden');
1599 if ( scalar(@$attribs) > 1 ) {
1600 $self->log_warning("More than one hidden attribute for chromosome $chrom_name\n");
1601 }
1602 elsif ($attribs->[0]->value == 0) {
1603 push @$visible_chroms, $chrom;
1604 }
1605 elsif ($attribs->[0]->value == 1) {
1606 $self->log_verbose("chromosome $chrom_name is hidden\n");
1607 }
1608 else {
1609 $self->log_warning("No hidden attribute for chromosome $chrom_name\n");
1610 }
1611 }
1612 return $visible_chroms;
1613 }
1614
1615 =head2 get_non_hidden_slice_names
1616
1617 Arg[1] : B::E::SliceAdaptor
1618 Arg[2] : B::E::AttributeAdaptor
1619 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
1620 Arg[4] : string $coord_system_version (optional) - 'otter' by default
1621 Example : $chrom_names = $support->get_non_hidden_slice_names($sa,$aa);
1622 Description : retrieve names of all slices from a loutre database that don't have a hidden attribute.
1623 Doesn't retrieve non-reference slices
1624 Return type : arrayref of names of all non-hidden slices
1625 Caller : general
1626 Status : stable
1627
1628 =cut
1629
1630 sub get_non_hidden_slice_names {
1631 my $self = shift;
1632 my $aa = shift or throw("You must supply an attribute adaptor");
1633 my $sa = shift or throw("You must supply a slice adaptor");
1634 my $cs = shift || 'chromosome';
1635 my $cv = shift || 'Otter';
1636 my $visible_chrom_names;
1637 foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) {
1638 my $chrom_name = $chrom->seq_region_name;
1639 my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden');
1640 if ( scalar(@$attribs) > 1 ) {
1641 $self->log_warning("More than one hidden attribute for chromosome $chrom_name\n");
1642 }
1643 elsif ($attribs->[0]->value == 0) {
1644 push @$visible_chrom_names, $chrom_name;
1645 }
1646 elsif ($attribs->[0]->value == 1) {
1647 $self->log_verbose("chromosome $chrom_name is hidden\n");
1648 }
1649 else {
1650 $self->log_warning("No hidden attribute for chromosome $chrom_name\n");
1651 }
1652 }
1653 return $visible_chrom_names;
1654 }
1655
1656
1657 =head2 get_wanted_chromosomes
1658
1659 Arg[1] : B::E::SliceAdaptor
1660 Arg[2] : B::E::AttributeAdaptor
1661 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
1662 Arg[4] : string $coord_system_version (optional) - 'otter' by default
1663 Example : $chr_names = $support->get_wanted_chromosomes($laa,$lsa);
1664 Description : retrieve names of slices from a lutra database that are ready for dumping to Vega.
1665 Deals with list of names to ignore (ignore_chr = LIST)
1666 Return type : arrayref of slices
1667 Caller : general
1668 Status : stable
1669
1670 =cut
1671
1672 sub get_wanted_chromosomes {
1673 my $self = shift;
1674 my $aa = shift or throw("You must supply an attribute adaptor");
1675 my $sa = shift or throw("You must supply a slice adaptor");
1676 my $cs = shift || 'chromosome';
1677 my $cv = shift || 'Otter';
1678 my $export_mode = $self->param('release_type');
1679 my $release = $self->param('vega_release');
1680 my $names;
1681 my $chroms = $self->fetch_non_hidden_slices($aa,$sa,$cs,$cv);
1682 CHROM:
1683 foreach my $chrom (@$chroms) {
1684 my $attribs = $aa->fetch_all_by_Slice($chrom);
1685 my $vals = $self->get_attrib_values($attribs,'vega_export_mod');
1686 if (scalar(@$vals > 1)) {
1687 $self->log_warning ("Multiple attribs for \'vega_export_mod\', please fix before continuing");
1688 exit;
1689 }
1690 next CHROM if (! grep { $_ eq $export_mode} @$vals);
1691 $vals = $self->get_attrib_values($attribs,'vega_release',$release);
1692 if (scalar(@$vals > 1)) {
1693 $self->log_warning ("Multiple attribs for \'vega_release\' value = $release , please fix before continuing");
1694 exit;
1695 }
1696 next CHROM if (! grep { $_ eq $release} @$vals);
1697 my $name = $chrom->seq_region_name;
1698 if (my @ignored = $self->param('ignore_chr')) {
1699 next CHROM if (grep {$_ eq $name} @ignored);
1700 }
1701 push @{$names}, $name;
1702 }
1703 return $names;
1704 }
1705
1706 =head2 is_haplotype
1707
1708 Arg[1] : B::E::Slice
1709 Arg[2]: : B::E::DBAdaptor (optional, if you don't supply one then the *first* one you generated is returned, which may or may not be what you want!)
1710 Description : Is the slice a Vega haplotype? At the moment this is
1711 implemented by testing for presence of vega_ref_chrom but non_ref
1712 which is correct in practice, but really misses the prupose of
1713 vega_ref_chrom, so this might bite us if that changes.
1714 Return type : boolean
1715
1716 =cut
1717
1718 sub is_haplotype {
1719 my ($self,$slice,$dba) = @_;
1720
1721 $dba ||= $self->dba;
1722 my $aa = $dba->get_adaptor('Attribute');
1723
1724 my $attribs = $aa->fetch_all_by_Slice($slice);
1725 return (@{$self->get_attrib_values($attribs,'vega_ref_chrom')} and
1726 @{$self->get_attrib_values($attribs,'non_ref',1)});
1727 }
1728
1729 =head2 get_unique_genes
1730
1731 Arg[1] : B::E::Slice
1732 Arg[2] : B::E::DBAdaptor (optional, if you don't supply one then the *first* one you generated is returned, which may or may not be what you want!)
1733 Example : $genes = $support->get_unique_genes($slice,$dba);
1734 Description : Retrieve genes that are only on the slice itself - used for human where assembly patches
1735 are in the assembly_exception table. Needs the PATCHes to have 'non_ref' seq_region_attributes.
1736 Return type : arrayref of genes
1737 Caller : general
1738 Status : stable
1739
1740 =cut
1741
1742 sub get_unique_genes {
1743 my $self = shift;
1744 my ($slice,$dba) = @_;
1745 $slice or throw("You must supply a slice");
1746 $dba ||= $self->dba;
1747
1748 my $sa = $dba->get_adaptor('Slice');
1749 my $ga = $dba->get_adaptor('Gene');
1750 my $patch = 0;
1751 my $genes = [];
1752 if ( ! $slice->is_reference() and ! $self->is_haplotype($slice,$dba) ) {
1753 # if ( 0 ) {
1754 $patch = 1;
1755 my $slices = $sa->fetch_by_region_unique( $slice->coord_system_name(),$slice->seq_region_name(),undef,undef,undef,$slice->coord_system()->version() );
1756 foreach my $slice ( @{$slices} ) {
1757 push @$genes,@{$ga->fetch_all_by_Slice($slice)};
1758 # my $start = $slice->start;
1759 }
1760 }
1761 else {
1762 $genes = $ga->fetch_all_by_Slice($slice);
1763 }
1764 return ($genes, $patch);
1765 }
1766
1767
1768
1769 =head2 get_attrib_values
1770
1771 Arg[1] : Arrayref of B::E::Attributes
1772 Arg[2] : 'code' to search for
1773 Arg[3] : 'value' to search for (optional)
1774 Example : my $c = $self->get_attrib_values($attribs,'name'));
1775 Description : (i) In the absence of an attribute value argument, examines an arrayref
1776 of B::E::Attributes for a particular attribute type, returning the values
1777 for each attribute of that type. Can therefore be used to test for the
1778 number of attributes of that type.
1779 (ii) In the presence of the optional value argument it returns all
1780 attributes with that value ie can be used to test for the presence of an
1781 attribute with that particular value.
1782 Return type : arrayref of values for that attribute
1783 Caller : general
1784 Status : stable
1785
1786 =cut
1787
1788 sub get_attrib_values {
1789 my $self = shift;
1790 my $attribs = shift;
1791 my $code = shift;
1792 my $value = shift;
1793 if (my @atts = grep {$_->code eq $code } @$attribs) {
1794 my $r = [];
1795 if ($value) {
1796 if (my @values = grep {$_->value eq $value} @atts) {
1797 foreach (@values) {
1798 push @$r, $_->value;
1799 }
1800 return $r;
1801 }
1802 else {
1803 return [];
1804 }
1805 }
1806 else {
1807 foreach (@atts) {
1808 push @$r, $_->value;
1809 }
1810 return $r;
1811 }
1812 }
1813 else {
1814 return [];
1815 }
1816 }
1817
1818 =head2 fix_attrib_value
1819
1820 Arg[1] : Arrayref of existing B::E::Attributes
1821 Arg[2] : dbID of object
1822 Arg[3] : name of object (just for reporting)
1823 Arg[4] : attrib_type.code
1824 Arg[5] : attrib_type.value
1825 Arg[6] : interactive ? (0 by default)
1826 Arg[7] : table
1827 Example : $support->fix_attrib_value($attribs,$chr_id,$chr_name,'vega_export_mod','N',1);
1828 Description : adds a new attribute to an object, or updates an existing attribute with a new value
1829 Can be run in interactive or non-interactive mode (default)
1830 Return type : arrayref of results
1831 Caller : general
1832 Status : only ever tested with seq_region_attributes to date
1833
1834 =cut
1835
1836 sub fix_attrib_value {
1837 my $self = shift;
1838 my $attribs = shift;
1839 my $id = shift;
1840 my $name = shift;
1841 my $code = shift;
1842 my $value = shift;
1843 my $interact = shift || 0;
1844 my $table = shift || 'seq_region_attrib';
1845
1846 #transiently set interactive parameter to zero
1847 my $int_before;
1848 if (! $interact) {
1849 $int_before = $self->param('interactive');
1850 $self->param('interactive',0);
1851 }
1852
1853 #get any existing value(s) for this attribute
1854 my $existings = $self->get_attrib_values($attribs,$code);
1855
1856 #add a new attribute if there is none...
1857 if (! @$existings ) {
1858 if ($self->user_proceed("Do you want to set $name attrib (code = $code) to value $value ?")) {
1859 my $r = $self->store_new_attribute($id,$code,$value);
1860
1861 #reset interactive parameter
1862 $self->param('interactive',$int_before) if (! $interact);
1863 return $r;
1864 }
1865 }
1866 #...warn and exit if you're trying to update more than one value for the same attribute...
1867 elsif (scalar @$existings > 1) {
1868 $self->log_warning("You shouldn't be trying to update multiple attributes with the same code at once ($name:$code,$value), looks like you have duplicate entries in the (seq_region_)attrib table\n");
1869 exit;
1870 }
1871
1872 #...or update an attribute with new values...
1873 else {
1874 my $existing = $existings->[0];
1875 if ($existing ne $value) {
1876 if ($self->user_proceed("Do you want to reset $name attrib (code = $code) from $existing to $value ?")) {
1877 my $r = $self->update_attribute($id,$code,$value);
1878 $self->param('interactive',$int_before) if (! $interact);
1879 push @$r, $existing;
1880 return $r;
1881 }
1882 }
1883 #...or make no change
1884 else {
1885 $self->param('interactive',$int_before) if (! $interact);
1886 return [];
1887 }
1888 }
1889 }
1890
1891 =head2 _get_attrib_id
1892
1893 Arg[1] : attrib_type.code
1894 Arg[2] : database handle
1895 Example : $self->_get_attrib_id('name',$dbh)
1896 Description : get attrib_type.attrib_type_id from a attrib_type.code
1897 Return type : attrib_type.attrib_type_id
1898 Caller : internal
1899 Status : stable
1900
1901 =cut
1902
1903 sub _get_attrib_id {
1904 my $self = shift;
1905 my $attrib_code = shift;
1906 my $dbh = shift;
1907 my ($attrib_id) = $dbh->selectrow_array(
1908 qq(select attrib_type_id
1909 from attrib_type
1910 where code = ?),
1911 {},
1912 ($attrib_code)
1913 );
1914 if (! $attrib_id) {
1915 $self->log_warning("There is no attrib_type_id for code $attrib_code, please patch the attrib_table\n");
1916 exit;
1917 }
1918 else {
1919 return $attrib_id;
1920 }
1921 }
1922
1923 =head2 store_new_attribute
1924
1925 Arg[1] : seq_region.seq_region_id
1926 Arg[2] : attrib_type.code
1927 Arg[3] : attrib_type.value
1928 ARG[4] : table to update (seq_region_attribute by default)
1929 Example : $support->store_new_attribute(23,name,5);
1930 Description : uses MySQL to store an entry (code and value) in an attribute table
1931 (seq_region_attrib by default)
1932 Return type : array_ref
1933 Caller : general
1934 Status : stable
1935
1936 =cut
1937
1938 sub store_new_attribute {
1939 my $self = shift;
1940 my $sr_id = shift;
1941 my $attrib_code = shift;
1942 my $attrib_value = shift || '';
1943 my $table = shift || 'seq_region_attrib';
1944
1945 #get database handle
1946 my $dbh = $self->get_dbconnection('loutre');
1947 #get attrib_type_id for this particular attribute
1948 my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh);
1949 #store
1950 my $r = $dbh->do(
1951 qq(insert into $table
1952 values (?,?,?)),
1953 {},
1954 ($sr_id,$attrib_id,$attrib_value)
1955 );
1956 return ['Stored',$r];
1957 }
1958
1959 =head2 update_attribute
1960
1961 Arg[1] : seq_region.seq_region_id
1962 Arg[2] : attrib_type.code
1963 Arg[3] : attrib_type.value
1964 ARG[4] : table to update (seq_region_attribute by default)
1965 Example : $support->update_attribute(23,name,5);
1966 Description : uses MySQL to update an attribute table (seq_region_attrib by default)
1967 Return type : array_ref
1968 Caller : general
1969 Status : stable
1970
1971 =cut
1972
1973 sub update_attribute {
1974 my $self = shift;
1975 my $sr_id = shift;
1976 my $attrib_code = shift;
1977 my $attrib_value = shift;
1978 my $table = shift || 'seq_region_attrib';
1979 my $dbh = $self->get_dbconnection('loutre');
1980 my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh);
1981 #update
1982 my $r = $dbh->do(
1983 qq(update $table
1984 set value = ?
1985 where seq_region_id = $sr_id
1986 and attrib_type_id = $attrib_id),
1987 {},
1988 ($attrib_value)
1989 );
1990 return ['Updated',$r];
1991 }
1992
1993
1994 =head2 remove_duplicate_attribs
1995
1996 Arg[1] : db handle
1997 Arg[2] : table
1998 Example : $support->remove_duplicate_attribs($dbh,'gene');
1999 Description : uses MySQL to remove duplicate entries from an attribute table
2000 Return type : none
2001 Caller : general
2002 Status : stable
2003
2004 =cut
2005
2006 sub remove_duplicate_attribs {
2007 my $self = shift;
2008 my $dbh = shift;
2009 my $table = shift;
2010 $dbh->do(qq(create table nondup_${table}_attrib like ${table}_attrib));
2011 $dbh->do(qq(insert into nondup_${table}_attrib (select ${table}_id, attrib_type_id, value from ${table}_attrib group by ${table}_id, attrib_type_id, value)));
2012 $dbh->do(qq(delete from ${table}_attrib));
2013 $dbh->do(qq(insert into ${table}_attrib (select ${table}_id, attrib_type_id, value from nondup_${table}_attrib)));
2014 $dbh->do(qq(drop table nondup_${table}_attrib));
2015 }
2016
2017
2018 1;