Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Utils/ConversionSupport.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::Utils::ConversionSupport - Utility module for Vega release and | |
| 24 schema conversion scripts | |
| 25 | |
| 26 =head1 SYNOPSIS | |
| 27 | |
| 28 my $serverroot = '/path/to/ensembl'; | |
| 29 my $support = new Bio::EnsEMBL::Utils::ConversionSupport($serverroot); | |
| 30 | |
| 31 # parse common options | |
| 32 $support->parse_common_options; | |
| 33 | |
| 34 # parse extra options for your script | |
| 35 $support->parse_extra_options( 'string_opt=s', 'numeric_opt=n' ); | |
| 36 | |
| 37 # ask user if he wants to run script with these parameters | |
| 38 $support->confirm_params; | |
| 39 | |
| 40 # see individual method documentation for more stuff | |
| 41 | |
| 42 =head1 DESCRIPTION | |
| 43 | |
| 44 This module is a collection of common methods and provides helper | |
| 45 functions for the Vega release and schema conversion scripts. Amongst | |
| 46 others, it reads options from a config file, parses commandline options | |
| 47 and does logging. | |
| 48 | |
| 49 =head1 METHODS | |
| 50 | |
| 51 =cut | |
| 52 | |
| 53 package Bio::EnsEMBL::Utils::ConversionSupport; | |
| 54 | |
| 55 use strict; | |
| 56 use warnings; | |
| 57 no warnings 'uninitialized'; | |
| 58 | |
| 59 use Getopt::Long; | |
| 60 use Text::Wrap; | |
| 61 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
| 62 use FindBin qw($Bin $Script); | |
| 63 use POSIX qw(strftime); | |
| 64 use Cwd qw(abs_path); | |
| 65 use DBI; | |
| 66 use Data::Dumper; | |
| 67 use Fcntl qw(:flock SEEK_END); | |
| 68 | |
| 69 my $species_c = 1; #counter to be used for each database connection made | |
| 70 | |
| 71 =head2 new | |
| 72 | |
| 73 Arg[1] : String $serverroot - root directory of your ensembl sandbox | |
| 74 Example : my $support = new Bio::EnsEMBL::Utils::ConversionSupport( | |
| 75 '/path/to/ensembl'); | |
| 76 Description : constructor | |
| 77 Return type : Bio::EnsEMBL::Utils::ConversionSupport object | |
| 78 Exceptions : thrown if no serverroot is provided | |
| 79 Caller : general | |
| 80 | |
| 81 =cut | |
| 82 | |
| 83 sub new { | |
| 84 my $class = shift; | |
| 85 (my $serverroot = shift) or throw("You must supply a serverroot."); | |
| 86 my $self = { | |
| 87 '_serverroot' => $serverroot, | |
| 88 '_param' => { interactive => 1 }, | |
| 89 '_warnings' => 0, | |
| 90 }; | |
| 91 bless ($self, $class); | |
| 92 return $self; | |
| 93 } | |
| 94 | |
| 95 =head2 parse_common_options | |
| 96 | |
| 97 Example : $support->parse_common_options; | |
| 98 Description : This method reads options from a configuration file and parses | |
| 99 some commandline options that are common to all scripts (like | |
| 100 db connection settings, help, dry-run). Commandline options | |
| 101 will override config file settings. | |
| 102 | |
| 103 All options will be accessible via $self->param('name'). | |
| 104 Return type : true on success | |
| 105 Exceptions : thrown if configuration file can't be opened | |
| 106 Caller : general | |
| 107 | |
| 108 =cut | |
| 109 | |
| 110 sub parse_common_options { | |
| 111 my $self = shift; | |
| 112 | |
| 113 # read commandline options | |
| 114 my %h; | |
| 115 Getopt::Long::Configure("pass_through"); | |
| 116 &GetOptions( \%h, | |
| 117 'dbname|db_name=s', | |
| 118 'host|dbhost|db_host=s', | |
| 119 'port|dbport|db_port=n', | |
| 120 'user|dbuser|db_user=s', | |
| 121 'pass|dbpass|db_pass=s', | |
| 122 'conffile|conf=s', | |
| 123 'logfile|log=s', | |
| 124 'nolog', | |
| 125 'logpath=s', | |
| 126 'log_base_path=s', | |
| 127 'logappend|log_append', | |
| 128 'verbose|v', | |
| 129 'interactive|i=s', | |
| 130 'dry_run|dry|n', | |
| 131 'help|h|?', | |
| 132 ); | |
| 133 | |
| 134 # reads config file | |
| 135 my $conffile = $h{'conffile'} || $self->serverroot . "/sanger-plugins/vega/conf/ini-files/Conversion.ini"; | |
| 136 $conffile = abs_path($conffile); | |
| 137 if (-e $conffile) { | |
| 138 open(CONF, $conffile) or throw( | |
| 139 "Unable to open configuration file $conffile for reading: $!"); | |
| 140 my $serverroot = $self->serverroot; | |
| 141 while (<CONF>) { | |
| 142 chomp; | |
| 143 | |
| 144 # remove comments | |
| 145 s/^[#;].*//; | |
| 146 s/\s+[;].*$//; | |
| 147 | |
| 148 # read options into internal parameter datastructure, removing whitespace | |
| 149 next unless (/(\w\S*)\s*=\s*(\S*)\s*/); | |
| 150 my $name = $1; | |
| 151 my $val = $2; | |
| 152 if ($val =~ /\$SERVERROOT/) { | |
| 153 $val =~ s/\$SERVERROOT/$serverroot/g; | |
| 154 $val = abs_path($val); | |
| 155 } | |
| 156 $self->param($name, $val); | |
| 157 } | |
| 158 $self->param('conffile', $conffile); | |
| 159 } | |
| 160 elsif ($conffile) { | |
| 161 warning("Unable to open configuration file $conffile for reading: $!"); | |
| 162 } | |
| 163 | |
| 164 # override configured parameter with commandline options | |
| 165 map { $self->param($_, $h{$_}) } keys %h; | |
| 166 | |
| 167 return (1) if $self->param('nolog'); | |
| 168 | |
| 169 # if logpath & logfile are not set, set them here to /ensemblweb/vega_dev/shared/logs/conversion/DBNAME/SCRIPNAME_NN.log | |
| 170 if (! defined($self->param('log_base_path'))) { | |
| 171 $self->param('log_base_path','/ensemblweb/shared/logs/conversion/'); | |
| 172 } | |
| 173 my $dbname = $self->param('dbname'); | |
| 174 $dbname =~ s/^vega_//; | |
| 175 if (not (defined($self->param('logpath')) )){ | |
| 176 $self->param('logpath', $self->param('log_base_path')."/".$dbname."/" ); | |
| 177 } | |
| 178 if ( not defined $self->param('logfile') ){ | |
| 179 my $log = $Script; | |
| 180 $log =~ s/.pl$//g; | |
| 181 my $counter; | |
| 182 for ($counter=1 ; (-e $self->param('logpath')."/".$log."_".sprintf("%03d", $counter).".log"); $counter++){ | |
| 183 # warn $self->param('logpath')."/".$log."_".$counter.".log"; | |
| 184 } | |
| 185 $self->param('logfile', $log."_".sprintf("%03d", $counter).".log"); | |
| 186 } | |
| 187 return(1); | |
| 188 } | |
| 189 | |
| 190 =head2 parse_extra_options | |
| 191 | |
| 192 Arg[1-N] : option descriptors that will be passed on to Getopt::Long | |
| 193 Example : $support->parse_extra_options('string_opt=s', 'numeric_opt=n'); | |
| 194 Description : Parse extra commandline options by passing them on to | |
| 195 Getopt::Long and storing parameters in $self->param('name). | |
| 196 Return type : true on success | |
| 197 Exceptions : none (caugth by $self->error) | |
| 198 Caller : general | |
| 199 | |
| 200 =cut | |
| 201 | |
| 202 sub parse_extra_options { | |
| 203 my ($self, @params) = @_; | |
| 204 Getopt::Long::Configure("no_pass_through"); | |
| 205 eval { | |
| 206 # catch warnings to pass to $self->error | |
| 207 local $SIG{__WARN__} = sub { die @_; }; | |
| 208 &GetOptions(\%{ $self->{'_param'} }, @params); | |
| 209 }; | |
| 210 $self->error($@) if $@; | |
| 211 return(1); | |
| 212 } | |
| 213 | |
| 214 =head2 allowed_params | |
| 215 | |
| 216 Arg[1-N] : (optional) List of allowed parameters to set | |
| 217 Example : my @allowed = $self->allowed_params(qw(param1 param2)); | |
| 218 Description : Getter/setter for allowed parameters. This is used by | |
| 219 $self->confirm_params() to avoid cluttering of output with | |
| 220 conffile entries not relevant for a given script. You can use | |
| 221 $self->get_common_params() as a shortcut to set them. | |
| 222 Return type : Array - list of allowed parameters | |
| 223 Exceptions : none | |
| 224 Caller : general | |
| 225 | |
| 226 =cut | |
| 227 | |
| 228 sub allowed_params { | |
| 229 my $self = shift; | |
| 230 | |
| 231 # setter | |
| 232 if (@_) { | |
| 233 @{ $self->{'_allowed_params'} } = @_; | |
| 234 } | |
| 235 | |
| 236 # getter | |
| 237 if (ref($self->{'_allowed_params'}) eq 'ARRAY') { | |
| 238 return @{ $self->{'_allowed_params'} }; | |
| 239 } else { | |
| 240 return (); | |
| 241 } | |
| 242 } | |
| 243 | |
| 244 =head2 get_common_params | |
| 245 | |
| 246 Example : my @allowed_params = $self->get_common_params, 'extra_param'; | |
| 247 Description : Returns a list of commonly used parameters in the conversion | |
| 248 scripts. Shortcut for setting allowed parameters with | |
| 249 $self->allowed_params(). | |
| 250 Return type : Array - list of common parameters | |
| 251 Exceptions : none | |
| 252 Caller : general | |
| 253 | |
| 254 =cut | |
| 255 | |
| 256 sub get_common_params { | |
| 257 return qw( | |
| 258 conffile | |
| 259 dbname | |
| 260 host | |
| 261 port | |
| 262 user | |
| 263 pass | |
| 264 nolog | |
| 265 logpath | |
| 266 log_base_path | |
| 267 logfile | |
| 268 logappend | |
| 269 verbose | |
| 270 interactive | |
| 271 dry_run | |
| 272 ); | |
| 273 } | |
| 274 | |
| 275 =head2 get_loutre_params | |
| 276 | |
| 277 Arg : (optional) return a list to parse or not | |
| 278 Example : $support->parse_extra_options($support->get_loutre_params('parse')) | |
| 279 Description : Returns a list of commonly used loutre db parameters - parse option is | |
| 280 simply used to distinguish between reporting and parsing parameters | |
| 281 Return type : Array - list of common parameters | |
| 282 Exceptions : none | |
| 283 Caller : general | |
| 284 | |
| 285 =cut | |
| 286 | |
| 287 sub get_loutre_params { | |
| 288 my ($self,$p) = @_; | |
| 289 if ($p) { | |
| 290 return qw( | |
| 291 loutrehost=s | |
| 292 loutreport=s | |
| 293 loutreuser=s | |
| 294 loutrepass:s | |
| 295 loutredbname=s | |
| 296 ); | |
| 297 } | |
| 298 else { | |
| 299 return qw( | |
| 300 loutrehost | |
| 301 loutreport | |
| 302 loutreuser | |
| 303 loutrepass | |
| 304 loutredbname | |
| 305 ); | |
| 306 } | |
| 307 } | |
| 308 | |
| 309 =head2 remove_vega_params | |
| 310 | |
| 311 Example : $support->remove_vega_params; | |
| 312 Description : Removes Vega db conection parameters. Usefull to avoid clutter in log files when | |
| 313 working exclusively with loutre | |
| 314 Return type : none | |
| 315 Exceptions : none | |
| 316 Caller : general | |
| 317 | |
| 318 =cut | |
| 319 | |
| 320 sub remove_vega_params { | |
| 321 my $self = shift; | |
| 322 foreach my $param (qw(dbname host port user pass)) { | |
| 323 $self->{'_param'}{$param} = undef; | |
| 324 } | |
| 325 } | |
| 326 | |
| 327 =head2 confirm_params | |
| 328 | |
| 329 Example : $support->confirm_params; | |
| 330 Description : Prints a table of parameters that were collected from config | |
| 331 file and commandline and asks user to confirm if he wants | |
| 332 to proceed. | |
| 333 Return type : true on success | |
| 334 Exceptions : none | |
| 335 Caller : general | |
| 336 | |
| 337 =cut | |
| 338 | |
| 339 sub confirm_params { | |
| 340 my $self = shift; | |
| 341 | |
| 342 # print parameter table | |
| 343 print "Running script with these parameters:\n\n"; | |
| 344 print $self->list_all_params; | |
| 345 | |
| 346 if ($self->param('host') eq 'ensweb-1-10') { | |
| 347 # ask user if he wants to proceed | |
| 348 exit unless $self->user_proceed("**************\n\n You're working on ensdb-1-10! Is that correct and you want to continue ?\n\n**************"); | |
| 349 } | |
| 350 else { | |
| 351 # ask user if he wants to proceed | |
| 352 exit unless $self->user_proceed("Continue?"); | |
| 353 } | |
| 354 return(1); | |
| 355 } | |
| 356 | |
| 357 =head2 list_all_params | |
| 358 | |
| 359 Example : print LOG $support->list_all_params; | |
| 360 Description : prints a table of the parameters used in the script | |
| 361 Return type : String - the table to print | |
| 362 Exceptions : none | |
| 363 Caller : general | |
| 364 | |
| 365 =cut | |
| 366 | |
| 367 sub list_all_params { | |
| 368 my $self = shift; | |
| 369 my $txt = sprintf " %-21s%-90s\n", qw(PARAMETER VALUE); | |
| 370 $txt .= " " . "-"x121 . "\n"; | |
| 371 $Text::Wrap::colums = 130; | |
| 372 my @params = $self->allowed_params; | |
| 373 foreach my $key (@params) { | |
| 374 my @vals = $self->param($key); | |
| 375 if (@vals) { | |
| 376 $txt .= Text::Wrap::wrap( sprintf(' %-21s', $key), | |
| 377 ' 'x24, | |
| 378 join(", ", @vals) | |
| 379 ) . "\n"; | |
| 380 } | |
| 381 } | |
| 382 $txt .= "\n"; | |
| 383 return $txt; | |
| 384 } | |
| 385 | |
| 386 =head2 create_commandline_options | |
| 387 | |
| 388 Arg[1] : Hashref $settings - hashref describing what to do | |
| 389 Allowed keys: | |
| 390 allowed_params => 0|1 # use all allowed parameters | |
| 391 exclude => [] # listref of parameters to exclude | |
| 392 replace => {param => newval} # replace value of param with | |
| 393 # newval | |
| 394 Example : $support->create_commandline_options({ | |
| 395 allowed_params => 1, | |
| 396 exclude => ['verbose'], | |
| 397 replace => { 'dbname' => 'homo_sapiens_vega_33_35e' } | |
| 398 }); | |
| 399 Description : Creates a commandline options string that can be passed to any | |
| 400 other script using ConversionSupport. | |
| 401 Return type : String - commandline options string | |
| 402 Exceptions : none | |
| 403 Caller : general | |
| 404 | |
| 405 =cut | |
| 406 | |
| 407 sub create_commandline_options { | |
| 408 my ($self, $settings, $param_hash) = @_; | |
| 409 my %param_hash = $param_hash ? %$param_hash : (); | |
| 410 | |
| 411 # get all allowed parameters | |
| 412 if ($settings->{'allowed_params'}) { | |
| 413 # exclude params explicitly stated | |
| 414 my %exclude = map { $_ => 1 } @{ $settings->{'exclude'} || [] }; | |
| 415 foreach my $param ($self->allowed_params) { | |
| 416 unless ($exclude{$param}) { | |
| 417 my ($first, @rest) = $self->param($param); | |
| 418 next unless (defined($first)); | |
| 419 | |
| 420 if (@rest) { | |
| 421 $first = join(",", $first, @rest); | |
| 422 } | |
| 423 $param_hash{$param} = $first; | |
| 424 } | |
| 425 } | |
| 426 } | |
| 427 | |
| 428 # replace values | |
| 429 foreach my $key (keys %{ $settings->{'replace'} || {} }) { | |
| 430 $param_hash{$key} = $settings->{'replace'}->{$key}; | |
| 431 } | |
| 432 | |
| 433 # create the commandline options string | |
| 434 my $options_string; | |
| 435 foreach my $param (keys %param_hash) { | |
| 436 $options_string .= sprintf("--%s %s ", $param, $param_hash{$param}); | |
| 437 } | |
| 438 return $options_string; | |
| 439 } | |
| 440 | |
| 441 =head2 check_required_params | |
| 442 | |
| 443 Arg[1-N] : List @params - parameters to check | |
| 444 Example : $self->check_required_params(qw(dbname host port)); | |
| 445 Description : Checks $self->param to make sure the requested parameters | |
| 446 have been set. Dies if parameters are missing. | |
| 447 Return type : true on success | |
| 448 Exceptions : none | |
| 449 Caller : general | |
| 450 | |
| 451 =cut | |
| 452 | |
| 453 sub check_required_params { | |
| 454 my ($self, @params) = @_; | |
| 455 my @missing = (); | |
| 456 foreach my $param (@params) { | |
| 457 push @missing, $param unless defined $self->param($param); | |
| 458 } | |
| 459 if (@missing) { | |
| 460 throw("Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n"); | |
| 461 } | |
| 462 return(1); | |
| 463 } | |
| 464 | |
| 465 =head2 user_proceed | |
| 466 | |
| 467 Arg[1] : (optional) String $text - notification text to present to user | |
| 468 Example : # run a code snipped conditionally | |
| 469 if ($support->user_proceed("Run the next code snipped?")) { | |
| 470 # run some code | |
| 471 } | |
| 472 | |
| 473 # exit if requested by user | |
| 474 exit unless ($support->user_proceed("Want to continue?")); | |
| 475 Description : If running interactively, the user is asked if he wants to | |
| 476 perform a script action. If he doesn't, this section is skipped | |
| 477 and the script proceeds with the code. When running | |
| 478 non-interactively, the section is run by default. | |
| 479 Return type : TRUE to proceed, FALSE to skip. | |
| 480 Exceptions : none | |
| 481 Caller : general | |
| 482 | |
| 483 =cut | |
| 484 | |
| 485 sub user_proceed { | |
| 486 my ($self, $text) = @_; | |
| 487 | |
| 488 if ($self->param('interactive')) { | |
| 489 print "$text\n" if $text; | |
| 490 print "[y/N] "; | |
| 491 my $input = lc(<>); | |
| 492 chomp $input; | |
| 493 unless ($input eq 'y') { | |
| 494 print "Skipping.\n"; | |
| 495 return(0); | |
| 496 } | |
| 497 } | |
| 498 | |
| 499 return(1); | |
| 500 } | |
| 501 | |
| 502 =head2 user_confirm | |
| 503 | |
| 504 Description : DEPRECATED - please use user_proceed() instead | |
| 505 | |
| 506 =cut | |
| 507 | |
| 508 sub user_confirm { | |
| 509 my $self = shift; | |
| 510 exit unless $self->user_proceed("Continue?"); | |
| 511 } | |
| 512 | |
| 513 =head2 read_user_input | |
| 514 | |
| 515 Arg[1] : (optional) String $text - notification text to present to user | |
| 516 Example : my $ret = $support->read_user_input("Choose a number [1/2/3]"); | |
| 517 if ($ret == 1) { | |
| 518 # do something | |
| 519 } elsif ($ret == 2) { | |
| 520 # do something else | |
| 521 } | |
| 522 Description : If running interactively, the user is asked for input. | |
| 523 Return type : String - user's input | |
| 524 Exceptions : none | |
| 525 Caller : general | |
| 526 | |
| 527 =cut | |
| 528 | |
| 529 sub read_user_input { | |
| 530 my ($self, $text) = @_; | |
| 531 | |
| 532 if ($self->param('interactive')) { | |
| 533 print "$text\n" if $text; | |
| 534 my $input = <>; | |
| 535 chomp $input; | |
| 536 return $input; | |
| 537 } | |
| 538 } | |
| 539 | |
| 540 =head2 comma_to_list | |
| 541 | |
| 542 Arg[1-N] : list of parameter names to parse | |
| 543 Example : $support->comma_to_list('chromosomes'); | |
| 544 Description : Transparently converts comma-separated lists into arrays (to | |
| 545 allow different styles of commandline options, see perldoc | |
| 546 Getopt::Long for details). Parameters are converted in place | |
| 547 (accessible through $self->param('name')). | |
| 548 Return type : true on success | |
| 549 Exceptions : none | |
| 550 Caller : general | |
| 551 | |
| 552 =cut | |
| 553 | |
| 554 sub comma_to_list { | |
| 555 my $self = shift; | |
| 556 foreach my $param (@_) { | |
| 557 $self->param($param, | |
| 558 split (/,/, join (',', $self->param($param)))); | |
| 559 } | |
| 560 return(1); | |
| 561 } | |
| 562 | |
| 563 =head2 list_or_file | |
| 564 | |
| 565 Arg[1] : Name of parameter to parse | |
| 566 Example : $support->list_or_file('gene'); | |
| 567 Description : Determines whether a parameter holds a list or it is a filename | |
| 568 to read the list entries from. | |
| 569 Return type : true on success | |
| 570 Exceptions : thrown if list file can't be opened | |
| 571 Caller : general | |
| 572 | |
| 573 =cut | |
| 574 | |
| 575 sub list_or_file { | |
| 576 my ($self, $param) = @_; | |
| 577 my @vals = $self->param($param); | |
| 578 return unless (@vals); | |
| 579 | |
| 580 my $firstval = $vals[0]; | |
| 581 if (scalar(@vals) == 1 && -e $firstval) { | |
| 582 # we didn't get a list of values, but a file to read values from | |
| 583 @vals = (); | |
| 584 open(IN, $firstval) or throw("Cannot open $firstval for reading: $!"); | |
| 585 while(<IN>){ | |
| 586 chomp; | |
| 587 push(@vals, $_); | |
| 588 } | |
| 589 close(IN); | |
| 590 $self->param($param, @vals); | |
| 591 } | |
| 592 $self->comma_to_list($param); | |
| 593 return(1); | |
| 594 } | |
| 595 | |
| 596 =head2 param | |
| 597 | |
| 598 Arg[1] : Parameter name | |
| 599 Arg[2-N] : (optional) List of values to set | |
| 600 Example : my $dbname = $support->param('dbname'); | |
| 601 $support->param('port', 3306); | |
| 602 $support->param('chromosomes', 1, 6, 'X'); | |
| 603 Description : Getter/setter for parameters. Accepts single-value params and | |
| 604 list params. | |
| 605 Return type : Scalar value for single-value parameters, array of values for | |
| 606 list parameters | |
| 607 Exceptions : thrown if no parameter name is supplied | |
| 608 Caller : general | |
| 609 | |
| 610 =cut | |
| 611 | |
| 612 sub param { | |
| 613 my $self = shift; | |
| 614 my $name = shift or throw("You must supply a parameter name"); | |
| 615 | |
| 616 # setter | |
| 617 if (@_) { | |
| 618 if (scalar(@_) == 1) { | |
| 619 # single value | |
| 620 $self->{'_param'}->{$name} = shift; | |
| 621 } else { | |
| 622 # list of values | |
| 623 undef $self->{'_param'}->{$name}; | |
| 624 @{ $self->{'_param'}->{$name} } = @_; | |
| 625 } | |
| 626 } | |
| 627 | |
| 628 # getter | |
| 629 if (ref($self->{'_param'}->{$name}) eq 'ARRAY') { | |
| 630 # list parameter | |
| 631 return @{ $self->{'_param'}->{$name} }; | |
| 632 } elsif (defined($self->{'_param'}->{$name})) { | |
| 633 # single-value parameter | |
| 634 return $self->{'_param'}->{$name}; | |
| 635 } else { | |
| 636 return (); | |
| 637 } | |
| 638 } | |
| 639 | |
| 640 =head2 error | |
| 641 | |
| 642 Arg[1] : (optional) String - error message | |
| 643 Example : $support->error("An error occurred: $@"); | |
| 644 exit(0) if $support->error; | |
| 645 Description : Getter/setter for error messages | |
| 646 Return type : String - error message | |
| 647 Exceptions : none | |
| 648 Caller : general | |
| 649 | |
| 650 =cut | |
| 651 | |
| 652 sub error { | |
| 653 my $self = shift; | |
| 654 $self->{'_error'} = shift if (@_); | |
| 655 return $self->{'_error'}; | |
| 656 } | |
| 657 | |
| 658 =head2 warnings | |
| 659 | |
| 660 Example : print LOG "There were ".$support->warnings." warnings.\n"; | |
| 661 Description : Returns the number of warnings encountered while running the | |
| 662 script (the warning counter is increased by $self->log_warning). | |
| 663 Return type : Int - number of warnings | |
| 664 Exceptions : none | |
| 665 Caller : general | |
| 666 | |
| 667 =cut | |
| 668 | |
| 669 sub warnings { | |
| 670 my $self = shift; | |
| 671 return $self->{'_warnings'}; | |
| 672 } | |
| 673 | |
| 674 =head2 serverroot | |
| 675 | |
| 676 Arg[1] : (optional) String - root directory of your ensembl sandbox | |
| 677 Example : my $serverroot = $support->serverroot; | |
| 678 Description : Getter/setter for the root directory of your ensembl sandbox. | |
| 679 This is set when ConversionSupport object is created, so | |
| 680 usually only used as a getter. | |
| 681 Return type : String - the server root directory | |
| 682 Exceptions : none | |
| 683 Caller : general | |
| 684 | |
| 685 =cut | |
| 686 | |
| 687 sub serverroot { | |
| 688 my $self = shift; | |
| 689 $self->{'_serverroot'} = shift if (@_); | |
| 690 return $self->{'_serverroot'}; | |
| 691 } | |
| 692 | |
| 693 =head2 get_database | |
| 694 | |
| 695 Arg[1] : String $database - the type of database to connect to | |
| 696 (eg core, otter) | |
| 697 Arg[2] : (optional) String $prefix - the prefix used for retrieving the | |
| 698 connection settings from the configuration | |
| 699 Example : my $db = $support->get_database('core'); | |
| 700 Description : Connects to the database specified. | |
| 701 Return type : DBAdaptor of the appropriate type | |
| 702 Exceptions : thrown if asking for unknown database | |
| 703 Caller : general | |
| 704 | |
| 705 =cut | |
| 706 | |
| 707 sub get_database { | |
| 708 my $self = shift; | |
| 709 my $database = shift or throw("You must provide a database"); | |
| 710 my $prefix = shift || ''; | |
| 711 $self->check_required_params( | |
| 712 "${prefix}host", | |
| 713 "${prefix}port", | |
| 714 "${prefix}user", | |
| 715 "${prefix}dbname", | |
| 716 ); | |
| 717 my %adaptors = ( | |
| 718 core => 'Bio::EnsEMBL::DBSQL::DBAdaptor', | |
| 719 ensembl => 'Bio::EnsEMBL::DBSQL::DBAdaptor', | |
| 720 evega => 'Bio::EnsEMBL::DBSQL::DBAdaptor', | |
| 721 otter => 'Bio::Otter::DBSQL::DBAdaptor', | |
| 722 vega => 'Bio::Otter::DBSQL::DBAdaptor', | |
| 723 compara => 'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor', | |
| 724 loutre => 'Bio::Vega::DBSQL::DBAdaptor', | |
| 725 ); | |
| 726 throw("Unknown database: $database") unless $adaptors{$database}; | |
| 727 | |
| 728 $self->dynamic_use($adaptors{$database}); | |
| 729 my $species = 'species' . $species_c; | |
| 730 my $dba = $adaptors{$database}->new( | |
| 731 -host => $self->param("${prefix}host"), | |
| 732 -port => $self->param("${prefix}port"), | |
| 733 -user => $self->param("${prefix}user"), | |
| 734 -pass => $self->param("${prefix}pass") || '', | |
| 735 -dbname => $self->param("${prefix}dbname"), | |
| 736 -group => $database, | |
| 737 -species => $species, | |
| 738 ); | |
| 739 #can use this approach to get dna from another db | |
| 740 # my $dna_db = $adaptors{$database}->new( | |
| 741 # -host => 'otterlive', | |
| 742 # -port => '3301', | |
| 743 # -user => $self->param("${prefix}user"), | |
| 744 # -pass => $self->param("${prefix}pass"), | |
| 745 # -dbname => 'loutre_human', | |
| 746 # ); | |
| 747 # $dba->dnadb($dna_db); | |
| 748 | |
| 749 # otherwise explicitely set the dnadb to itself - by default the Registry assumes | |
| 750 # a group 'core' for this now | |
| 751 $dba->dnadb($dba); | |
| 752 | |
| 753 $species_c++; | |
| 754 | |
| 755 $self->{'_dba'}->{$database} = $dba; | |
| 756 $self->{'_dba'}->{'default'} = $dba unless $self->{'_dba'}->{'default'}; | |
| 757 return $self->{'_dba'}->{$database}; | |
| 758 } | |
| 759 | |
| 760 | |
| 761 =head2 get_dbconnection | |
| 762 | |
| 763 Arg[1] : (optional) String $prefix - the prefix used for retrieving the | |
| 764 connection settings from the configuration | |
| 765 Example : my $dbh = $self->get_dbconnection; | |
| 766 Description : Connects to the database server specified. You don't have to | |
| 767 specify a database name (this is useful for running commands | |
| 768 like $dbh->do('show databases')). | |
| 769 Return type : DBI database handle | |
| 770 Exceptions : thrown if connection fails | |
| 771 Caller : general | |
| 772 Status : At Risk | |
| 773 | |
| 774 =cut | |
| 775 | |
| 776 sub get_dbconnection { | |
| 777 my $self = shift; | |
| 778 my $prefix = shift; | |
| 779 | |
| 780 $self->check_required_params( | |
| 781 "${prefix}host", | |
| 782 "${prefix}port", | |
| 783 "${prefix}user", | |
| 784 ); | |
| 785 | |
| 786 my $dsn = "DBI:" . ($self->param('driver')||'mysql') . | |
| 787 ":host=" . $self->param("${prefix}host") . | |
| 788 ";port=" . $self->param("${prefix}port"); | |
| 789 | |
| 790 if ($self->param("${prefix}dbname")) { | |
| 791 $dsn .= ";dbname=".$self->param("${prefix}dbname"); | |
| 792 } | |
| 793 | |
| 794 # warn $dsn; | |
| 795 | |
| 796 my $dbh; | |
| 797 eval{ | |
| 798 $dbh = DBI->connect($dsn, $self->param("${prefix}user"), | |
| 799 $self->param("${prefix}pass"), {'RaiseError' => 1, 'PrintError' => 0}); | |
| 800 }; | |
| 801 | |
| 802 if (!$dbh || $@ || !$dbh->ping) { | |
| 803 $self->log_error("Could not connect to db server as user ". | |
| 804 $self->param("${prefix}user") . | |
| 805 " using [$dsn] as a locator:\n" . $DBI::errstr . $@); | |
| 806 } | |
| 807 | |
| 808 $self->{'_dbh'} = $dbh; | |
| 809 return $self->{'_dbh'}; | |
| 810 | |
| 811 } | |
| 812 | |
| 813 | |
| 814 =head2 dba | |
| 815 | |
| 816 Arg[1] : (optional) String $database - type of db apaptor to retrieve | |
| 817 Example : my $dba = $support->dba; | |
| 818 Description : Getter for database adaptor. Returns default (i.e. created | |
| 819 first) db adaptor if no argument is provided. | |
| 820 Return type : Bio::EnsEMBL::DBSQL::DBAdaptor or Bio::Otter::DBSQL::DBAdaptor | |
| 821 Exceptions : none | |
| 822 Caller : general | |
| 823 | |
| 824 =cut | |
| 825 | |
| 826 sub dba { | |
| 827 my ($self, $database) = shift; | |
| 828 return $self->{'_dba'}->{$database} || $self->{'_dba'}->{'default'}; | |
| 829 } | |
| 830 | |
| 831 =head2 dynamic_use | |
| 832 | |
| 833 Arg [1] : String $classname - The name of the class to require/import | |
| 834 Example : $self->dynamic_use('Bio::EnsEMBL::DBSQL::DBAdaptor'); | |
| 835 Description: Requires and imports the methods for the classname provided, | |
| 836 checks the symbol table so that it doesnot re-require modules | |
| 837 that have already been required. | |
| 838 Returntype : true on success | |
| 839 Exceptions : Warns to standard error if module fails to compile | |
| 840 Caller : internal | |
| 841 | |
| 842 =cut | |
| 843 | |
| 844 sub dynamic_use { | |
| 845 my ($self, $classname) = @_; | |
| 846 my ($parent_namespace, $module) = $classname =~/^(.*::)(.*)$/ ? ($1,$2) : ('::', $classname); | |
| 847 | |
| 848 no strict 'refs'; | |
| 849 # return if module has already been imported | |
| 850 return 1 if $parent_namespace->{$module.'::'} && %{ $parent_namespace->{$module.'::'}||{} }; | |
| 851 | |
| 852 eval "require $classname"; | |
| 853 throw("Failed to require $classname: $@") if ($@); | |
| 854 $classname->import(); | |
| 855 | |
| 856 return 1; | |
| 857 } | |
| 858 | |
| 859 =head2 get_chrlength | |
| 860 | |
| 861 Arg[1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba | |
| 862 Arg[2] : (optional) String $version - coord_system version | |
| 863 Arg[3] : (optional) String $type - type of region eg chromsome (defaults to 'toplevel') | |
| 864 Arg[4] : (optional) Boolean - return non reference slies as well (required for haplotypes eq 6-COX) | |
| 865 Arg[5] : (optional) Override chromosome parameter filtering with this array reference. Empty denotes all. | |
| 866 Example : my $chr_length = $support->get_chrlength($dba); | |
| 867 Description : Get all chromosomes and their length from the database. Return | |
| 868 chr_name/length for the chromosomes the user requested (or all | |
| 869 chromosomes by default) | |
| 870 Return type : Hashref - chromosome_name => length | |
| 871 Exceptions : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor | |
| 872 Caller : general | |
| 873 | |
| 874 =cut | |
| 875 | |
| 876 sub get_chrlength { | |
| 877 my ($self, $dba, $version,$type,$include_non_reference,$chroms) = @_; | |
| 878 $dba ||= $self->dba; | |
| 879 $type ||= 'toplevel'; | |
| 880 throw("get_chrlength should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n") | |
| 881 unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')); | |
| 882 | |
| 883 my $sa = $dba->get_SliceAdaptor; | |
| 884 | |
| 885 my @chromosomes = map { $_->seq_region_name } | |
| 886 @{ $sa->fetch_all($type,$version,$include_non_reference) }; | |
| 887 my %chr = map { $_ => $sa->fetch_by_region($type, $_, undef, undef, undef, $version)->length } @chromosomes; | |
| 888 | |
| 889 my @wanted = $self->param('chromosomes'); | |
| 890 @wanted = @$chroms if defined $chroms and ref($chroms) eq 'ARRAY'; | |
| 891 | |
| 892 if (@wanted) { | |
| 893 # check if user supplied invalid chromosome names | |
| 894 foreach my $chr (@wanted) { | |
| 895 my $found = 0; | |
| 896 foreach my $chr_from_db (keys %chr) { | |
| 897 if ($chr_from_db eq $chr) { | |
| 898 $found = 1; | |
| 899 last; | |
| 900 } | |
| 901 } | |
| 902 unless ($found) { | |
| 903 warning("Didn't find chromosome $chr in database " . | |
| 904 $self->param('dbname')); | |
| 905 } | |
| 906 } | |
| 907 | |
| 908 # filter to requested chromosomes only | |
| 909 HASH: | |
| 910 foreach my $chr_from_db (keys %chr) { | |
| 911 foreach my $chr (@wanted) { | |
| 912 if ($chr_from_db eq $chr) { | |
| 913 next HASH; | |
| 914 } | |
| 915 } | |
| 916 delete($chr{$chr_from_db}); | |
| 917 } | |
| 918 } | |
| 919 | |
| 920 return \%chr; | |
| 921 } | |
| 922 | |
| 923 =head2 get_ensembl_chr_mapping | |
| 924 | |
| 925 Arg[1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba | |
| 926 Arg[2] : (optional) String $version - coord_system version | |
| 927 Example : my $ensembl_mapping = $support->get_ensembl_chr_mapping($dba); | |
| 928 Description : Gets a mapping between Vega chromosome names and their | |
| 929 equivalent Ensembl chromosomes. Works with non-reference chromosomes | |
| 930 Return type : Hashref - Vega name => Ensembl name | |
| 931 Exceptions : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor | |
| 932 Caller : general | |
| 933 | |
| 934 =cut | |
| 935 | |
| 936 sub get_ensembl_chr_mapping { | |
| 937 my ($self, $dba, $version) = @_; | |
| 938 $dba ||= $self->dba; | |
| 939 throw("get_ensembl_chr_mapping should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n") unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')); | |
| 940 | |
| 941 my $sa = $dba->get_SliceAdaptor; | |
| 942 my @chromosomes = map { $_->seq_region_name } | |
| 943 @{ $sa->fetch_all('chromosome', $version, 1) }; | |
| 944 | |
| 945 my %chrs; | |
| 946 foreach my $chr (@chromosomes) { | |
| 947 my $sr = $sa->fetch_by_region('chromosome', $chr, undef, undef, undef, $version); | |
| 948 my ($ensembl_name_attr) = @{ $sr->get_all_Attributes('ensembl_name') }; | |
| 949 if ($ensembl_name_attr) { | |
| 950 $chrs{$chr} = $ensembl_name_attr->value; | |
| 951 } else { | |
| 952 $chrs{$chr} = $chr; | |
| 953 } | |
| 954 } | |
| 955 return \%chrs; | |
| 956 } | |
| 957 | |
| 958 =head2 get_taxonomy_id | |
| 959 | |
| 960 Arg[1] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba | |
| 961 Example : my $sid = $support->get_taxonony_id($dba); | |
| 962 Description : Retrieves the taxononmy ID from the meta table | |
| 963 Return type : Int - the taxonomy ID | |
| 964 Exceptions : thrown if no taxonomy ID is found in the database | |
| 965 Caller : general | |
| 966 | |
| 967 =cut | |
| 968 | |
| 969 sub get_taxonomy_id { | |
| 970 my ($self, $dba) = @_; | |
| 971 $dba ||= $self->dba; | |
| 972 my $sql = 'SELECT meta_value FROM meta WHERE meta_key = "species.taxonomy_id"'; | |
| 973 my $sth = $dba->dbc->db_handle->prepare($sql); | |
| 974 $sth->execute; | |
| 975 my ($tid) = $sth->fetchrow_array; | |
| 976 $sth->finish; | |
| 977 $self->throw("Could not determine taxonomy_id from database.") unless $tid; | |
| 978 return $tid; | |
| 979 } | |
| 980 | |
| 981 =head2 get_species_scientific_name | |
| 982 | |
| 983 Arg[1] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba | |
| 984 Example : my $species = $support->get_species_scientific_name($dba); | |
| 985 Description : Retrieves the species scientific name (Genus species) from the | |
| 986 meta table | |
| 987 Return type : String - species scientific name | |
| 988 Exceptions : thrown if species name can not be determined from db | |
| 989 Caller : general | |
| 990 | |
| 991 =cut | |
| 992 | |
| 993 sub get_species_scientific_name { | |
| 994 my ($self, $dba) = @_; | |
| 995 $dba ||= $self->dba; | |
| 996 my $sql = "SELECT meta_value FROM meta WHERE meta_key = \'species.scientific_name\'"; | |
| 997 my $sth = $dba->dbc->db_handle->prepare($sql); | |
| 998 $sth->execute; | |
| 999 my @sp; | |
| 1000 while (my @row = $sth->fetchrow_array) { | |
| 1001 push @sp, $row[0]; | |
| 1002 } | |
| 1003 if (! @sp || @sp > 1) { | |
| 1004 $self->throw("Could not retrieve a single species scientific name from database."); | |
| 1005 } | |
| 1006 $sth->finish; | |
| 1007 my $species = $sp[0]; | |
| 1008 $species =~ s/ /_/g; | |
| 1009 return $species; | |
| 1010 } | |
| 1011 | |
| 1012 =head2 species | |
| 1013 | |
| 1014 Arg[1] : (optional) String $species - species name to set | |
| 1015 Example : my $species = $support->species; | |
| 1016 my $url = "http://vega.sanger.ac.uk/$species/"; | |
| 1017 Description : Getter/setter for species name (Genus_species). If not set, it's | |
| 1018 determined from database's meta table | |
| 1019 Return type : String - species name | |
| 1020 Exceptions : none | |
| 1021 Caller : general | |
| 1022 | |
| 1023 =cut | |
| 1024 | |
| 1025 sub species { | |
| 1026 my $self = shift; | |
| 1027 $self->{'_species'} = shift if (@_); | |
| 1028 # get species name from database if not set | |
| 1029 unless ($self->{'_species'}) { | |
| 1030 $self->{'_species'} = $self->get_species_scientific_name; | |
| 1031 } | |
| 1032 return $self->{'_species'}; | |
| 1033 } | |
| 1034 | |
| 1035 =head2 sort_chromosomes | |
| 1036 | |
| 1037 Arg[1] : (optional) Hashref $chr_hashref - Hashref with chr_name as keys | |
| 1038 Example : my $chr = { '6-COX' => 1, '1' => 1, 'X' => 1 }; | |
| 1039 my @sorted = $support->sort_chromosomes($chr); | |
| 1040 Description : Sorts chromosomes in an intuitive way (numerically, then | |
| 1041 alphabetically). If no chromosome hashref is passed, it's | |
| 1042 retrieve by calling $self->get_chrlength() | |
| 1043 Return type : List - sorted chromosome names | |
| 1044 Exceptions : thrown if no hashref is provided | |
| 1045 Caller : general | |
| 1046 | |
| 1047 =cut | |
| 1048 | |
| 1049 sub sort_chromosomes { | |
| 1050 my ($self, $chr_hashref) = @_; | |
| 1051 $chr_hashref = $self->get_chrlength unless ($chr_hashref); | |
| 1052 throw("You have to pass a hashref of your chromosomes") | |
| 1053 unless ($chr_hashref and ref($chr_hashref) eq 'HASH'); | |
| 1054 return (sort _by_chr_num keys %$chr_hashref); | |
| 1055 } | |
| 1056 | |
| 1057 =head2 _by_chr_num | |
| 1058 | |
| 1059 Example : my @sorted = sort _by_chr_num qw(X, 6-COX, 14, 7); | |
| 1060 Description : Subroutine to use in sort for sorting chromosomes. Sorts | |
| 1061 numerically, then alphabetically | |
| 1062 Return type : values to be used by sort | |
| 1063 Exceptions : none | |
| 1064 Caller : internal ($self->sort_chromosomes) | |
| 1065 | |
| 1066 =cut | |
| 1067 | |
| 1068 sub _by_chr_num { | |
| 1069 my @awords = split /-/, $a; | |
| 1070 my @bwords = split /-/, $b; | |
| 1071 | |
| 1072 my $anum = $awords[0]; | |
| 1073 my $bnum = $bwords[0]; | |
| 1074 | |
| 1075 if ($anum !~ /^[0-9]*$/) { | |
| 1076 if ($bnum !~ /^[0-9]*$/) { | |
| 1077 return $anum cmp $bnum; | |
| 1078 } else { | |
| 1079 return 1; | |
| 1080 } | |
| 1081 } | |
| 1082 if ($bnum !~ /^[0-9]*$/) { | |
| 1083 return -1; | |
| 1084 } | |
| 1085 | |
| 1086 if ($anum <=> $bnum) { | |
| 1087 return $anum <=> $bnum; | |
| 1088 } else { | |
| 1089 if ($#awords == 0) { | |
| 1090 return -1; | |
| 1091 } elsif ($#bwords == 0) { | |
| 1092 return 1; | |
| 1093 } else { | |
| 1094 return $awords[1] cmp $bwords[1]; | |
| 1095 } | |
| 1096 } | |
| 1097 } | |
| 1098 | |
| 1099 =head2 split_chromosomes_by_size | |
| 1100 | |
| 1101 Arg[1] : (optional) Int $cutoff - the cutoff in bp between small and | |
| 1102 large chromosomes | |
| 1103 Arg[2] : (optional) Boolean to include duplicate regions, ie PAR or not | |
| 1104 (default is no) | |
| 1105 Arg[3] : (optional) Coordsystem version to retrieve | |
| 1106 | |
| 1107 Example : my $chr_slices = $support->split_chromosomes_by_size; | |
| 1108 foreach my $block_size (keys %{ $chr_slices }) { | |
| 1109 print "Chromosomes with blocksize $block_size: "; | |
| 1110 print join(", ", map { $_->seq_region_name } | |
| 1111 @{ $chr_slices->{$block_size} }); | |
| 1112 } | |
| 1113 Description : Determines block sizes for storing DensityFeatures on | |
| 1114 chromosomes, and return slices for each chromosome. The block | |
| 1115 size is determined so that you have 150 bins for the smallest | |
| 1116 chromosome over 5 Mb in length. For chromosomes smaller than 5 | |
| 1117 Mb, an additional smaller block size is used to yield 150 bins | |
| 1118 for the overall smallest chromosome. This will result in | |
| 1119 reasonable resolution for small chromosomes and high | |
| 1120 performance for big ones. Does not return non-reference seq_regions | |
| 1121 Return type : Hashref (key: block size; value: Arrayref of chromosome | |
| 1122 Bio::EnsEMBL::Slices) | |
| 1123 Exceptions : none | |
| 1124 Caller : density scripts | |
| 1125 | |
| 1126 =cut | |
| 1127 | |
| 1128 sub split_chromosomes_by_size { | |
| 1129 my $self = shift; | |
| 1130 my $cutoff = shift || 5000000; | |
| 1131 my $dup = shift || 0; | |
| 1132 my $cs_version = shift; | |
| 1133 my $include_non_reference = 1; #get non reference slices | |
| 1134 my $slice_adaptor = $self->dba->get_SliceAdaptor; | |
| 1135 my $top_slices; | |
| 1136 if ($self->param('chromosomes')) { | |
| 1137 foreach my $chr ($self->param('chromosomes')) { | |
| 1138 push @{ $top_slices }, $slice_adaptor->fetch_by_region('chromosome', $chr); | |
| 1139 } | |
| 1140 } else { | |
| 1141 $top_slices = $slice_adaptor->fetch_all('chromosome',$cs_version,$include_non_reference,$dup); | |
| 1142 } | |
| 1143 | |
| 1144 # filter out patches, if present | |
| 1145 $top_slices = [ grep { $_->is_reference or $self->is_haplotype($_,$self->dba) } @$top_slices ]; | |
| 1146 | |
| 1147 my ($big_chr, $small_chr, $min_big_chr, $min_small_chr); | |
| 1148 foreach my $slice (@{ $top_slices }) { | |
| 1149 next if ($slice->length eq 10000); #hack for chrY pseudoslice | |
| 1150 if ($slice->length < $cutoff) { | |
| 1151 if (! $min_small_chr or ($min_small_chr > $slice->length)) { | |
| 1152 $min_small_chr = $slice->length; | |
| 1153 } | |
| 1154 # push small chromosomes onto $small_chr | |
| 1155 push @{ $small_chr }, $slice; | |
| 1156 } | |
| 1157 elsif (! $min_big_chr or ($min_big_chr > $slice->length) ){ | |
| 1158 $min_big_chr = $slice->length; | |
| 1159 } | |
| 1160 # push _all_ chromosomes onto $big_chr | |
| 1161 push @{ $big_chr }, $slice; | |
| 1162 } | |
| 1163 my $chr_slices; | |
| 1164 $chr_slices->{int($min_big_chr/150)} = $big_chr if $min_big_chr; | |
| 1165 $chr_slices->{int($min_small_chr/150)} = $small_chr if $min_small_chr; | |
| 1166 return $chr_slices; | |
| 1167 } | |
| 1168 | |
| 1169 =head2 log | |
| 1170 | |
| 1171 Arg[1] : String $txt - the text to log | |
| 1172 Arg[2] : Int $indent - indentation level for log message | |
| 1173 Example : my $log = $support->log_filehandle; | |
| 1174 $support->log('Log foo.\n', 1); | |
| 1175 Description : Logs a message to the filehandle initialised by calling | |
| 1176 $self->log_filehandle(). You can supply an indentation level | |
| 1177 to get nice hierarchical log messages. | |
| 1178 Return type : true on success | |
| 1179 Exceptions : thrown when no filehandle can be obtained | |
| 1180 Caller : general | |
| 1181 | |
| 1182 =cut | |
| 1183 | |
| 1184 sub log { | |
| 1185 my ($self, $txt, $indent) = @_; | |
| 1186 $indent ||= 0; | |
| 1187 | |
| 1188 # strip off leading linebreaks so that indenting doesn't break | |
| 1189 $txt =~ s/^(\n*)//; | |
| 1190 | |
| 1191 $txt = $1." "x$indent . $txt; | |
| 1192 my $fh = $self->{'_log_filehandle'}; | |
| 1193 throw("Unable to obtain log filehandle") unless $fh; | |
| 1194 print $fh "$txt"; | |
| 1195 return(1); | |
| 1196 } | |
| 1197 | |
| 1198 =head2 lock_log | |
| 1199 | |
| 1200 Description : Use flock-style locks to lock log and fastforward to end. | |
| 1201 Useful if log is being written to by multiple processes. | |
| 1202 =cut | |
| 1203 | |
| 1204 sub lock_log { | |
| 1205 my ($self) = @_; | |
| 1206 | |
| 1207 my $fh = $self->{'_log_filehandle'}; | |
| 1208 return if -t $fh or -p $fh; # Shouldn't lock such things | |
| 1209 flock($self->{'_log_filehandle'},LOCK_EX) || return 0; | |
| 1210 seek($self->{'_log_filehandle'},0,SEEK_END); # fail ok, prob not reg file | |
| 1211 return 1; | |
| 1212 } | |
| 1213 | |
| 1214 =head2 unlock_log | |
| 1215 | |
| 1216 Description : Unlock log previously locked by lock_log. | |
| 1217 | |
| 1218 =cut | |
| 1219 | |
| 1220 sub unlock_log { | |
| 1221 my ($self) = @_; | |
| 1222 | |
| 1223 my $fh = $self->{'_log_filehandle'}; | |
| 1224 return if -t $fh or -p $fh; # We don't lock such things | |
| 1225 # flush is implicit in flock | |
| 1226 flock($self->{'_log_filehandle'},LOCK_UN) || return 0; | |
| 1227 return 1; | |
| 1228 } | |
| 1229 | |
| 1230 =head2 log_warning | |
| 1231 | |
| 1232 Arg[1] : String $txt - the warning text to log | |
| 1233 Arg[2] : Int $indent - indentation level for log message | |
| 1234 Arg[3] : Bool - add a line break before warning if true | |
| 1235 Example : my $log = $support->log_filehandle; | |
| 1236 $support->log_warning('Log foo.\n', 1); | |
| 1237 Description : Logs a message via $self->log and increases the warning counter. | |
| 1238 Return type : true on success | |
| 1239 Exceptions : none | |
| 1240 Caller : general | |
| 1241 | |
| 1242 =cut | |
| 1243 | |
| 1244 sub log_warning { | |
| 1245 my ($self, $txt, $indent, $break) = @_; | |
| 1246 $txt = "WARNING: " . $txt; | |
| 1247 $txt = "\n$txt" if ($break); | |
| 1248 $self->log($txt, $indent); | |
| 1249 $self->{'_warnings'}++; | |
| 1250 return(1); | |
| 1251 } | |
| 1252 | |
| 1253 =head2 log_error | |
| 1254 | |
| 1255 Arg[1] : String $txt - the error text to log | |
| 1256 Arg[2] : Int $indent - indentation level for log message | |
| 1257 Example : my $log = $support->log_filehandle; | |
| 1258 $support->log_error('Log foo.\n', 1); | |
| 1259 Description : Logs a message via $self->log and exits the script. | |
| 1260 Return type : none | |
| 1261 Exceptions : none | |
| 1262 Caller : general | |
| 1263 | |
| 1264 =cut | |
| 1265 | |
| 1266 sub log_error { | |
| 1267 my ($self, $txt, $indent) = @_; | |
| 1268 $txt = "ERROR: ".$txt; | |
| 1269 $self->log($txt, $indent); | |
| 1270 $self->log("Exiting.\n"); | |
| 1271 exit; | |
| 1272 } | |
| 1273 | |
| 1274 =head2 log_verbose | |
| 1275 | |
| 1276 Arg[1] : String $txt - the warning text to log | |
| 1277 Arg[2] : Int $indent - indentation level for log message | |
| 1278 Example : my $log = $support->log_filehandle; | |
| 1279 $support->log_verbose('Log this verbose message.\n', 1); | |
| 1280 Description : Logs a message via $self->log if --verbose option was used | |
| 1281 Return type : TRUE on success, FALSE if not verbose | |
| 1282 Exceptions : none | |
| 1283 Caller : general | |
| 1284 | |
| 1285 =cut | |
| 1286 | |
| 1287 sub log_verbose { | |
| 1288 my ($self, $txt, $indent) = @_; | |
| 1289 return(0) unless $self->param('verbose'); | |
| 1290 $self->log($txt, $indent); | |
| 1291 return(1); | |
| 1292 } | |
| 1293 | |
| 1294 =head2 log_stamped | |
| 1295 | |
| 1296 Arg[1] : String $txt - the warning text to log | |
| 1297 Arg[2] : Int $indent - indentation level for log message | |
| 1298 Example : my $log = $support->log_filehandle; | |
| 1299 $support->log_stamped('Log this stamped message.\n', 1); | |
| 1300 Description : Appends timestamp and memory usage to a message and logs it via | |
| 1301 $self->log | |
| 1302 Return type : TRUE on success | |
| 1303 Exceptions : none | |
| 1304 Caller : general | |
| 1305 | |
| 1306 =cut | |
| 1307 | |
| 1308 sub log_stamped { | |
| 1309 my ($self, $txt, $indent) = @_; | |
| 1310 # append timestamp and memory usage to log text | |
| 1311 $txt =~ s/(\n*)$//; | |
| 1312 $txt .= " ".$self->date_and_mem.$1; | |
| 1313 $self->log($txt, $indent); | |
| 1314 return(1); | |
| 1315 } | |
| 1316 | |
| 1317 =head2 log_filehandle | |
| 1318 | |
| 1319 Arg[1] : (optional) String $mode - file access mode | |
| 1320 Example : my $log = $support->log_filehandle; | |
| 1321 # print to the filehandle | |
| 1322 print $log 'Lets start logging...\n'; | |
| 1323 # log via the wrapper $self->log() | |
| 1324 $support->log('Another log message.\n'); | |
| 1325 Description : Returns a filehandle for logging (STDERR by default, logfile if | |
| 1326 set from config or commandline). You can use the filehandle | |
| 1327 directly to print to, or use the smart wrapper $self->log(). | |
| 1328 Logging mode (truncate or append) can be set by passing the | |
| 1329 mode as an argument to log_filehandle(), or with the | |
| 1330 --logappend commandline option (default: truncate) | |
| 1331 Return type : Filehandle - the filehandle to log to | |
| 1332 Exceptions : thrown if logfile can't be opened | |
| 1333 Caller : general | |
| 1334 | |
| 1335 =cut | |
| 1336 | |
| 1337 sub log_filehandle { | |
| 1338 my ($self, $mode, $date) = @_; | |
| 1339 $mode ||= '>'; | |
| 1340 $mode = '>>' if ($self->param('logappend')); | |
| 1341 my $fh = \*STDERR; | |
| 1342 if (my $logfile = $self->param('logfile')) { | |
| 1343 $logfile .= "_$date" if $date; | |
| 1344 if (my $logpath = $self->param('logpath')) { | |
| 1345 unless (-e $logpath) { | |
| 1346 system("mkdir $logpath") == 0 or | |
| 1347 $self->log_error("Can't create log dir $logpath: $!\n"); | |
| 1348 } | |
| 1349 $logfile = "$logpath/$logfile"; | |
| 1350 } | |
| 1351 open($fh, "$mode", $logfile) or throw( | |
| 1352 "Unable to open $logfile for writing: $!"); | |
| 1353 } | |
| 1354 $self->{'_log_filehandle'} = $fh; | |
| 1355 return $self->{'_log_filehandle'}; | |
| 1356 } | |
| 1357 | |
| 1358 =head2 filehandle | |
| 1359 | |
| 1360 Arg[1] : String $mode - file access mode | |
| 1361 Arg[2] : String $file - input or output file | |
| 1362 Example : my $fh = $support->filehandle('>>', '/path/to/file'); | |
| 1363 # print to the filehandle | |
| 1364 print $fh 'Your text goes here...\n'; | |
| 1365 Description : Returns a filehandle (*STDOUT for writing, *STDIN for reading | |
| 1366 by default) to print to or read from. | |
| 1367 Return type : Filehandle - the filehandle | |
| 1368 Exceptions : thrown if file can't be opened | |
| 1369 Caller : general | |
| 1370 | |
| 1371 =cut | |
| 1372 | |
| 1373 sub filehandle { | |
| 1374 my ($self, $mode, $file) = @_; | |
| 1375 $mode ||= ">"; | |
| 1376 my $fh; | |
| 1377 if ($file) { | |
| 1378 open($fh, "$mode", $file) or throw( | |
| 1379 "Unable to open $file for writing: $!"); | |
| 1380 } elsif ($mode =~ />/) { | |
| 1381 $fh = \*STDOUT; | |
| 1382 } elsif ($mode =~ /</) { | |
| 1383 $fh = \*STDIN; | |
| 1384 } | |
| 1385 return $fh; | |
| 1386 } | |
| 1387 | |
| 1388 =head2 init_log_date | |
| 1389 | |
| 1390 Example : $support->init_log_date; | |
| 1391 Description : Opens a filehandle to a logfile with the date in the file name | |
| 1392 Return type : Filehandle - the log filehandle | |
| 1393 Exceptions : none | |
| 1394 Caller : general | |
| 1395 | |
| 1396 =cut | |
| 1397 | |
| 1398 sub init_log_date { | |
| 1399 my $self = shift; | |
| 1400 my $date = $self->date; | |
| 1401 return $self->init_log($date); | |
| 1402 } | |
| 1403 | |
| 1404 =head2 init_log | |
| 1405 | |
| 1406 Example : $support->init_log; | |
| 1407 Description : Opens a filehandle to the logfile and prints some header | |
| 1408 information to this file. This includes script name, date, user | |
| 1409 running the script and parameters the script will be running | |
| 1410 with. | |
| 1411 Return type : Filehandle - the log filehandle | |
| 1412 Exceptions : none | |
| 1413 Caller : general | |
| 1414 | |
| 1415 =cut | |
| 1416 | |
| 1417 sub init_log { | |
| 1418 my $self = shift; | |
| 1419 my $date = shift; | |
| 1420 | |
| 1421 # get a log filehandle | |
| 1422 my $log = $self->log_filehandle(undef,$date); | |
| 1423 | |
| 1424 # print script name, date, user who is running it | |
| 1425 my $hostname = `hostname`; | |
| 1426 chomp $hostname; | |
| 1427 my $script = "$hostname:$Bin/$Script"; | |
| 1428 my $user = `whoami`; | |
| 1429 chomp $user; | |
| 1430 $self->log("Script: $script\nDate: ".$self->date_and_time."\nUser: $user\n"); | |
| 1431 | |
| 1432 # print parameters the script is running with | |
| 1433 $self->log("Parameters:\n\n"); | |
| 1434 $self->log($self->list_all_params); | |
| 1435 | |
| 1436 # remember start time | |
| 1437 $self->{'_start_time'} = time; | |
| 1438 | |
| 1439 return $log; | |
| 1440 } | |
| 1441 | |
| 1442 =head2 finish_log | |
| 1443 | |
| 1444 Example : $support->finish_log; | |
| 1445 Description : Writes footer information to a logfile. This includes the | |
| 1446 number of logged warnings, timestamp and memory footprint. | |
| 1447 Return type : TRUE on success | |
| 1448 Exceptions : none | |
| 1449 Caller : general | |
| 1450 | |
| 1451 =cut | |
| 1452 | |
| 1453 sub finish_log { | |
| 1454 my $self = shift; | |
| 1455 $self->log("\nAll done. ".$self->warnings." warnings. "); | |
| 1456 if ($self->{'_start_time'}) { | |
| 1457 $self->log("Runtime "); | |
| 1458 my $diff = time - $self->{'_start_time'}; | |
| 1459 my $sec = $diff % 60; | |
| 1460 $diff = ($diff - $sec) / 60; | |
| 1461 my $min = $diff % 60; | |
| 1462 my $hours = ($diff - $min) / 60; | |
| 1463 $self->log("${hours}h ${min}min ${sec}sec "); | |
| 1464 } | |
| 1465 $self->log($self->date_and_mem."\n\n"); | |
| 1466 return(1); | |
| 1467 } | |
| 1468 | |
| 1469 =head2 date_and_mem | |
| 1470 | |
| 1471 Example : print LOG "Time, memory usage: ".$support->date_and_mem."\n"; | |
| 1472 Description : Prints a timestamp and the memory usage of your script. | |
| 1473 Return type : String - timestamp and memory usage | |
| 1474 Exceptions : none | |
| 1475 Caller : general | |
| 1476 | |
| 1477 =cut | |
| 1478 | |
| 1479 sub date_and_mem { | |
| 1480 my $date = strftime "%Y-%m-%d %T", localtime; | |
| 1481 my $mem = `ps -p $$ -o vsz |tail -1`; | |
| 1482 chomp $mem; | |
| 1483 return "[$date, mem $mem]"; | |
| 1484 } | |
| 1485 | |
| 1486 =head2 date | |
| 1487 | |
| 1488 Example : print "Date: " . $support->date . "\n"; | |
| 1489 Description : Prints a nicely formatted datetamp (YYYY-DD-MM) | |
| 1490 Return type : String - the timestamp | |
| 1491 Exceptions : none | |
| 1492 Caller : general | |
| 1493 | |
| 1494 =cut | |
| 1495 | |
| 1496 sub date { | |
| 1497 return strftime "%Y-%m-%d", localtime; | |
| 1498 } | |
| 1499 | |
| 1500 =head2 date_and_time | |
| 1501 | |
| 1502 Example : print "Date: " . $support->date . "\n"; | |
| 1503 Description : Prints a nicely formatted timestamp (YYYY-DD-MM hh:mm:ss) | |
| 1504 Return type : String - the timestamp | |
| 1505 Exceptions : none | |
| 1506 Caller : general | |
| 1507 | |
| 1508 =cut | |
| 1509 | |
| 1510 sub date_and_time { | |
| 1511 return strftime "%Y-%m-%d %T", localtime; | |
| 1512 } | |
| 1513 | |
| 1514 =head2 format_time | |
| 1515 | |
| 1516 Example : print $support->format_time($gene->modifed_date) . "\n"; | |
| 1517 Description : Prints timestamps from the database | |
| 1518 Return type : String - nicely formatted time stamp | |
| 1519 Exceptions : none | |
| 1520 Caller : general | |
| 1521 | |
| 1522 =cut | |
| 1523 | |
| 1524 | |
| 1525 sub date_format { | |
| 1526 my( $self, $time, $format ) = @_; | |
| 1527 my( $d,$m,$y) = (localtime($time))[3,4,5]; | |
| 1528 my %S = ('d'=>sprintf('%02d',$d),'m'=>sprintf('%02d',$m+1),'y'=>$y+1900); | |
| 1529 (my $res = $format ) =~s/%(\w)/$S{$1}/ge; | |
| 1530 return $res; | |
| 1531 } | |
| 1532 | |
| 1533 | |
| 1534 =head2 mem | |
| 1535 | |
| 1536 Example : print "Memory usage: " . $support->mem . "\n"; | |
| 1537 Description : Prints the memory used by your script. Not sure about platform | |
| 1538 dependence of this call ... | |
| 1539 Return type : String - memory usage | |
| 1540 Exceptions : none | |
| 1541 Caller : general | |
| 1542 | |
| 1543 =cut | |
| 1544 | |
| 1545 sub mem { | |
| 1546 my $mem = `ps -p $$ -o vsz |tail -1`; | |
| 1547 chomp $mem; | |
| 1548 return $mem; | |
| 1549 } | |
| 1550 | |
| 1551 =head2 commify | |
| 1552 | |
| 1553 Arg[1] : Int $num - a number to commify | |
| 1554 Example : print "An easy to read number: ".$self->commify(100000000); | |
| 1555 # will print 100,000,000 | |
| 1556 Description : put commas into a number to make it easier to read | |
| 1557 Return type : a string representing the commified number | |
| 1558 Exceptions : none | |
| 1559 Caller : general | |
| 1560 Status : stable | |
| 1561 | |
| 1562 =cut | |
| 1563 | |
| 1564 sub commify { | |
| 1565 my $self = shift; | |
| 1566 my $num = shift; | |
| 1567 | |
| 1568 $num = reverse($num); | |
| 1569 $num =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; | |
| 1570 | |
| 1571 return scalar reverse $num; | |
| 1572 } | |
| 1573 | |
| 1574 =head2 fetch_non_hidden_slices | |
| 1575 | |
| 1576 Arg[1] : B::E::SliceAdaptor | |
| 1577 Arg[2] : B::E::AttributeAdaptor | |
| 1578 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default | |
| 1579 Arg[4] : string $coord_system_version (optional) - 'otter' by default | |
| 1580 Example : $chroms = $support->fetch_non_hidden_slice($sa,$aa); | |
| 1581 Description : retrieve all slices from a loutre database that don't have a hidden attribute. | |
| 1582 Doesn't retrieve non-reference slices | |
| 1583 Return type : arrayref | |
| 1584 Caller : general | |
| 1585 Status : stable | |
| 1586 | |
| 1587 =cut | |
| 1588 | |
| 1589 sub fetch_non_hidden_slices { | |
| 1590 my $self = shift; | |
| 1591 my $aa = shift or throw("You must supply an attribute adaptor"); | |
| 1592 my $sa = shift or throw("You must supply a slice adaptor"); | |
| 1593 my $cs = shift || 'chromosome'; | |
| 1594 my $cv = shift || 'Otter'; | |
| 1595 my $visible_chroms; | |
| 1596 foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) { | |
| 1597 my $chrom_name = $chrom->name; | |
| 1598 my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden'); | |
| 1599 if ( scalar(@$attribs) > 1 ) { | |
| 1600 $self->log_warning("More than one hidden attribute for chromosome $chrom_name\n"); | |
| 1601 } | |
| 1602 elsif ($attribs->[0]->value == 0) { | |
| 1603 push @$visible_chroms, $chrom; | |
| 1604 } | |
| 1605 elsif ($attribs->[0]->value == 1) { | |
| 1606 $self->log_verbose("chromosome $chrom_name is hidden\n"); | |
| 1607 } | |
| 1608 else { | |
| 1609 $self->log_warning("No hidden attribute for chromosome $chrom_name\n"); | |
| 1610 } | |
| 1611 } | |
| 1612 return $visible_chroms; | |
| 1613 } | |
| 1614 | |
| 1615 =head2 get_non_hidden_slice_names | |
| 1616 | |
| 1617 Arg[1] : B::E::SliceAdaptor | |
| 1618 Arg[2] : B::E::AttributeAdaptor | |
| 1619 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default | |
| 1620 Arg[4] : string $coord_system_version (optional) - 'otter' by default | |
| 1621 Example : $chrom_names = $support->get_non_hidden_slice_names($sa,$aa); | |
| 1622 Description : retrieve names of all slices from a loutre database that don't have a hidden attribute. | |
| 1623 Doesn't retrieve non-reference slices | |
| 1624 Return type : arrayref of names of all non-hidden slices | |
| 1625 Caller : general | |
| 1626 Status : stable | |
| 1627 | |
| 1628 =cut | |
| 1629 | |
| 1630 sub get_non_hidden_slice_names { | |
| 1631 my $self = shift; | |
| 1632 my $aa = shift or throw("You must supply an attribute adaptor"); | |
| 1633 my $sa = shift or throw("You must supply a slice adaptor"); | |
| 1634 my $cs = shift || 'chromosome'; | |
| 1635 my $cv = shift || 'Otter'; | |
| 1636 my $visible_chrom_names; | |
| 1637 foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) { | |
| 1638 my $chrom_name = $chrom->seq_region_name; | |
| 1639 my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden'); | |
| 1640 if ( scalar(@$attribs) > 1 ) { | |
| 1641 $self->log_warning("More than one hidden attribute for chromosome $chrom_name\n"); | |
| 1642 } | |
| 1643 elsif ($attribs->[0]->value == 0) { | |
| 1644 push @$visible_chrom_names, $chrom_name; | |
| 1645 } | |
| 1646 elsif ($attribs->[0]->value == 1) { | |
| 1647 $self->log_verbose("chromosome $chrom_name is hidden\n"); | |
| 1648 } | |
| 1649 else { | |
| 1650 $self->log_warning("No hidden attribute for chromosome $chrom_name\n"); | |
| 1651 } | |
| 1652 } | |
| 1653 return $visible_chrom_names; | |
| 1654 } | |
| 1655 | |
| 1656 | |
| 1657 =head2 get_wanted_chromosomes | |
| 1658 | |
| 1659 Arg[1] : B::E::SliceAdaptor | |
| 1660 Arg[2] : B::E::AttributeAdaptor | |
| 1661 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default | |
| 1662 Arg[4] : string $coord_system_version (optional) - 'otter' by default | |
| 1663 Example : $chr_names = $support->get_wanted_chromosomes($laa,$lsa); | |
| 1664 Description : retrieve names of slices from a lutra database that are ready for dumping to Vega. | |
| 1665 Deals with list of names to ignore (ignore_chr = LIST) | |
| 1666 Return type : arrayref of slices | |
| 1667 Caller : general | |
| 1668 Status : stable | |
| 1669 | |
| 1670 =cut | |
| 1671 | |
| 1672 sub get_wanted_chromosomes { | |
| 1673 my $self = shift; | |
| 1674 my $aa = shift or throw("You must supply an attribute adaptor"); | |
| 1675 my $sa = shift or throw("You must supply a slice adaptor"); | |
| 1676 my $cs = shift || 'chromosome'; | |
| 1677 my $cv = shift || 'Otter'; | |
| 1678 my $export_mode = $self->param('release_type'); | |
| 1679 my $release = $self->param('vega_release'); | |
| 1680 my $names; | |
| 1681 my $chroms = $self->fetch_non_hidden_slices($aa,$sa,$cs,$cv); | |
| 1682 CHROM: | |
| 1683 foreach my $chrom (@$chroms) { | |
| 1684 my $attribs = $aa->fetch_all_by_Slice($chrom); | |
| 1685 my $vals = $self->get_attrib_values($attribs,'vega_export_mod'); | |
| 1686 if (scalar(@$vals > 1)) { | |
| 1687 $self->log_warning ("Multiple attribs for \'vega_export_mod\', please fix before continuing"); | |
| 1688 exit; | |
| 1689 } | |
| 1690 next CHROM if (! grep { $_ eq $export_mode} @$vals); | |
| 1691 $vals = $self->get_attrib_values($attribs,'vega_release',$release); | |
| 1692 if (scalar(@$vals > 1)) { | |
| 1693 $self->log_warning ("Multiple attribs for \'vega_release\' value = $release , please fix before continuing"); | |
| 1694 exit; | |
| 1695 } | |
| 1696 next CHROM if (! grep { $_ eq $release} @$vals); | |
| 1697 my $name = $chrom->seq_region_name; | |
| 1698 if (my @ignored = $self->param('ignore_chr')) { | |
| 1699 next CHROM if (grep {$_ eq $name} @ignored); | |
| 1700 } | |
| 1701 push @{$names}, $name; | |
| 1702 } | |
| 1703 return $names; | |
| 1704 } | |
| 1705 | |
| 1706 =head2 is_haplotype | |
| 1707 | |
| 1708 Arg[1] : B::E::Slice | |
| 1709 Arg[2]: : B::E::DBAdaptor (optional, if you don't supply one then the *first* one you generated is returned, which may or may not be what you want!) | |
| 1710 Description : Is the slice a Vega haplotype? At the moment this is | |
| 1711 implemented by testing for presence of vega_ref_chrom but non_ref | |
| 1712 which is correct in practice, but really misses the prupose of | |
| 1713 vega_ref_chrom, so this might bite us if that changes. | |
| 1714 Return type : boolean | |
| 1715 | |
| 1716 =cut | |
| 1717 | |
| 1718 sub is_haplotype { | |
| 1719 my ($self,$slice,$dba) = @_; | |
| 1720 | |
| 1721 $dba ||= $self->dba; | |
| 1722 my $aa = $dba->get_adaptor('Attribute'); | |
| 1723 | |
| 1724 my $attribs = $aa->fetch_all_by_Slice($slice); | |
| 1725 return (@{$self->get_attrib_values($attribs,'vega_ref_chrom')} and | |
| 1726 @{$self->get_attrib_values($attribs,'non_ref',1)}); | |
| 1727 } | |
| 1728 | |
| 1729 =head2 get_unique_genes | |
| 1730 | |
| 1731 Arg[1] : B::E::Slice | |
| 1732 Arg[2] : B::E::DBAdaptor (optional, if you don't supply one then the *first* one you generated is returned, which may or may not be what you want!) | |
| 1733 Example : $genes = $support->get_unique_genes($slice,$dba); | |
| 1734 Description : Retrieve genes that are only on the slice itself - used for human where assembly patches | |
| 1735 are in the assembly_exception table. Needs the PATCHes to have 'non_ref' seq_region_attributes. | |
| 1736 Return type : arrayref of genes | |
| 1737 Caller : general | |
| 1738 Status : stable | |
| 1739 | |
| 1740 =cut | |
| 1741 | |
| 1742 sub get_unique_genes { | |
| 1743 my $self = shift; | |
| 1744 my ($slice,$dba) = @_; | |
| 1745 $slice or throw("You must supply a slice"); | |
| 1746 $dba ||= $self->dba; | |
| 1747 | |
| 1748 my $sa = $dba->get_adaptor('Slice'); | |
| 1749 my $ga = $dba->get_adaptor('Gene'); | |
| 1750 my $patch = 0; | |
| 1751 my $genes = []; | |
| 1752 if ( ! $slice->is_reference() and ! $self->is_haplotype($slice,$dba) ) { | |
| 1753 # if ( 0 ) { | |
| 1754 $patch = 1; | |
| 1755 my $slices = $sa->fetch_by_region_unique( $slice->coord_system_name(),$slice->seq_region_name(),undef,undef,undef,$slice->coord_system()->version() ); | |
| 1756 foreach my $slice ( @{$slices} ) { | |
| 1757 push @$genes,@{$ga->fetch_all_by_Slice($slice)}; | |
| 1758 # my $start = $slice->start; | |
| 1759 } | |
| 1760 } | |
| 1761 else { | |
| 1762 $genes = $ga->fetch_all_by_Slice($slice); | |
| 1763 } | |
| 1764 return ($genes, $patch); | |
| 1765 } | |
| 1766 | |
| 1767 | |
| 1768 | |
| 1769 =head2 get_attrib_values | |
| 1770 | |
| 1771 Arg[1] : Arrayref of B::E::Attributes | |
| 1772 Arg[2] : 'code' to search for | |
| 1773 Arg[3] : 'value' to search for (optional) | |
| 1774 Example : my $c = $self->get_attrib_values($attribs,'name')); | |
| 1775 Description : (i) In the absence of an attribute value argument, examines an arrayref | |
| 1776 of B::E::Attributes for a particular attribute type, returning the values | |
| 1777 for each attribute of that type. Can therefore be used to test for the | |
| 1778 number of attributes of that type. | |
| 1779 (ii) In the presence of the optional value argument it returns all | |
| 1780 attributes with that value ie can be used to test for the presence of an | |
| 1781 attribute with that particular value. | |
| 1782 Return type : arrayref of values for that attribute | |
| 1783 Caller : general | |
| 1784 Status : stable | |
| 1785 | |
| 1786 =cut | |
| 1787 | |
| 1788 sub get_attrib_values { | |
| 1789 my $self = shift; | |
| 1790 my $attribs = shift; | |
| 1791 my $code = shift; | |
| 1792 my $value = shift; | |
| 1793 if (my @atts = grep {$_->code eq $code } @$attribs) { | |
| 1794 my $r = []; | |
| 1795 if ($value) { | |
| 1796 if (my @values = grep {$_->value eq $value} @atts) { | |
| 1797 foreach (@values) { | |
| 1798 push @$r, $_->value; | |
| 1799 } | |
| 1800 return $r; | |
| 1801 } | |
| 1802 else { | |
| 1803 return []; | |
| 1804 } | |
| 1805 } | |
| 1806 else { | |
| 1807 foreach (@atts) { | |
| 1808 push @$r, $_->value; | |
| 1809 } | |
| 1810 return $r; | |
| 1811 } | |
| 1812 } | |
| 1813 else { | |
| 1814 return []; | |
| 1815 } | |
| 1816 } | |
| 1817 | |
| 1818 =head2 fix_attrib_value | |
| 1819 | |
| 1820 Arg[1] : Arrayref of existing B::E::Attributes | |
| 1821 Arg[2] : dbID of object | |
| 1822 Arg[3] : name of object (just for reporting) | |
| 1823 Arg[4] : attrib_type.code | |
| 1824 Arg[5] : attrib_type.value | |
| 1825 Arg[6] : interactive ? (0 by default) | |
| 1826 Arg[7] : table | |
| 1827 Example : $support->fix_attrib_value($attribs,$chr_id,$chr_name,'vega_export_mod','N',1); | |
| 1828 Description : adds a new attribute to an object, or updates an existing attribute with a new value | |
| 1829 Can be run in interactive or non-interactive mode (default) | |
| 1830 Return type : arrayref of results | |
| 1831 Caller : general | |
| 1832 Status : only ever tested with seq_region_attributes to date | |
| 1833 | |
| 1834 =cut | |
| 1835 | |
| 1836 sub fix_attrib_value { | |
| 1837 my $self = shift; | |
| 1838 my $attribs = shift; | |
| 1839 my $id = shift; | |
| 1840 my $name = shift; | |
| 1841 my $code = shift; | |
| 1842 my $value = shift; | |
| 1843 my $interact = shift || 0; | |
| 1844 my $table = shift || 'seq_region_attrib'; | |
| 1845 | |
| 1846 #transiently set interactive parameter to zero | |
| 1847 my $int_before; | |
| 1848 if (! $interact) { | |
| 1849 $int_before = $self->param('interactive'); | |
| 1850 $self->param('interactive',0); | |
| 1851 } | |
| 1852 | |
| 1853 #get any existing value(s) for this attribute | |
| 1854 my $existings = $self->get_attrib_values($attribs,$code); | |
| 1855 | |
| 1856 #add a new attribute if there is none... | |
| 1857 if (! @$existings ) { | |
| 1858 if ($self->user_proceed("Do you want to set $name attrib (code = $code) to value $value ?")) { | |
| 1859 my $r = $self->store_new_attribute($id,$code,$value); | |
| 1860 | |
| 1861 #reset interactive parameter | |
| 1862 $self->param('interactive',$int_before) if (! $interact); | |
| 1863 return $r; | |
| 1864 } | |
| 1865 } | |
| 1866 #...warn and exit if you're trying to update more than one value for the same attribute... | |
| 1867 elsif (scalar @$existings > 1) { | |
| 1868 $self->log_warning("You shouldn't be trying to update multiple attributes with the same code at once ($name:$code,$value), looks like you have duplicate entries in the (seq_region_)attrib table\n"); | |
| 1869 exit; | |
| 1870 } | |
| 1871 | |
| 1872 #...or update an attribute with new values... | |
| 1873 else { | |
| 1874 my $existing = $existings->[0]; | |
| 1875 if ($existing ne $value) { | |
| 1876 if ($self->user_proceed("Do you want to reset $name attrib (code = $code) from $existing to $value ?")) { | |
| 1877 my $r = $self->update_attribute($id,$code,$value); | |
| 1878 $self->param('interactive',$int_before) if (! $interact); | |
| 1879 push @$r, $existing; | |
| 1880 return $r; | |
| 1881 } | |
| 1882 } | |
| 1883 #...or make no change | |
| 1884 else { | |
| 1885 $self->param('interactive',$int_before) if (! $interact); | |
| 1886 return []; | |
| 1887 } | |
| 1888 } | |
| 1889 } | |
| 1890 | |
| 1891 =head2 _get_attrib_id | |
| 1892 | |
| 1893 Arg[1] : attrib_type.code | |
| 1894 Arg[2] : database handle | |
| 1895 Example : $self->_get_attrib_id('name',$dbh) | |
| 1896 Description : get attrib_type.attrib_type_id from a attrib_type.code | |
| 1897 Return type : attrib_type.attrib_type_id | |
| 1898 Caller : internal | |
| 1899 Status : stable | |
| 1900 | |
| 1901 =cut | |
| 1902 | |
| 1903 sub _get_attrib_id { | |
| 1904 my $self = shift; | |
| 1905 my $attrib_code = shift; | |
| 1906 my $dbh = shift; | |
| 1907 my ($attrib_id) = $dbh->selectrow_array( | |
| 1908 qq(select attrib_type_id | |
| 1909 from attrib_type | |
| 1910 where code = ?), | |
| 1911 {}, | |
| 1912 ($attrib_code) | |
| 1913 ); | |
| 1914 if (! $attrib_id) { | |
| 1915 $self->log_warning("There is no attrib_type_id for code $attrib_code, please patch the attrib_table\n"); | |
| 1916 exit; | |
| 1917 } | |
| 1918 else { | |
| 1919 return $attrib_id; | |
| 1920 } | |
| 1921 } | |
| 1922 | |
| 1923 =head2 store_new_attribute | |
| 1924 | |
| 1925 Arg[1] : seq_region.seq_region_id | |
| 1926 Arg[2] : attrib_type.code | |
| 1927 Arg[3] : attrib_type.value | |
| 1928 ARG[4] : table to update (seq_region_attribute by default) | |
| 1929 Example : $support->store_new_attribute(23,name,5); | |
| 1930 Description : uses MySQL to store an entry (code and value) in an attribute table | |
| 1931 (seq_region_attrib by default) | |
| 1932 Return type : array_ref | |
| 1933 Caller : general | |
| 1934 Status : stable | |
| 1935 | |
| 1936 =cut | |
| 1937 | |
| 1938 sub store_new_attribute { | |
| 1939 my $self = shift; | |
| 1940 my $sr_id = shift; | |
| 1941 my $attrib_code = shift; | |
| 1942 my $attrib_value = shift || ''; | |
| 1943 my $table = shift || 'seq_region_attrib'; | |
| 1944 | |
| 1945 #get database handle | |
| 1946 my $dbh = $self->get_dbconnection('loutre'); | |
| 1947 #get attrib_type_id for this particular attribute | |
| 1948 my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh); | |
| 1949 #store | |
| 1950 my $r = $dbh->do( | |
| 1951 qq(insert into $table | |
| 1952 values (?,?,?)), | |
| 1953 {}, | |
| 1954 ($sr_id,$attrib_id,$attrib_value) | |
| 1955 ); | |
| 1956 return ['Stored',$r]; | |
| 1957 } | |
| 1958 | |
| 1959 =head2 update_attribute | |
| 1960 | |
| 1961 Arg[1] : seq_region.seq_region_id | |
| 1962 Arg[2] : attrib_type.code | |
| 1963 Arg[3] : attrib_type.value | |
| 1964 ARG[4] : table to update (seq_region_attribute by default) | |
| 1965 Example : $support->update_attribute(23,name,5); | |
| 1966 Description : uses MySQL to update an attribute table (seq_region_attrib by default) | |
| 1967 Return type : array_ref | |
| 1968 Caller : general | |
| 1969 Status : stable | |
| 1970 | |
| 1971 =cut | |
| 1972 | |
| 1973 sub update_attribute { | |
| 1974 my $self = shift; | |
| 1975 my $sr_id = shift; | |
| 1976 my $attrib_code = shift; | |
| 1977 my $attrib_value = shift; | |
| 1978 my $table = shift || 'seq_region_attrib'; | |
| 1979 my $dbh = $self->get_dbconnection('loutre'); | |
| 1980 my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh); | |
| 1981 #update | |
| 1982 my $r = $dbh->do( | |
| 1983 qq(update $table | |
| 1984 set value = ? | |
| 1985 where seq_region_id = $sr_id | |
| 1986 and attrib_type_id = $attrib_id), | |
| 1987 {}, | |
| 1988 ($attrib_value) | |
| 1989 ); | |
| 1990 return ['Updated',$r]; | |
| 1991 } | |
| 1992 | |
| 1993 | |
| 1994 =head2 remove_duplicate_attribs | |
| 1995 | |
| 1996 Arg[1] : db handle | |
| 1997 Arg[2] : table | |
| 1998 Example : $support->remove_duplicate_attribs($dbh,'gene'); | |
| 1999 Description : uses MySQL to remove duplicate entries from an attribute table | |
| 2000 Return type : none | |
| 2001 Caller : general | |
| 2002 Status : stable | |
| 2003 | |
| 2004 =cut | |
| 2005 | |
| 2006 sub remove_duplicate_attribs { | |
| 2007 my $self = shift; | |
| 2008 my $dbh = shift; | |
| 2009 my $table = shift; | |
| 2010 $dbh->do(qq(create table nondup_${table}_attrib like ${table}_attrib)); | |
| 2011 $dbh->do(qq(insert into nondup_${table}_attrib (select ${table}_id, attrib_type_id, value from ${table}_attrib group by ${table}_id, attrib_type_id, value))); | |
| 2012 $dbh->do(qq(delete from ${table}_attrib)); | |
| 2013 $dbh->do(qq(insert into ${table}_attrib (select ${table}_id, attrib_type_id, value from nondup_${table}_attrib))); | |
| 2014 $dbh->do(qq(drop table nondup_${table}_attrib)); | |
| 2015 } | |
| 2016 | |
| 2017 | |
| 2018 1; |
