Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Pipeline/Base.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Pipeline/Base.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,244 @@ +package Bio::EnsEMBL::Pipeline::Base; + +use strict; +use warnings; +use base qw/Bio::EnsEMBL::Hive::Process/; + +use Bio::EnsEMBL::Utils::Exception qw/throw/; +use Bio::EnsEMBL::Utils::IO qw/work_with_file/; +use Bio::EnsEMBL::Utils::Scalar qw/check_ref/; +use File::Find; +use File::Spec; +use File::Path qw/mkpath/; +use POSIX qw/strftime/; + +# Takes in a key, checks if the current $self->param() was an empty array +# and replaces it with the value from $self->param_defaults() +sub reset_empty_array_param { + my ($self, $key) = @_; + my $param_defaults = $self->param_defaults(); + my $current = $self->param($key); + my $replacement = $self->param_defaults()->{$key}; + if(check_ref($current, 'ARRAY') && check_ref($replacement, 'ARRAY')) { + if(! @{$current}) { + $self->fine('Restting param %s because the given array was empty', $key); + $self->param($key, $replacement); + } + } + return; +} + +=head2 get_Slices + + Arg[1] : String type of DB to use (defaults to core) + Arg[2] : Boolean should we filter the slices if it is human + Example : my $slices = $self->get_Slices('core', 1); + Description : Basic get_Slices() method to return all distinct slices + for a species but also optionally filters for the + first portion of Human Y which is a non-informative region + (composed solely of N's). The code will only filter for + GRCh37 forcing the developer to update the test for other + regions. + Returntype : ArrayRef[Bio::EnsEMBL::Slice] + Exceptions : Thrown if you are filtering Human but also are not on GRCh37 + +=cut + +sub get_Slices { + my ($self, $type, $filter_human) = @_; + my $dba = $self->get_DBAdaptor($type); + throw "Cannot get a DB adaptor" unless $dba; + + my $sa = $dba->get_SliceAdaptor(); + my @slices = @{$sa->fetch_all('toplevel', undef, 1, undef, undef)}; + + if($filter_human) { + my $production_name = $self->production_name(); + if($production_name eq 'homo_sapiens') { + my ($cs) = @{$dba->get_CoordSystem()->fetch_all()}; + my $expected = 'GRCh37'; + if($cs->version() ne $expected) { + throw sprintf(q{Cannot continue as %s's coordinate system %s is not the expected %s }, $production_name, $cs->version(), $expected); + } + @slices = grep { + if($_->seq_region_name() eq 'Y' && $_->end() < 2649521) { + $self->info('Filtering small Y slice'); + 0; + } + else { + 1; + } + } @slices; + } + } + + return [ sort { $a->length() <=> $b->length() } @slices ]; +} + +# Registry is loaded by Hive (see beekeeper_extra_cmdline_options() in conf) +sub get_DBAdaptor { + my ($self, $type) = @_; + my $species = $self->param('species'); + $type ||= 'core'; + return Bio::EnsEMBL::Registry->get_DBAdaptor($species, $type); +} + +sub cleanup_DBAdaptor { + my ($self, $type) = @_; + my $dba = $self->get_DBAdaptor($type); + $dba->clear_caches; + $dba->dbc->disconnect_if_idle; + return; +} + +sub get_dir { + my ($self, @extras) = @_; + my $base_dir = $self->param('base_path'); + my $dir = File::Spec->catdir($base_dir, @extras); + mkpath($dir); + return $dir; +} + +sub web_name { + my ($self) = @_; +# my $mc = $self->get_DBAdaptor()->get_MetaContainer(); +# my $name = $mc->single_value_by_key('species.url'); # change back + my $name = ucfirst($self->production_name()); + return $name; +} + +sub scientific_name { + my ($self) = @_; + my $dba = $self->get_DBAdaptor(); + my $mc = $dba->get_MetaContainer(); + my $name = $mc->get_scientific_name(); + $dba->dbc()->disconnect_if_idle(); + return $name; +} + +sub assembly { + my ($self) = @_; + my $dba = $self->get_DBAdaptor(); + return $dba->get_CoordSystemAdaptor()->fetch_all()->[0]->version(); +} + +sub production_name { + my ($self, $name) = @_; + my $dba; + if($name) { + $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($name, 'core'); + } + else { + $dba = $self->get_DBAdaptor(); + } + my $mc = $dba->get_MetaContainer(); + my $prod = $mc->get_production_name(); + $dba->dbc()->disconnect_if_idle(); + return $prod; +} + +# Closes file handle, and deletes the file stub if no data was written to +# the file handle (using tell). We can also only close a file handle and unlink +# the data if it was open otherwise we just ignore it +# Returns success if we managed to delete the file + +sub tidy_file_handle { + my ($self, $fh, $path) = @_; + if($fh->opened()) { + my $unlink = ($fh->tell() == 0) ? 1 : 0; + $fh->close(); + if($unlink && -f $path) { + unlink($path); + return 1; + } + } + return 0; +} + +sub info { + my ($self, $msg, @params) = @_; + if ($self->debug() > 1) { + my $formatted_msg; + if(scalar(@params)) { + $formatted_msg = sprintf($msg, @params); + } + else { + $formatted_msg = $msg; + } + printf STDERR "INFO [%s]: %s %s\n", $self->_memory_consumption(), strftime('%c',localtime()), $formatted_msg; + } + return +} + +sub fine { + my ($self, $msg, @params) = @_; + if ($self->debug() > 2) { + my $formatted_msg; + if(scalar(@params)) { + $formatted_msg = sprintf($msg, @params); + } + else { + $formatted_msg = $msg; + } + printf STDERR "FINE [%s]: %s %s\n", $self->_memory_consumption(), strftime('%c',localtime()), $formatted_msg; + } + return +} + +sub _memory_consumption { + my ($self) = @_; + my $content = `ps -o rss $$ | grep -v RSS`; + return q{?MB} if $? >> 8 != 0; + $content =~ s/\s+//g; + my $mem = $content/1024; + return sprintf('%.2fMB', $mem); +} + +sub find_files { + my ($self, $dir, $boolean_callback) = @_; + $self->throw("Cannot find path $dir") unless -d $dir; + my @files; + find(sub { + my $path = $File::Find::name; + if($boolean_callback->($_)) { + push(@files, $path); + } + }, $dir); + return \@files; +} + +sub unlink_all_files { + my ($self, $dir) = @_; + $self->info('Removing files from the directory %s', $dir); + #Delete anything which is a file & not the current or higher directory + my $boolean_callback = sub { + return ( $_[0] =~ /^\.\.?$/) ? 0 : 1; + }; + my $files = $self->find_files($dir, $boolean_callback); + foreach my $file (@{$files}) { + $self->fine('Unlinking %s', $file); + unlink $file; + } + $self->info('Removed %d file(s)', scalar(@{$files})); + return; +} + +sub assert_executable { + my ($self, $exe) = @_; + if(! -x $exe) { + my $output = `which $exe 2>&1`; + chomp $output; + my $rc = $? >> 8; + if($rc != 0) { + my $possible_location = `locate -l 1 $exe 2>&1`; + my $loc_rc = $? >> 8; + if($loc_rc != 0) { + my $msg = 'Cannot find the executable "%s" after trying "which" and "locate -l 1". Please ensure it is on your PATH or use an absolute location and try again'; + $self->throw(sprintf($msg, $exe)); + } + } + } + return 1; +} + +1;