Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Pipeline/FASTA/SCPBlast.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Pipeline/FASTA/SCPBlast.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,209 @@ +=pod + +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=head1 NAME + +Bio::EnsEMBL::Pipeline::FASTA::SCPBlast + +=head1 DESCRIPTION + +Performs a find in the Blast index directory, for the given species and copies +them to the specified target servers. + +Allowed parameters are: + +=over 8 + +=item no_scp - If true then we will not run SCP but still finish cleanly without error + +=item type - The type of dump to copy. Required parameter + +=item genomic_dir - Needed if you are copying DNA genomic files + +=item genes_dir - Needed if you are copying DNA gene files + +=item target_servers - The servers to copy to. Expects to be an array + +=item species - Species to work with + +=item scp_user - The user to scp as. Defaults to the current user + +=item scp_identity - Give an identity file to use during ssh commands + (useful when you are not scping as yourself) + +=item base_path - The base of the dumps. The source blast directory is + constructed from this path + +=back + +=cut + +package Bio::EnsEMBL::Pipeline::FASTA::SCPBlast; + +use strict; +use warnings; + +use base qw/Bio::EnsEMBL::Pipeline::FASTA::Base/; + +use Bio::EnsEMBL::Utils::Scalar qw/check_ref/; +use File::Spec; + +sub param_defaults { + my ($self) = @_; + return { + no_scp => 0, +# genomic_dir => '', +# genes_dir => '', +# target_servers => ['srv1', 'srv2'], + + scp_user => $ENV{USER}, #defaults to the current user +# scp_identity => '', + +# type => 'genes'/'genomic', +# species => '', + }; +} + +sub fetch_input { + my ($self) = @_; + if($self->param('no_scp')) { + $self->info('Skipping as no_scp has been specified'); + return; + } + + my $servers = $self->param('target_servers'); + + if(!check_ref($servers, 'ARRAY') || ! @{$servers}) { + my $msg = 'Will not perform copy as we have no servers'; + my $is_error = 0; + $self->db()->get_JobMessageAdaptor()->register_message( + $self->input_job()->dbID(), $msg, $is_error + ); + $self->info($msg); + return; + } + + foreach my $key (qw/type species/) { + $self->throw("Key $key is required") unless $self->param($key); + } + my $type = $self->param('type'); + if($type ne 'genomic' && $type ne 'genes') { + $self->throw("param 'type' must be set to 'genomic' or 'genes'"); + } + $self->target_dir(); #prodding for fetch's sake + return; +} + +sub run { + my ($self) = @_; + if($self->param('no_scp')) { + $self->info('Skipping as no_scp has been specified'); + return; + } + my $servers = $self->param('target_servers'); + return unless @{$servers}; + my $files = $self->get_files(); + foreach my $server (@{$servers}) { + $self->info('Copying files to %s for species %s', $server, $self->param('species')); + $self->copy_to_server($files, $server); + } + return; +} + +sub write_output { + my ($self) = @_; + $self->cleanup_DBAdaptor(); + return; +} + +sub copy_to_server { + my ($self, $files, $server) = @_; + my $target_dir = $self->target_dir(); + $self->check_remote_dir($target_dir, $server); + my $user = $self->param('scp_user'); + my $identity = $self->identity_param(); + foreach my $file (@{$files}) { + my ($volume, $directory, $filename) = File::Spec->splitpath($file); + my $target_path = File::Spec->catfile($target_dir, $filename); + my $cmd = sprintf('scp %s %s %s@%s:%s', $identity, $file, $user, $server, $target_path); + $self->fine('Executing %s', $cmd); + system($cmd) and $self->throw(sprintf("Cannot run command '%s'. RC %d", $cmd, ($?>>8))); + } + return; +} + +sub get_files { + my ($self) = @_; + my $species = $self->web_name(); + my $filter = sub { + my ($filename) = @_; + return ($filename =~ /^$species.+fa.+$/) ? 1 : 0; + }; + my $files = $self->find_files($self->blast_dir(), $filter); + $self->info('Found %d file(s) to copy', scalar(@{$files})); + return $files; +} + +sub blast_dir { + my ($self) = @_; + return $self->get_dir('blast', $self->param('type')); +} + +sub target_dir { + my ($self) = @_; + my $t = $self->param('type'); + my $key = "${t}_dir"; + my $dir = $self->param($key); + $self->throw("Cannot locate the parameter $key. We expect to do so") unless $dir; + return $dir; +} + +sub check_remote_dir { + my ($self, $remote_dir, $server) = @_; + my ($echo_rc) = $self->ssh_cmd($server, "echo -n 1"); + $self->throw("Cannot connect to $server") if $echo_rc; #1 means fail + my ($exists_rc) = $self->ssh_cmd($server, "test -d $remote_dir"); + if($exists_rc == 1) { + $self->info('Directory %s does not exist on %s. Will create it'); + my ($mkdir_rc, $mkdir_out) = $self->ssh_cmd($server, "mkdir -p $remote_dir"); + if($mkdir_rc == 1) { + $self->throw("Cannot create the directory $remote_dir on $server. Output from cmd was $mkdir_out. Check and rerun"); + } + } + return; +} + +sub ssh_cmd { + my ($self, $server, $cmd) = @_; + my $user = $self->param('scp_user'); + my $identity = $self->identity_param(); + $self->fine("Running command '%s' on '%s' as user '%s'", $cmd, $server, $user); + my $ssh_cmd = sprintf('ssh %s %s@%s "%s"', $identity, $user, $server, $cmd); + my $output = `$ssh_cmd`; + my $rc = $? >> 8; + return ($rc, $output); +} + +sub identity_param { + my ($self) = @_; + return ($self->param('scp_identity')) ? '-i '.$self->param('scp_identity') : q{}; +} + +1;