Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Pipeline/FASTA/SCPBlast.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =pod | |
| 2 | |
| 3 =head1 LICENSE | |
| 4 | |
| 5 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 6 Genome Research Limited. All rights reserved. | |
| 7 | |
| 8 This software is distributed under a modified Apache license. | |
| 9 For license details, please see | |
| 10 | |
| 11 http://www.ensembl.org/info/about/code_licence.html | |
| 12 | |
| 13 =head1 CONTACT | |
| 14 | |
| 15 Please email comments or questions to the public Ensembl | |
| 16 developers list at <dev@ensembl.org>. | |
| 17 | |
| 18 Questions may also be sent to the Ensembl help desk at | |
| 19 <helpdesk@ensembl.org>. | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::Pipeline::FASTA::SCPBlast | |
| 24 | |
| 25 =head1 DESCRIPTION | |
| 26 | |
| 27 Finds the Blast index files for the given species in the Blast index directory | |
| 28 and copies them to the specified target servers. | |
| 29 | |
| 30 Allowed parameters are: | |
| 31 | |
| 32 =over 8 | |
| 33 | |
| 34 =item no_scp - If true then we will not run SCP but still finish cleanly without error | |
| 35 | |
| 36 =item type - The type of dump to copy. Required parameter | |
| 37 | |
| 38 =item genomic_dir - The directory on the target servers to copy into. Needed if you are copying DNA genomic files (type 'genomic') | |
| 39 | |
| 40 =item genes_dir - The directory on the target servers to copy into. Needed if you are copying DNA gene files (type 'genes') | |
| 41 | |
| 42 =item target_servers - The servers to copy to. Expected to be an array reference; if it is missing or empty no copy is performed | |
| 43 | |
| 44 =item species - Species to work with | |
| 45 | |
| 46 =item scp_user - The user to scp as. Defaults to the current user | |
| 47 | |
| 48 =item scp_identity - Give an identity file to use during ssh commands | |
| 49 (useful when you are not scping as yourself) | |
| 50 | |
| 51 =item base_path - The base of the dumps. The source blast directory is | |
| 52 constructed from this path | |
| 53 | |
| 54 =back | |
| 55 | |
| 56 =cut | |
| 57 | |
| 58 package Bio::EnsEMBL::Pipeline::FASTA::SCPBlast; | |
| 59 | |
| 60 use strict; | |
| 61 use warnings; | |
| 62 | |
| 63 use base qw/Bio::EnsEMBL::Pipeline::FASTA::Base/; | |
| 64 | |
| 65 use Bio::EnsEMBL::Utils::Scalar qw/check_ref/; | |
| 66 use File::Spec; | |
| 67 | |
# Supplies the default values for this runnable's parameters.
#
# Only no_scp and scp_user receive a default here. The other parameters are
# listed as comments purely for reference and have no default value.
#
# Returns a hash reference of parameter name => default value.
sub param_defaults {
  my $self = shift;

  my %defaults = (
    no_scp => 0,
    # genomic_dir    => '',
    # genes_dir      => '',
    # target_servers => ['srv1', 'srv2'],

    scp_user => $ENV{USER}, # falls back to the user running the process
    # scp_identity => '',

    # type    => 'genes'/'genomic',
    # species => '',
  );

  return \%defaults;
}
| 83 | |
# Checks the job parameters before any copying is attempted.
#
# - When no_scp is true, logs that fact and returns without further checks.
# - When target_servers is not a non-empty array reference, registers a
#   non-error job message, logs it and returns.
# - Otherwise throws if 'type' or 'species' is false, or if 'type' is neither
#   'genomic' nor 'genes', and finally calls target_dir() so that a missing
#   "<type>_dir" parameter is reported at this stage.
sub fetch_input {
  my $self = shift;

  if ($self->param('no_scp')) {
    $self->info('Skipping as no_scp has been specified');
    return;
  }

  my $target_servers = $self->param('target_servers');
  my $have_servers = check_ref($target_servers, 'ARRAY') && @{$target_servers};
  if (!$have_servers) {
    my $message = 'Will not perform copy as we have no servers';
    my $not_an_error = 0;
    $self->db()->get_JobMessageAdaptor()->register_message(
      $self->input_job()->dbID(), $message, $not_an_error
    );
    $self->info($message);
    return;
  }

  for my $required (qw/type species/) {
    if (!$self->param($required)) {
      $self->throw("Key $required is required");
    }
  }

  my %is_valid_type = (genomic => 1, genes => 1);
  my $dump_type = $self->param('type');
  $self->throw("param 'type' must be set to 'genomic' or 'genes'")
    unless $is_valid_type{$dump_type};

  # Called only for its side effect: it throws when the directory parameter
  # for this type has not been supplied.
  $self->target_dir();
  return;
}
| 113 | |
# Copies the Blast index files for the configured species to every server in
# the target_servers parameter.
#
# Does nothing (beyond logging) when no_scp is true. Also does nothing when
# target_servers is not a non-empty array reference: fetch_input() lets that
# situation through with only a logged message, so it must not be fatal here.
# Previously an undefined target_servers was dereferenced directly, which
# dies under "use strict".
#
# Errors raised by get_files() or copy_to_server() propagate to the caller.
sub run {
  my ($self) = @_;
  if ($self->param('no_scp')) {
    $self->info('Skipping as no_scp has been specified');
    return;
  }

  my $servers = $self->param('target_servers');
  # Same test as fetch_input(): only proceed with a populated array reference.
  return unless check_ref($servers, 'ARRAY') && @{$servers};

  # The file list does not depend on the server, so build it once.
  my $files = $self->get_files();
  foreach my $server (@{$servers}) {
    $self->info('Copying files to %s for species %s', $server, $self->param('species'));
    $self->copy_to_server($files, $server);
  }
  return;
}
| 129 | |
# Final stage of the job: asks the object to clean up via
# cleanup_DBAdaptor() (defined outside this file) and returns nothing.
sub write_output {
  my $self = shift;
  $self->cleanup_DBAdaptor();
  return;
}
| 135 | |
# Copies each file in $files to the target directory on $server using scp.
#
# Arguments:
#   $files  - array reference of local file paths
#   $server - name of the server to copy to
#
# The remote directory is checked (and created if needed) through
# check_remote_dir() before any file is sent. Every file keeps its own name
# and is placed directly in the directory returned by target_dir().
#
# Throws on the first scp that does not succeed. The message distinguishes
# a command that could not be started, one killed by a signal, and one that
# exited with a non-zero code; only the last was reported meaningfully before.
#
# NOTE(review): the command is handed to system() as a single string, so file
# names and parameters containing whitespace or shell metacharacters are not
# protected.
sub copy_to_server {
  my ($self, $files, $server) = @_;
  my $target_dir = $self->target_dir();
  $self->check_remote_dir($target_dir, $server);
  my $user = $self->param('scp_user');
  my $identity = $self->identity_param();
  foreach my $file (@{$files}) {
    # Only the file name part is wanted; volume and directory are dropped.
    my $filename = (File::Spec->splitpath($file))[2];
    my $target_path = File::Spec->catfile($target_dir, $filename);
    my $cmd = sprintf('scp %s %s %s@%s:%s', $identity, $file, $user, $server, $target_path);
    $self->fine('Executing %s', $cmd);

    my $status = system($cmd);
    my $os_error = $!; # capture before anything else can overwrite it
    next if $status == 0;

    if ($status == -1) {
      # system() could not start the command at all; there is no exit code.
      $self->throw(sprintf("Cannot run command '%s'. Failed to execute: %s", $cmd, $os_error));
    }
    if ($status & 127) {
      # The low 7 bits hold the signal that terminated the command.
      $self->throw(sprintf("Cannot run command '%s'. Died with signal %d", $cmd, ($status & 127)));
    }
    $self->throw(sprintf("Cannot run command '%s'. RC %d", $cmd, ($status >> 8)));
  }
  return;
}
| 151 | |
# Finds the Blast index files to copy for the current species.
#
# The directory from blast_dir() is searched with find_files(), keeping the
# file names that start with the value of web_name(), followed by at least one
# character, then "fa", then at least one more character.
#
# The species name is quoted with \Q...\E so that any regex metacharacters in
# it (for example a '.') are matched literally and not as wildcards. The
# pattern is compiled once and reused for every candidate file.
#
# Returns whatever find_files() returns, which is dereferenced as an array
# here; the number of entries is logged.
sub get_files {
  my ($self) = @_;
  my $species = $self->web_name();
  my $wanted = qr/^\Q$species\E.+fa.+$/;
  my $filter = sub {
    my ($filename) = @_;
    return ($filename =~ $wanted) ? 1 : 0;
  };
  my $files = $self->find_files($self->blast_dir(), $filter);
  $self->info('Found %d file(s) to copy', scalar(@{$files}));
  return $files;
}
| 163 | |
# Returns the local Blast directory for the configured dump type, obtained
# from get_dir() with 'blast' followed by the 'type' parameter.
sub blast_dir {
  my $self = shift;
  my @path_parts = ('blast', $self->param('type'));
  return $self->get_dir(@path_parts);
}
| 168 | |
# Returns the directory to copy into for the configured dump type.
#
# The value is read from the parameter named "<type>_dir" (so genomic_dir or
# genes_dir for the types accepted by fetch_input). Throws when that
# parameter is missing or otherwise false.
sub target_dir {
  my $self = shift;

  my $dump_type = $self->param('type');
  my $param_name = $dump_type . '_dir';
  my $remote_path = $self->param($param_name);

  if (!$remote_path) {
    $self->throw("Cannot locate the parameter $param_name. We expect to do so");
  }
  return $remote_path;
}
| 177 | |
# Makes sure $remote_dir exists on $server, creating it when it does not.
#
# Arguments:
#   $remote_dir - directory path on the remote machine
#   $server     - name of the server to check
#
# Steps, each run through ssh_cmd():
#   1. "echo -n 1" as a connectivity probe; any non-zero code throws.
#   2. "test -d" on the directory; a code of 1 means it does not exist.
#   3. "mkdir -p" when the directory is missing; any non-zero code throws.
#      Previously only a code of exactly 1 was treated as failure, so other
#      failures were silently ignored.
#
# The "will create" log line now receives the directory and server it refers
# to; it used to be emitted with its placeholders unfilled.
#
# NOTE(review): $remote_dir is interpolated into the remote command unquoted,
# so paths containing whitespace or shell metacharacters are not supported.
sub check_remote_dir {
  my ($self, $remote_dir, $server) = @_;
  my ($echo_rc) = $self->ssh_cmd($server, "echo -n 1");
  $self->throw("Cannot connect to $server") if $echo_rc; # non-zero means fail

  my ($exists_rc) = $self->ssh_cmd($server, "test -d $remote_dir");
  if ($exists_rc == 1) {
    $self->info('Directory %s does not exist on %s. Will create it', $remote_dir, $server);
    my ($mkdir_rc, $mkdir_out) = $self->ssh_cmd($server, "mkdir -p $remote_dir");
    if ($mkdir_rc != 0) {
      $mkdir_out = q{} unless defined $mkdir_out;
      $self->throw("Cannot create the directory $remote_dir on $server. Output from cmd was $mkdir_out. Check and rerun");
    }
  }
  return;
}
| 192 | |
# Runs $cmd on $server over ssh as the scp_user parameter, adding the option
# string from identity_param() to the ssh invocation.
#
# Arguments:
#   $server - name of the server to connect to
#   $cmd    - command to run remotely; it is placed inside double quotes
#
# Returns a two element list: the exit status shifted right by eight bits,
# and the text captured from the command's standard output.
sub ssh_cmd {
  my $self = shift;
  my ($server, $cmd) = @_;

  my $user = $self->param('scp_user');
  my $identity_option = $self->identity_param();
  $self->fine("Running command '%s' on '%s' as user '%s'", $cmd, $server, $user);

  my $command_line = sprintf('ssh %s %s@%s "%s"', $identity_option, $user, $server, $cmd);
  my $captured = qx{$command_line};
  return ($? >> 8, $captured);
}
| 203 | |
# Builds the identity-file option used in the ssh and scp command lines.
#
# Returns '-i <file>' when the scp_identity parameter is set to a true value
# and an empty string otherwise.
sub identity_param {
  my $self = shift;
  my $identity_file = $self->param('scp_identity');
  return q{} unless $identity_file;
  return '-i '.$identity_file;
}
| 208 | |
| 209 1; |
