0
|
1 =pod
|
|
2
|
|
3 =head1 LICENSE
|
|
4
|
|
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
6 Genome Research Limited. All rights reserved.
|
|
7
|
|
8 This software is distributed under a modified Apache license.
|
|
9 For license details, please see
|
|
10
|
|
11 http://www.ensembl.org/info/about/code_licence.html
|
|
12
|
|
13 =head1 CONTACT
|
|
14
|
|
15 Please email comments or questions to the public Ensembl
|
|
16 developers list at <dev@ensembl.org>.
|
|
17
|
|
18 Questions may also be sent to the Ensembl help desk at
|
|
19 <helpdesk@ensembl.org>.
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Pipeline::FASTA::SCPBlast
|
|
24
|
|
25 =head1 DESCRIPTION
|
|
26
|
|
27 Performs a find in the Blast index directory, for the given species and copies
|
|
28 them to the specified target servers.
|
|
29
|
|
30 Allowed parameters are:
|
|
31
|
|
32 =over 8
|
|
33
|
|
34 =item no_scp - If true then we will not run SCP but still finish cleanly without error
|
|
35
|
|
36 =item type - The type of dump to copy. Required parameter
|
|
37
|
|
38 =item genomic_dir - Needed if you are copying DNA genomic files
|
|
39
|
|
40 =item genes_dir - Needed if you are copying DNA gene files
|
|
41
|
|
42 =item target_servers - The servers to copy to. Expects to be an array
|
|
43
|
|
44 =item species - Species to work with
|
|
45
|
|
46 =item scp_user - The user to scp as. Defaults to the current user
|
|
47
|
|
48 =item scp_identity - Give an identity file to use during ssh commands
|
|
49 (useful when you are not scping as yourself)
|
|
50
|
|
51 =item base_path - The base of the dumps. The source blast directory is
|
|
52 constructed from this path
|
|
53
|
|
54 =back
|
|
55
|
|
56 =cut
|
|
57
|
|
58 package Bio::EnsEMBL::Pipeline::FASTA::SCPBlast;
|
|
59
|
|
60 use strict;
|
|
61 use warnings;
|
|
62
|
|
63 use base qw/Bio::EnsEMBL::Pipeline::FASTA::Base/;
|
|
64
|
|
65 use Bio::EnsEMBL::Utils::Scalar qw/check_ref/;
|
|
66 use File::Spec;
|
|
67
|
|
# Supplies the default values for this runnable's parameters.
#
# Only two parameters are given a default here:
#   no_scp   - 0, i.e. copying is enabled
#   scp_user - the value of the USER environment variable
#
# The remaining parameters are not defaulted by this method. Examples of
# the values they are expected to hold:
#   genomic_dir    => ''
#   genes_dir      => ''
#   target_servers => ['srv1', 'srv2']
#   scp_identity   => ''
#   type           => 'genes' or 'genomic'
#   species        => ''
#
# Returns a hash reference of parameter name => default value.
sub param_defaults {
  my ($self) = @_;
  my %defaults = (
    no_scp   => 0,
    scp_user => $ENV{USER},   # defaults to the current user
  );
  return \%defaults;
}
|
|
83
|
|
# Validates the job's parameters before run() is invoked.
#
# Returns early, without validating anything else, when:
#   - 'no_scp' is true, or
#   - 'target_servers' is not a populated array reference; in that case a
#     non-error message is also registered against the current job.
#
# Otherwise throws if 'type' or 'species' is missing, if 'type' is not
# 'genomic' or 'genes', or if target_dir() cannot resolve its parameter.
sub fetch_input {
  my ($self) = @_;

  if ($self->param('no_scp')) {
    $self->info('Skipping as no_scp has been specified');
    return;
  }

  my $target_servers = $self->param('target_servers');
  my $have_servers = check_ref($target_servers, 'ARRAY') && scalar(@{$target_servers});
  unless ($have_servers) {
    my $message = 'Will not perform copy as we have no servers';
    my $adaptor = $self->db()->get_JobMessageAdaptor();
    # Third argument is the is_error flag; 0 records this as a plain message
    $adaptor->register_message($self->input_job()->dbID(), $message, 0);
    $self->info($message);
    return;
  }

  for my $required (qw/type species/) {
    next if $self->param($required);
    $self->throw("Key $required is required");
  }

  my %is_valid_type = map { $_ => 1 } qw/genomic genes/;
  my $type = $self->param('type');
  unless ($is_valid_type{$type}) {
    $self->throw("param 'type' must be set to 'genomic' or 'genes'");
  }

  # Called only for its side effect: target_dir() throws if the matching
  # <type>_dir parameter has not been given
  $self->target_dir();
  return;
}
|
|
113
|
|
# Copies the files found by get_files() to every configured target server.
#
# Does nothing when 'no_scp' is true, or when 'target_servers' is not a
# populated array reference.
#
# fetch_input() finishes cleanly (it only logs a message) when
# 'target_servers' is missing or is not an array, and run() is still invoked
# afterwards. Dereferencing an undefined value with @{...} dies under
# 'use strict', so the same check_ref() guard is applied here before the
# list is touched.
sub run {
  my ($self) = @_;
  if($self->param('no_scp')) {
    $self->info('Skipping as no_scp has been specified');
    return;
  }
  my $servers = $self->param('target_servers');
  return unless check_ref($servers, 'ARRAY') && @{$servers};
  my $files = $self->get_files();
  foreach my $server (@{$servers}) {
    $self->info('Copying files to %s for species %s', $server, $self->param('species'));
    $self->copy_to_server($files, $server);
  }
  return;
}
|
|
129
|
|
# Final stage of the job: invokes cleanup_DBAdaptor() on this object and
# produces no output of its own.
sub write_output {
  my $self = shift;
  $self->cleanup_DBAdaptor;
  return;
}
|
|
135
|
|
# Copies each given file to the target directory on one server using scp.
#
#   $files  - array reference of local file paths
#   $server - host to copy to
#
# The remote directory is checked (and created if needed) by
# check_remote_dir() before any file is sent. Each file keeps its own file
# name on the remote side. Throws as soon as one scp invocation returns a
# non-zero status.
sub copy_to_server {
  my ($self, $files, $server) = @_;

  my $remote_dir = $self->target_dir();
  $self->check_remote_dir($remote_dir, $server);

  my $scp_user     = $self->param('scp_user');
  my $identity_opt = $self->identity_param();

  for my $local_path (@{$files}) {
    # splitpath() yields (volume, directory, file); only the file name is used
    my $basename    = (File::Spec->splitpath($local_path))[2];
    my $remote_path = File::Spec->catfile($remote_dir, $basename);
    my $scp = sprintf('scp %s %s %s@%s:%s', $identity_opt, $local_path, $scp_user, $server, $remote_path);
    $self->fine('Executing %s', $scp);
    my $failed = system($scp);
    if ($failed) {
      $self->throw(sprintf("Cannot run command '%s'. RC %d", $scp, ($?>>8)));
    }
  }

  return;
}
|
|
151
|
|
# Locates the files in blast_dir() that belong to the current species.
#
# A file name is accepted when it starts with the value of web_name(), is
# followed by at least one character, then 'fa', then at least one further
# character.
#
# The species name is quoted with \Q...\E so that it is matched literally;
# without quoting, any regular expression metacharacter in the name (for
# instance a '.') would act as a wildcard and could select files belonging
# to another species.
#
# Returns whatever find_files() returns for the filter (used here as an
# array reference of files).
sub get_files {
  my ($self) = @_;
  my $species = $self->web_name();
  # Compiled once here rather than on every callback invocation
  my $wanted = qr/^\Q$species\E.+fa.+$/;
  my $filter = sub {
    my ($filename) = @_;
    return ($filename =~ $wanted) ? 1 : 0;
  };
  my $files = $self->find_files($self->blast_dir(), $filter);
  $self->info('Found %d file(s) to copy', scalar(@{$files}));
  return $files;
}
|
|
163
|
|
# Returns the local source directory for the configured dump type, as
# resolved by get_dir() from the path parts 'blast' and the 'type' parameter.
sub blast_dir {
  my $self = shift;
  my @path_parts = ('blast', $self->param('type'));
  return $self->get_dir(@path_parts);
}
|
|
168
|
|
# Returns the remote directory to copy into. This is the value of the
# parameter named after the dump type: 'genomic_dir' when type is
# 'genomic', 'genes_dir' when type is 'genes'.
#
# Throws if that parameter is missing or false.
sub target_dir {
  my ($self) = @_;
  my $type       = $self->param('type');
  my $param_name = $type . '_dir';
  my $location   = $self->param($param_name);
  if (! $location) {
    $self->throw("Cannot locate the parameter $param_name. We expect to do so");
  }
  return $location;
}
|
|
177
|
|
# Makes sure a directory exists on a remote server, creating it if needed.
#
#   $remote_dir - directory path on the remote host
#   $server     - host to check
#
# Steps, each run through ssh_cmd():
#   1. 'echo -n 1' as a connectivity probe; throws on a non-zero status.
#   2. 'test -d' on the directory.
#   3. 'mkdir -p' when step 2 did not succeed; throws if that fails.
#
# Any non-zero status is treated as failure in steps 2 and 3. Comparing
# against exactly 1 would let other failure codes (for example 255, which
# ssh uses for its own errors) pass unnoticed. 'mkdir -p' is harmless if
# the directory does already exist.
sub check_remote_dir {
  my ($self, $remote_dir, $server) = @_;
  my ($echo_rc) = $self->ssh_cmd($server, "echo -n 1");
  $self->throw("Cannot connect to $server") if $echo_rc; # non-zero means fail
  my ($exists_rc) = $self->ssh_cmd($server, "test -d $remote_dir");
  if($exists_rc != 0) {
    # The format has two placeholders, so both values must be supplied
    $self->info('Directory %s does not exist on %s. Will create it', $remote_dir, $server);
    my ($mkdir_rc, $mkdir_out) = $self->ssh_cmd($server, "mkdir -p $remote_dir");
    if($mkdir_rc != 0) {
      # Avoid an uninitialized-value warning if no output was captured
      $mkdir_out = q{} unless defined $mkdir_out;
      $self->throw("Cannot create the directory $remote_dir on $server. Output from cmd was $mkdir_out. Check and rerun");
    }
  }
  return;
}
|
|
192
|
|
# Runs a command on a remote server over ssh.
#
#   $server - host to connect to
#   $cmd    - command text; it is placed inside double quotes on the ssh
#             command line
#
# The connection is made as the 'scp_user' parameter, with the option
# string from identity_param() included.
#
# Returns a two element list: the exit status of the ssh invocation
# ($? shifted right by 8) and the output captured from it.
sub ssh_cmd {
  my ($self, $server, $cmd) = @_;
  my $login        = $self->param('scp_user');
  my $identity_opt = $self->identity_param();
  $self->fine("Running command '%s' on '%s' as user '%s'", $cmd, $server, $login);
  my $command_line = sprintf('ssh %s %s@%s "%s"', $identity_opt, $login, $server, $cmd);
  my $captured  = qx{$command_line};
  my $exit_code = $? >> 8;
  return ($exit_code, $captured);
}
|
|
203
|
|
# Builds the identity file option for ssh and scp command lines.
#
# Returns '-i <file>' when the 'scp_identity' parameter is set, otherwise
# an empty string.
sub identity_param {
  my ($self) = @_;
  return q{} unless $self->param('scp_identity');
  return '-i ' . $self->param('scp_identity');
}
|
|
208
|
|
209 1;
|