annotate variant_effect_predictor/Bio/EnsEMBL/Pipeline/FASTA/SCPBlast.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::Pipeline::FASTA::SCPBlast
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 Performs a find in the Blast index directory for the files of the given species and copies
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 them to the specified target servers.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 Allowed parameters are:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 =over 8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 =item no_scp - If true then we will not run SCP but still finish cleanly without error
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 =item type - The type of dump to copy. Required parameter
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 =item genomic_dir - Needed if you are copying DNA genomic files
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 =item genes_dir - Needed if you are copying DNA gene files
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 =item target_servers - The servers to copy to. Expects to be an array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 =item species - Species to work with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 =item scp_user - The user to scp as. Defaults to the current user
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 =item scp_identity - Give an identity file to use during ssh commands
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 (useful when you are not scping as yourself)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 =item base_path - The base of the dumps. The source blast directory is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 constructed from this path
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 package Bio::EnsEMBL::Pipeline::FASTA::SCPBlast;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 use base qw/Bio::EnsEMBL::Pipeline::FASTA::Base/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 use Bio::EnsEMBL::Utils::Scalar qw/check_ref/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 use File::Spec;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Default parameter values for this runnable.
#
# Returns a hashref holding:
#   no_scp   - 0, so copying is enabled unless the caller sets it
#   scp_user - the current user. $ENV{USER} is preferred; since USER is not
#              guaranteed to be set in every environment we fall back to
#              $ENV{LOGNAME} and then to the password database entry of the
#              real UID.
#
# The following parameters have no default and must be supplied when needed:
#   genomic_dir, genes_dir     - e.g. ''
#   target_servers             - e.g. ['srv1', 'srv2']
#   scp_identity               - e.g. ''
#   type                       - 'genes' or 'genomic'
#   species                    - e.g. ''
sub param_defaults {
  my ($self) = @_;

  my $user = $ENV{USER};
  if(! defined $user || $user eq q{}) {
    $user = $ENV{LOGNAME};
  }
  if(! defined $user || $user eq q{}) {
    # getpwuid is not implemented on every platform, hence the eval
    $user = eval { scalar getpwuid($<) };
  }

  return {
    no_scp => 0,
    scp_user => $user, #defaults to the current user
  };
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Validates the job parameters before any copying is attempted.
#
# Returns early (after logging) when no_scp is set. When target_servers is
# not a populated array reference a non-error message is registered against
# the job and the method also returns early. Otherwise 'type' and 'species'
# must be set, 'type' must be 'genomic' or 'genes', and the matching
# <type>_dir parameter must resolve; any violation is raised via throw().
sub fetch_input {
  my $self = shift;

  if ($self->param('no_scp')) {
    $self->info('Skipping as no_scp has been specified');
    return;
  }

  my $target_servers = $self->param('target_servers');
  my $have_servers = check_ref($target_servers, 'ARRAY') && @{$target_servers};
  unless ($have_servers) {
    my $message = 'Will not perform copy as we have no servers';
    # Third argument 0 flags the message as informational, not an error
    my $message_adaptor = $self->db()->get_JobMessageAdaptor();
    $message_adaptor->register_message($self->input_job()->dbID(), $message, 0);
    $self->info($message);
    return;
  }

  for my $required ('type', 'species') {
    if (! $self->param($required)) {
      $self->throw("Key $required is required");
    }
  }

  my $dump_type = $self->param('type');
  unless ($dump_type eq 'genomic' || $dump_type eq 'genes') {
    $self->throw("param 'type' must be set to 'genomic' or 'genes'");
  }

  # Called purely for its validation side effect: it throws when the
  # directory parameter for this type is missing
  $self->target_dir();
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Copies the Blast index files for the current species to every target
# server.
#
# Does nothing (beyond logging) when no_scp is set, and returns silently when
# target_servers is not a populated array reference. For each server the
# file list from get_files() is handed to copy_to_server().
sub run {
  my ($self) = @_;
  if($self->param('no_scp')) {
    $self->info('Skipping as no_scp has been specified');
    return;
  }
  my $servers = $self->param('target_servers');
  # target_servers may be undef or a non-array here: fetch_input() merely
  # logs that situation and returns, so dereferencing unconditionally would
  # die under strict refs instead of finishing cleanly.
  return unless ref($servers) eq 'ARRAY' && @{$servers};
  my $files = $self->get_files();
  foreach my $server (@{$servers}) {
    $self->info('Copying files to %s for species %s', $server, $self->param('species'));
    $self->copy_to_server($files, $server);
  }
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Final stage of the job: nothing is written, the only action is to call
# cleanup_DBAdaptor() on ourselves.
sub write_output {
  my $self = shift;
  $self->cleanup_DBAdaptor();
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Copies every file in $files to the target directory on $server using scp.
#
# Arguments:
#   $files  - array reference of local file paths
#   $server - host name to copy to
#
# The remote directory (from target_dir()) is verified/created first through
# check_remote_dir(). Each file keeps its own file name on the remote side.
# scp is started with the list form of system() so no local shell is
# involved; local paths containing spaces or shell metacharacters are passed
# through untouched.
#
# Throws if scp cannot be started, is killed by a signal or exits non-zero.
sub copy_to_server {
  my ($self, $files, $server) = @_;
  my $target_dir = $self->target_dir();
  $self->check_remote_dir($target_dir, $server);
  my $user = $self->param('scp_user');
  # identity_param() yields either '' or '-i <file>'; split it into separate
  # argv entries ('' gives an empty list)
  my @identity = split q{ }, $self->identity_param();
  foreach my $file (@{$files}) {
    my ($volume, $directory, $filename) = File::Spec->splitpath($file);
    my $target_path = File::Spec->catfile($target_dir, $filename);
    my @cmd = ('scp', @identity, $file, sprintf('%s@%s:%s', $user, $server, $target_path));
    my $cmd = join(q{ }, @cmd); # for logging & error messages only
    $self->fine('Executing %s', $cmd);
    my $status = system(@cmd);
    next if $status == 0;
    if($status == -1) {
      # system() could not start the program at all; $! holds the reason
      $self->throw(sprintf("Cannot run command '%s'. Failed to execute: %s", $cmd, $!));
    }
    if($status & 127) {
      $self->throw(sprintf("Cannot run command '%s'. Killed by signal %d", $cmd, ($status & 127)));
    }
    $self->throw(sprintf("Cannot run command '%s'. RC %d", $cmd, ($status >> 8)));
  }
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Locates the Blast index files belonging to the current species.
#
# Searches blast_dir() via find_files() with a filter accepting file names
# that start with the species web name, followed by at least one character,
# the text 'fa' and at least one further character. Logs the number of hits.
#
# Returns the array reference produced by find_files().
sub get_files {
  my ($self) = @_;
  my $species = $self->web_name();
  # \Q..\E so that any regex metacharacter in the name is matched literally;
  # compiled once rather than on every filter call
  my $pattern = qr/^\Q$species\E.+fa.+$/;
  my $filter = sub {
    my ($filename) = @_;
    return ($filename =~ $pattern) ? 1 : 0;
  };
  my $files = $self->find_files($self->blast_dir(), $filter);
  $self->info('Found %d file(s) to copy', scalar(@{$files}));
  return $files;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Source directory of the Blast indexes: whatever get_dir() returns for
# 'blast' combined with the 'type' parameter.
sub blast_dir {
  my $self = shift;
  my @dir_parts = ('blast', $self->param('type'));
  return $self->get_dir(@dir_parts);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Remote directory files are copied into. The value is read from the
# parameter named after the dump type, i.e. '<type>_dir'.
#
# Throws when that parameter is unset or false.
sub target_dir {
  my $self = shift;
  my $type = $self->param('type');
  my $param_name = "${type}_dir";
  my $location = $self->param($param_name);
  if (! $location) {
    $self->throw("Cannot locate the parameter $param_name. We expect to do so");
  }
  return $location;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Makes sure $remote_dir exists on $server, creating it when necessary.
#
# Arguments:
#   $remote_dir - directory path on the remote machine
#   $server     - host name to check
#
# Three remote commands are issued through ssh_cmd(): a trivial echo to
# prove we can connect, 'test -d' to look for the directory and, if that
# reports failure, 'mkdir -p' to create it.
#
# Throws when the connection test or the mkdir returns a non-zero code.
sub check_remote_dir {
  my ($self, $remote_dir, $server) = @_;
  my ($echo_rc) = $self->ssh_cmd($server, "echo -n 1");
  $self->throw("Cannot connect to $server") if $echo_rc; #non-zero means fail
  my ($exists_rc) = $self->ssh_cmd($server, "test -d $remote_dir");
  # Any non-zero code is treated as "not there"; mkdir -p is harmless if the
  # directory turns out to exist after all
  if($exists_rc != 0) {
    $self->info('Directory %s does not exist on %s. Will create it', $remote_dir, $server);
    my ($mkdir_rc, $mkdir_out) = $self->ssh_cmd($server, "mkdir -p $remote_dir");
    if($mkdir_rc != 0) {
      $mkdir_out = q{} unless defined $mkdir_out;
      $self->throw("Cannot create the directory $remote_dir on $server. Output from cmd was $mkdir_out. Check and rerun");
    }
  }
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Runs $cmd on $server over ssh as the configured scp_user.
#
# Arguments:
#   $server - host to connect to
#   $cmd    - command to run remotely
#
# The command is embedded in double quotes within a single command line
# that is executed with backticks, so it passes through the local shell
# first.
#
# Returns a two element list: the exit code ($? shifted right by 8 bits) and
# the captured standard output.
sub ssh_cmd {
  my ($self, $server, $cmd) = @_;

  my $login = $self->param('scp_user');
  my $identity_option = $self->identity_param();
  $self->fine("Running command '%s' on '%s' as user '%s'", $cmd, $server, $login);

  my $command_line = sprintf('ssh %s %s@%s "%s"', $identity_option, $login, $server, $cmd);
  my $captured = qx{$command_line};
  my $exit_code = $? >> 8;

  return ($exit_code, $captured);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Builds the identity-file option for ssh/scp command lines.
#
# Returns '-i <file>' when the scp_identity parameter is set to a true
# value, otherwise an empty string.
sub identity_param {
  my $self = shift;
  if ($self->param('scp_identity')) {
    return '-i ' . $self->param('scp_identity');
  }
  return q{};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 1;