annotate variant_effect_predictor/Bio/EnsEMBL/Pipeline/FASTA/CopyDNA.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::Pipeline::FASTA::CopyDNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 Performs a find in the DNA dumps directory, for the given species, in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 previous release FTP dump directory. Any files matching the normal gzipped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 fasta extension will be copied over to this release's directory.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 Previous release is defined as C<release-1>; override this class if your
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 definition of the previous release is different.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 Allowed parameters are:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 =over 8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 =item release - Needed to build the target path
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 =item previous_release - Needed to build the source path
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 =item ftp_dir - Current location of the FTP directory for the previous
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 release. Should be the root i.e. the level I<release-XX> is in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 =item species - Species to work with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 =item base_path - The base of the dumps; reused files will be copied to here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 package Bio::EnsEMBL::Pipeline::FASTA::CopyDNA;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 use base qw/Bio::EnsEMBL::Pipeline::FASTA::Base/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 use File::Copy;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 use File::Find;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 use File::Spec;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Validates the job parameters before run() is attempted.
#
# Throws (via $self->throw) for the first of release, ftp_dir or species
# whose parameter value is false. Returns nothing.
sub fetch_input {
  my ($self) = @_;
  for my $key (qw/release ftp_dir species/) {
    # A true value means the parameter was supplied; move on to the next one
    next if $self->param($key);
    $self->throw("Need to define a $key parameter");
  }
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Copies every file reported by get_dna_files() into new_path().
#
# The target directory is emptied first (unlink_all_files), then each old
# file is copied to the name computed by new_filename(). A failed copy is
# reported through $self->throw with the OS error. Returns nothing.
sub run {
  my ($self) = @_;

  # Remove all files from the new path before anything is copied in
  $self->unlink_all_files($self->new_path());

  for my $old_file (@{ $self->get_dna_files() }) {
    my $new_file = $self->new_filename($old_file);
    $self->fine('copy %s %s', $old_file, $new_file);
    unless (copy($old_file, $new_file)) {
      $self->throw("Cannot copy $old_file to $new_file: $!");
    }
  }

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Maps a file from the previous release onto its location in this release.
#
# Takes only the file-name portion of $old_filename, rewrites the first
# ".<previous_release>." found in it to ".<release>." and returns that name
# joined onto new_path(). Names that do not contain the previous release
# (README, for example) are carried over unchanged.
sub new_filename {
  my ($self, $old_filename) = @_;
  # Volume and directory of the old location are irrelevant to the new name
  my $new_file = (File::Spec->splitpath($old_filename))[2];
  my $old_release = $self->param('previous_release');
  my $release = $self->param('release');
  # \Q..\E makes the previous release match literally, so regex
  # metacharacters in it cannot widen or break the match. With an undefined
  # or empty value the pattern would collapse to two dots, so the rename is
  # skipped in that case.
  if (defined $old_release && length $old_release) {
    $new_file =~ s/\.\Q$old_release\E\./.$release./;
  }
  my $new_path = $self->new_path();
  return File::Spec->catfile($new_path, $new_file);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Target directory for this release's DNA files: whatever
# fasta_path('dna') returns (fasta_path is not defined in this file).
sub new_path {
  my $self = shift;
  return $self->fasta_path('dna');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Collects the files to carry over from the previous release.
#
# Hands old_path() and a filter callback to find_files() and returns its
# result unchanged. The filter yields 1 for a name that is exactly 'README'
# or that ends in '.fa.gz', and 0 for anything else.
sub get_dna_files {
  my ($self) = @_;
  my $wanted = sub {
    my ($filename) = @_;
    return 1 if $filename =~ /\.fa\.gz$/;
    return 1 if $filename eq 'README';
    return 0;
  };
  my $old_path = $self->old_path();
  my $files = $self->find_files($old_path, $wanted);
  return $files;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 1;