annotate variant_effect_predictor/Bio/EnsEMBL/Pipeline/ChecksumGenerator.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::Pipeline::ChecksumGenerator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 Creates a CHECKSUMS file in the given directory which is produced from running
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 the sum command over every file in the directory. This excludes the CHECKSUMS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 file, parent directory or any hidden files.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 Allowed parameters are:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 =over 8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 =item dir - The directory to generate checksums for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 =item gzip - If the resulting file should be gzipped. Defaults to false
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 package Bio::EnsEMBL::Pipeline::ChecksumGenerator;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 use base qw/Bio::EnsEMBL::Pipeline::Base/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 use File::Spec;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 use Bio::EnsEMBL::Utils::IO qw/work_with_file gz_work_with_file/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Supplies the default values for this module's optional parameters.
#
# Returns a reference to a freshly built hash on every call. The only
# default defined here is 'gzip', which is off (0).
sub param_defaults {
  my ($self) = @_;
  my %defaults = (
    gzip => 0,
  );
  return \%defaults;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Validates the 'dir' parameter before any work is done.
#
# Calls $self->throw() when 'dir' is unset/false, and again when the
# value does not name an existing directory. Returns nothing.
sub fetch_input {
  my ($self) = @_;
  my $target_dir = $self->param('dir');

  if (! $target_dir) {
    $self->throw("No 'dir' parameter specified");
  }
  if (! -d $target_dir) {
    $self->throw("Dir $target_dir does not exist");
  }

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Computes a checksum for every eligible entry in the 'dir' parameter.
#
# Entries are processed in string-sorted order. The following are skipped:
#   * names starting with '.' (hidden files and the '.' / '..' entries)
#   * names starting with 'CHECKSUM' (previously generated output)
#   * sub-directories, which the sum command cannot checksum; passing them
#     through would record an entry with an empty checksum
#
# The result, an array reference of [checksum, file name] pairs, is stored
# in the 'checksums' parameter. Dies (including the OS error text) if the
# directory cannot be opened or closed. Returns nothing.
sub run {
  my ($self) = @_;
  my @checksums;

  my $dir = $self->param('dir');
  $self->info('Checksumming directory %s', $dir);

  opendir(my $dh, $dir) or die "Cannot open directory $dir: $!";
  my @files = sort { $a cmp $b } readdir($dh);
  closedir($dh) or die "Cannot close directory $dir: $!";

  foreach my $file (@files) {
    next if $file =~ /^\./; #hidden file or up/current dir
    next if $file =~ /^CHECKSUM/;
    my $path = File::Spec->catfile($dir, $file);
    next if -d $path; #only files can be checksummed
    my $checksum = $self->checksum($path);
    push(@checksums, [$checksum, $file]);
  }

  $self->param('checksums', \@checksums);
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Writes the collected checksums to <dir>/CHECKSUMS, or to
# <dir>/CHECKSUMS.gz when the 'gzip' parameter is true.
#
# Any existing file at the target location is removed first. If the
# 'checksums' parameter holds no entries, nothing is written and the sub
# returns straight after that clean-up. Otherwise each entry is emitted
# as one line with its elements joined by tabs, and the file is then
# passed to $self->permissions(). Returns nothing.
sub write_output {
  my ($self) = @_;

  my $output_dir = $self->param('dir');
  my $target = File::Spec->catfile($output_dir, 'CHECKSUMS');
  if ($self->param('gzip')) {
    $target .= '.gz';
  }

  # Clear out a stale file from an earlier run before deciding whether
  # there is anything new to write.
  if (-f $target) {
    $self->info('Checksum file already exists. Removing');
    unlink $target;
  }

  my @entries = @{ $self->param('checksums') };
  if (! @entries) {
    return;
  }

  # Callback handed to the file helper; it receives the open handle.
  my $emit_entries = sub {
    my ($fh) = @_;
    for my $pair (@entries) {
      my $row = join(qq{\t}, @{$pair});
      print {$fh} $row;
      print {$fh} "\n";
    }
    return;
  };
  my @helper_args = ($target, 'w', $emit_entries);

  if ($self->param('gzip')) {
    gz_work_with_file(@helper_args);
  }
  else {
    work_with_file(@helper_args);
  }

  $self->permissions($target);
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Runs the external 'sum' command on $path and returns its output with
# the file path (if 'sum' echoed it) and the trailing newline removed.
#
# The command is started with a list-form pipe open, so no shell is
# involved: paths containing spaces, quotes or other shell metacharacters
# are passed to 'sum' verbatim. The path is also quoted with \Q...\E when
# it is stripped from the output, so regex metacharacters and whitespace
# in it match literally (the pattern uses /x, which would otherwise
# ignore whitespace coming from the interpolated path).
#
# Throws via $self->throw() if 'sum' cannot be started. The exit status
# of 'sum' is not checked; if it prints nothing, an empty string is
# returned.
sub checksum {
  my ($self, $path) = @_;

  open(my $sum_fh, '-|', 'sum', $path)
    or $self->throw("Cannot run sum on $path: $!");
  my $checksum = do { local $/ = undef; <$sum_fh> };
  close($sum_fh); # a non-zero exit from sum is tolerated
  $checksum = q{} unless defined $checksum;

  $checksum =~ s/\s* \Q$path\E//xms;
  chomp($checksum);
  return $checksum;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Sets the mode of $file to 0666 (read/write for user, group and other).
#
# Throws via $self->throw() if chmod fails. The message reports the mode
# in octal, as it would be written on the command line, together with
# the OS error text. Returns nothing.
sub permissions {
  my ($self, $file) = @_;
  my $mode = 0666;
  chmod($mode, $file)
    or $self->throw(sprintf('Cannot perform the chmod to mode %04o for file %s: %s', $mode, $file, $!));
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 1;