0
|
1 =pod
|
|
2
|
|
3 =head1 LICENSE
|
|
4
|
|
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
6 Genome Research Limited. All rights reserved.
|
|
7
|
|
8 This software is distributed under a modified Apache license.
|
|
9 For license details, please see
|
|
10
|
|
11 http://www.ensembl.org/info/about/code_licence.html
|
|
12
|
|
13 =head1 CONTACT
|
|
14
|
|
15 Please email comments or questions to the public Ensembl
|
|
16 developers list at <dev@ensembl.org>.
|
|
17
|
|
18 Questions may also be sent to the Ensembl help desk at
|
|
19 <helpdesk@ensembl.org>.
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Pipeline::ChecksumGenerator
|
|
24
|
|
25 =head1 DESCRIPTION
|
|
26
|
|
27 Creates a CHECKSUMS file in the given directory which is produced from running
|
|
28 the sum command over every file in the directory. This excludes the CHECKSUMS
|
|
29 file, parent directory or any hidden files.
|
|
30
|
|
31 Allowed parameters are:
|
|
32
|
|
33 =over 8
|
|
34
|
|
35 =item dir - The directory to generate checksums for
|
|
36
|
|
37 =item gzip - If the resulting file should be gzipped. Defaults to false
|
|
38
|
|
39 =back
|
|
40
|
|
41 =cut
|
|
42
|
|
43 package Bio::EnsEMBL::Pipeline::ChecksumGenerator;
|
|
44
|
|
45 use strict;
|
|
46 use warnings;
|
|
47
|
|
48 use base qw/Bio::EnsEMBL::Pipeline::Base/;
|
|
49
|
|
50 use File::Spec;
|
|
51 use Bio::EnsEMBL::Utils::IO qw/work_with_file gz_work_with_file/;
|
|
52
|
|
# Supplies the default values for this module's optional parameters.
#
# Returns : hash reference of defaults; 'gzip' is off (0) unless the
#           caller overrides it.
sub param_defaults {
    my ($self) = @_;
    my %defaults = (
        gzip => 0,
    );
    return \%defaults;
}
|
|
59
|
|
# Validates the input before any work is done.
#
# Reads   : the 'dir' parameter
# Throws  : if 'dir' is missing/false, or if it is not an existing directory
# Returns : nothing
sub fetch_input {
    my ($self) = @_;
    my $dir = $self->param('dir');

    if (!$dir) {
        $self->throw("No 'dir' parameter specified");
    }
    if (!-d $dir) {
        $self->throw("Dir $dir does not exist");
    }

    return;
}
|
|
67
|
|
# Builds the list of checksums for the directory named by the 'dir'
# parameter and stores it in the 'checksums' parameter as an array
# reference of [checksum, file name] pairs, ordered by file name.
#
# Entries are skipped when they
#   - start with a dot (hidden files and the '.' / '..' entries),
#   - start with 'CHECKSUM' (previously generated checksum files), or
#   - are not regular files (e.g. sub-directories), since only files can
#     be checksummed.
#
# Throws  : dies if the directory cannot be opened or closed
# Returns : nothing
sub run {
    my ($self) = @_;
    my @checksums;

    my $dir = $self->param('dir');
    $self->info('Checksumming directory %s', $dir);

    # Include $! so the reason for the failure is visible in the logs.
    opendir(my $dh, $dir) or die "Cannot open directory $dir: $!";
    my @files = sort { $a cmp $b } readdir($dh);
    closedir($dh) or die "Cannot close directory $dir: $!";

    foreach my $file (@files) {
        next if $file =~ /^\./;          # hidden file or up/current dir
        next if $file =~ /^CHECKSUM/;    # never checksum the checksum file
        my $path = File::Spec->catfile($dir, $file);
        # Without this check a sub-directory would be handed to checksum()
        # and end up in the output with an empty checksum.
        next unless -f $path;
        my $checksum = $self->checksum($path);
        push(@checksums, [$checksum, $file]);
    }

    $self->param('checksums', \@checksums);
    return;
}
|
|
90
|
|
# Writes the collected checksums to a CHECKSUMS file (CHECKSUMS.gz when the
# 'gzip' parameter is true) inside the 'dir' directory.
#
# Any existing file of the same name is removed first. If there are no
# checksums to write the method stops there, so no file is created. Each
# entry is written as one tab-separated line, and the finished file is
# passed to permissions().
#
# Reads   : the 'dir', 'gzip' and 'checksums' parameters
# Returns : nothing
sub write_output {
    my ($self) = @_;

    my $target = File::Spec->catfile($self->param('dir'), 'CHECKSUMS');
    if ($self->param('gzip')) {
        $target .= '.gz';
    }

    # Clear out a stale file from a previous run.
    if (-f $target) {
        $self->info('Checksum file already exists. Removing');
        unlink $target;
    }

    my @rows = @{ $self->param('checksums') };
    if (!@rows) {
        return;
    }

    # Callback handed to the IO helper; receives the open file handle.
    my $emit_rows = sub {
        my ($out) = @_;
        for my $row (@rows) {
            print {$out} join("\t", @{$row});
            print {$out} "\n";
        }
        return;
    };

    # Pick the gzip-aware writer or the plain one.
    my $io = $self->param('gzip') ? \&gz_work_with_file : \&work_with_file;
    $io->($target, 'w', $emit_rows);

    $self->permissions($target);
    return;
}
|
|
127
|
|
# Runs the external 'sum' command on a single file and returns its output
# with the file name (when sum prints one) and the trailing newline
# removed.
#
# Arguments : $path - path of the file to checksum
# Returns   : string holding whatever sum printed for the file, minus the
#             path; an empty string if sum printed nothing
# Throws    : if the sum command cannot be started
sub checksum {
    my ($self, $path) = @_;

    # List-form pipe open bypasses the shell, so a path containing spaces
    # or shell metacharacters reaches sum as one literal argument.
    open(my $pipe, '-|', 'sum', $path)
        or $self->throw("Cannot run sum on $path: $!");
    my $checksum = do { local $/ = undef; <$pipe> };
    # A non-zero exit status from sum is deliberately not fatal: the
    # backtick version this replaces ignored it too, so whatever was
    # printed is returned.
    close($pipe);
    $checksum = q{} unless defined $checksum;

    # \Q...\E makes the path match literally: regex metacharacters and
    # (because of /x) whitespace in the path would otherwise stop the
    # file name from being stripped.
    $checksum =~ s/\s* \Q$path\E//xms;
    chomp($checksum);
    return $checksum;
}
|
|
135
|
|
# Makes the given file readable and writable by everyone (mode 0666).
#
# Arguments : $file - path of the file to change
# Throws    : if chmod fails; the message reports the mode in octal and
#             the system error
# Returns   : nothing
sub permissions {
    my ($self, $file) = @_;
    my $mode = 0666;
    if (!chmod($mode, $file)) {
        # Format the mode as octal: interpolating $mode directly would
        # print its decimal value (438), which is not what was requested.
        $self->throw(
            sprintf(
                'Cannot perform the chmod to mode %04o for file %s: %s',
                $mode, $file, $!
            )
        );
    }
    return;
}
|
|
142
|
|
143 1;
|