comparison variant_effect_predictor/Bio/EnsEMBL/Pipeline/ChecksumGenerator.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =pod
2
3 =head1 LICENSE
4
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
6 Genome Research Limited. All rights reserved.
7
8 This software is distributed under a modified Apache license.
9 For license details, please see
10
11 http://www.ensembl.org/info/about/code_licence.html
12
13 =head1 CONTACT
14
15 Please email comments or questions to the public Ensembl
16 developers list at <dev@ensembl.org>.
17
18 Questions may also be sent to the Ensembl help desk at
19 <helpdesk@ensembl.org>.
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Pipeline::ChecksumGenerator
24
25 =head1 DESCRIPTION
26
27 Creates a CHECKSUMS file in the given directory which is produced from running
28 the sum command over every file in the directory. This excludes the CHECKSUMS
29 file, parent directory or any hidden files.
30
31 Allowed parameters are:
32
33 =over 8
34
35 =item dir - The directory to generate checksums for
36
37 =item gzip - If the resulting file should be gzipped. Defaults to false
38
39 =back
40
41 =cut
42
43 package Bio::EnsEMBL::Pipeline::ChecksumGenerator;
44
45 use strict;
46 use warnings;
47
48 use base qw/Bio::EnsEMBL::Pipeline::Base/;
49
50 use File::Spec;
51 use Bio::EnsEMBL::Utils::IO qw/work_with_file gz_work_with_file/;
52
# Supplies the default values for the parameters this module accepts.
# Only 'gzip' has a default, and it is off (0).
#
# Returns a hash reference of parameter name => default value.
sub param_defaults {
  my ($self) = @_;
  my %defaults = (gzip => 0);
  return \%defaults;
}
59
# Validates the 'dir' parameter before any work is done.
#
# Throws (via $self->throw) when 'dir' is unset/false or when it does not
# name an existing directory. Returns nothing.
sub fetch_input {
  my ($self) = @_;

  my $target = $self->param('dir');
  if (!$target) {
    $self->throw("No 'dir' parameter specified");
  }
  if (!-d $target) {
    $self->throw("Dir $target does not exist");
  }

  return;
}
67
# Checksums every regular file directly inside the 'dir' parameter and stores
# the result in the 'checksums' parameter as an array reference of
# [checksum, file name] pairs, ordered by file name (string comparison).
#
# Entries that are skipped:
#   - names starting with '.'        (hidden files and the '.'/'..' entries)
#   - names starting with 'CHECKSUM' (output of an earlier run)
#   - anything that is not a plain file; the checksum command cannot process
#     a directory, and including one would record an empty checksum
#
# Dies if the directory cannot be opened or closed; the message carries the
# operating-system error. Returns nothing.
sub run {
  my ($self) = @_;
  my @checksums;

  my $dir = $self->param('dir');
  $self->info('Checksumming directory %s', $dir);

  opendir(my $dh, $dir) or die "Cannot open directory $dir: $!";
  my @files = sort { $a cmp $b } readdir($dh);
  closedir($dh) or die "Cannot close directory $dir: $!";

  foreach my $file (@files) {
    next if $file =~ /^\./;          # hidden file or up/current dir
    next if $file =~ /^CHECKSUM/;    # never checksum our own output
    my $path = File::Spec->catfile($dir, $file);
    next unless -f $path;            # sub-directories etc. have no checksum
    my $checksum = $self->checksum($path);
    push(@checksums, [$checksum, $file]);
  }

  $self->param('checksums', \@checksums);
  return;
}
90
# Writes the collected checksums to <dir>/CHECKSUMS, or <dir>/CHECKSUMS.gz
# when the 'gzip' parameter is true. Each entry of the 'checksums' parameter
# becomes one line with its elements joined by a tab.
#
# An already existing output file is removed first. When there are no
# checksums at all, nothing is written and the method returns right after
# that clean-up. After writing, the file's mode is set through
# $self->permissions. Returns nothing.
sub write_output {
  my ($self) = @_;

  my $target = File::Spec->catfile($self->param('dir'), 'CHECKSUMS');
  if ($self->param('gzip')) {
    $target .= '.gz';
  }

  if (-f $target) {
    $self->info('Checksum file already exists. Removing');
    unlink $target;
  }

  my @entries = @{ $self->param('checksums') };
  return if !@entries;

  # Callback handed to the file helper; it receives the open write handle.
  my $emit = sub {
    my ($out) = @_;
    for my $pair (@entries) {
      print {$out} join(qq{\t}, @{$pair});
      print {$out} "\n";
    }
    return;
  };

  # Same arguments either way; only the helper that opens the file differs.
  my $write_file = $self->param('gzip') ? \&gz_work_with_file : \&work_with_file;
  $write_file->($target, 'w', $emit);

  $self->permissions($target);
  return;
}
127
# Runs the external 'sum' command on a single file and returns its output
# with the file name (if the command printed one) and the trailing newline
# removed, i.e. the checksum and block count only.
#
# Parameters:
#   $path - location of the file to checksum
#
# The command is started through a list-form pipe open, so no shell is
# involved and the path needs no quoting: spaces and shell metacharacters in
# file names are passed through literally.
#
# Throws (via $self->throw) if the command cannot be started. A non-zero exit
# status from sum is not treated as an error; whatever it printed on standard
# output (possibly nothing) is returned.
sub checksum {
  my ($self, $path) = @_;

  open(my $pipe, '-|', 'sum', $path)
    or $self->throw("Cannot run sum on $path: $!");
  my $checksum = do { local $/; <$pipe> };
  close($pipe);    # exit status deliberately ignored, see above
  $checksum = q{} unless defined $checksum;

  # \Q..\E makes the path match literally; file names routinely contain
  # characters such as '.', '+' or '(' that are special inside a regex.
  $checksum =~ s/\s*\Q$path\E//ms;
  chomp($checksum);
  return $checksum;
}
135
# Makes the given file readable and writable by everyone (mode 0666).
#
# Parameters:
#   $file - path of the file whose mode is changed
#
# Throws (via $self->throw) if chmod fails. The message shows the mode in
# octal, the form it is written in, together with the operating-system error.
# Returns nothing.
sub permissions {
  my ($self, $file) = @_;
  my $mode = 0666;
  chmod($mode, $file)
    or $self->throw(
      sprintf('Cannot perform the chmod to mode %04o for file %s: %s', $mode, $file, $!)
    );
  return;
}
142
143 1;