Mercurial > repos > mahtabm > ensembl
view variant_effect_predictor/Bio/EnsEMBL/Pipeline/ChecksumGenerator.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line source
=pod

=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=head1 NAME

Bio::EnsEMBL::Pipeline::ChecksumGenerator

=head1 DESCRIPTION

Creates a CHECKSUMS file in the given directory which is produced from running
the sum command over every file in the directory. This excludes the CHECKSUMS
file, parent directory or any hidden files.

Allowed parameters are:

=over 8

=item dir - The directory to generate checksums for

=item gzip - If the resulting file should be gzipped. Defaults to false

=back

=cut

package Bio::EnsEMBL::Pipeline::ChecksumGenerator;

use strict;
use warnings;

use base qw/Bio::EnsEMBL::Pipeline::Base/;

use File::Spec;
use Bio::EnsEMBL::Utils::IO qw/work_with_file gz_work_with_file/;

# Default parameter values: the CHECKSUMS file is written uncompressed unless
# the 'gzip' parameter is set.
sub param_defaults {
  my ($self) = @_;
  return {
    gzip => 0
  };
}

# Validates the 'dir' parameter. Throws if it is missing/false or does not
# name an existing directory.
sub fetch_input {
  my ($self) = @_;
  my $dir = $self->param('dir');
  $self->throw("No 'dir' parameter specified") unless $dir;
  $self->throw("Dir $dir does not exist") unless -d $dir;
  return;
}

# Checksums every visible entry of 'dir' (in sorted name order) and stores the
# result in the 'checksums' parameter as an array ref of
# [checksum, file name] pairs.
sub run {
  my ($self) = @_;
  my @checksums;

  my $dir = $self->param('dir');
  $self->info('Checksumming directory %s', $dir);

  opendir(my $dh, $dir) or die "Cannot open directory $dir";
  my @files = sort { $a cmp $b } readdir($dh);
  closedir($dh) or die "Cannot close directory $dir";

  # NOTE(review): entries are not filtered by type, so a sub-directory is
  # handed to sum as well -- confirm whether that is intended.
  foreach my $file (@files) {
    next if $file =~ /^\./;         # hidden file or up/current dir
    next if $file =~ /^CHECKSUM/;   # never checksum a previous CHECKSUMS file
    my $path = File::Spec->catfile($dir, $file);
    my $checksum = $self->checksum($path);
    push(@checksums, [$checksum, $file]);
  }

  $self->param('checksums', \@checksums);
  return;
}

# Writes the collected checksums to <dir>/CHECKSUMS (or CHECKSUMS.gz when the
# 'gzip' parameter is true), one tab separated "checksum<TAB>file" line per
# entry, then makes the file world readable/writable via permissions().
#
# Any pre-existing checksum file of the same name is removed first. If there
# is nothing to write no new file is created.
sub write_output {
  my ($self) = @_;
  my $dir = $self->param('dir');
  my $checksum = File::Spec->catfile($dir, 'CHECKSUMS');
  $checksum .= '.gz' if $self->param('gzip');

  if(-f $checksum) {
    $self->info('Checksum file already exists. Removing');
    unlink $checksum;
  }

  my @checksums = @{$self->param('checksums')};

  return unless @checksums;

  # Callback handed to the IO helper; it receives the open file handle.
  my $writer = sub {
    my ($fh) = @_;
    foreach my $entry (@checksums) {
      my $line = join(qq{\t}, @{$entry});
      print $fh $line;
      print $fh "\n";
    }
    return;
  };
  my @params = ($checksum, 'w', $writer);

  if($self->param('gzip')) {
    gz_work_with_file(@params);
  }
  else {
    work_with_file(@params);
  }

  $self->permissions($checksum);
  return;
}

# Runs the system 'sum' command on $path and returns its output with the file
# name (when sum echoes it) and the trailing newline removed.
#
# Throws if the sum command cannot be started. A non-zero exit status from
# sum is not treated as fatal; whatever was printed (possibly an empty
# string) is returned.
sub checksum {
  my ($self, $path) = @_;

  # List-form pipe open executes sum directly rather than through a shell, so
  # spaces or shell metacharacters in the path cannot break or alter the
  # command.
  open(my $fh, '-|', 'sum', $path)
    or $self->throw("Cannot run sum on $path: $!");
  my $checksum = do { local $/ = undef; <$fh> };
  close($fh); # return value ignored on purpose: reflects sum's exit status
  $checksum = q{} unless defined $checksum;

  # \Q..\E makes the path match literally; unquoted, regex metacharacters in
  # the path would be interpreted as pattern syntax.
  $checksum =~ s/\s*\Q$path\E//;
  chomp($checksum);
  return $checksum;
}

# Sets the mode of $file to 0666 (read/write for user, group and other).
# Throws if the chmod fails.
sub permissions {
  my ($self, $file) = @_;
  my $mode = 0666;
  # Format the mode as octal for the message; plain interpolation would print
  # the decimal value 438.
  chmod($mode, $file)
    or $self->throw(sprintf('Cannot perform the chmod to mode %04o for file %s', $mode, $file));
  return;
}

1;