diff variant_effect_predictor/Bio/EnsEMBL/Pipeline/ChecksumGenerator.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Pipeline/ChecksumGenerator.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,143 @@
+=pod
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=head1 NAME
+
+Bio::EnsEMBL::Pipeline::ChecksumGenerator
+
+=head1 DESCRIPTION
+
+Creates a CHECKSUMS file in the given directory which is produced from running
+the sum command over every file in the directory. This excludes the CHECKSUMS
+file, parent directory or any hidden files.
+
+Allowed parameters are:
+
+=over 8
+
+=item dir - The directory to generate checksums for
+
+=item gzip - If the resulting file should be gzipped. Defaults to false
+
+=back
+
+=cut
+
+package Bio::EnsEMBL::Pipeline::ChecksumGenerator;
+
+use strict;
+use warnings;
+
+use base qw/Bio::EnsEMBL::Pipeline::Base/;
+
+use File::Spec;
+use Bio::EnsEMBL::Utils::IO qw/work_with_file gz_work_with_file/;
+
+sub param_defaults {
+  my ($self) = @_;
+  return {
+    gzip => 0
+  };
+}
+
+sub fetch_input {
+  my ($self) = @_;
+  my $dir = $self->param('dir');
+  $self->throw("No 'dir' parameter specified") unless $dir;
+  $self->throw("Dir $dir does not exist") unless -d $dir;
+  return;
+}
+
+sub run {
+  my ($self) = @_;
+  my @checksums;
+  
+  my $dir = $self->param('dir');
+  $self->info('Checksumming directory %s', $dir);
+
+  opendir(my $dh, $dir) or die "Cannot open directory $dir";
+  my @files = sort { $a cmp $b } readdir($dh);
+  closedir($dh) or die "Cannot close directory $dir";
+
+  foreach my $file (@files) {
+    next if $file =~ /^\./;         #hidden file or up/current dir
+    next if $file =~ /^CHECKSUM/;
+    my $path = File::Spec->catfile($dir, $file);
+    my $checksum = $self->checksum($path);
+    push(@checksums, [$checksum, $file])
+  }
+  
+  $self->param('checksums', \@checksums);
+  return;
+}
+
+sub write_output {
+  my ($self) = @_;
+  my $dir = $self->param('dir');
+  my $checksum = File::Spec->catfile($dir, 'CHECKSUMS');
+  $checksum .= '.gz' if $self->param('gzip');
+  if(-f $checksum) {
+    $self->info('Checksum file already exists. Removing');
+    unlink $checksum;
+  }
+  
+  my @checksums = @{$self->param('checksums')};
+  
+  return unless @checksums;
+  
+  my $writer = sub {
+    my ($fh) = @_;
+    foreach my $entry (@checksums) {
+      my $line = join(qq{\t}, @{$entry});
+      print $fh $line;
+      print $fh "\n"; 
+    }
+    return;
+  };
+  my @params = ($checksum, 'w', $writer);
+  
+  
+  if($self->param('gzip')) {
+    gz_work_with_file(@params);
+  } 
+  else {
+    work_with_file(@params);
+  } 
+  
+  $self->permissions($checksum);
+  return;
+}
+
+sub checksum {
+  my ($self, $path) = @_;
+  my $checksum = `sum $path`;
+  $checksum =~ s/\s* $path//xms;
+  chomp($checksum);
+  return $checksum;
+}
+
+sub permissions {
+  my ($self, $file) = @_;
+  my $mode = 0666;
+  chmod($mode, $file) or $self->throw("Cannot perform the chmod to mode $mode for file $file");
+  return;
+}
+
+1;