diff variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/ConvergeReplicates.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/ConvergeReplicates.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,108 @@
+=pod 
+
+=head1 NAME
+
+Bio::EnsEMBL::Hive::RunnableDB::Funcgen::ConvergeReplicates
+
+=head1 DESCRIPTION
+
+'ConvergeReplicates' Just merges the replicates BAM and convert it to a SAM
+into the alignments folder for the peaks pipeline...
+
+=cut
+
+package Bio::EnsEMBL::Funcgen::RunnableDB::ConvergeReplicates;
+
+use warnings;
+use strict;
+use Bio::EnsEMBL::DBSQL::DBAdaptor;
+use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor; 
+use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
+#use Data::Dumper;
+
+use base ('Bio::EnsEMBL::Funcgen::RunnableDB::Alignment');
+
+#TODO... Maybe use and update the tracking database...
+sub fetch_input {   # fetch parameters...
+  my $self = shift @_;
+  
+  $self->SUPER::fetch_input();
+
+  my $species = $self->_species();
+  my $gender = $self->_cell_type()->gender;
+  $gender = $gender ? $gender : "male";
+  my $assembly = $self->_assembly();
+
+  my $sam_header = $self->_work_dir()."/sam_header/".$species."/".$species."_".$gender."_".$assembly."_unmasked.header.sam";
+  $self->_sam_header($sam_header);
+
+  my $repository = $self->_repository();
+  if(! -d $repository){ 
+    system("mkdir -p $repository") && throw("Couldn't create directory $repository");
+  }
+
+  #my $nbr_replicates = $self->param('nbr_replicates') || throw "Number of replicates not given";
+  my $nbr_replicates = $self->param('nbr_replicates') || 2;
+  $self->_nbr_replicates($nbr_replicates);
+
+  return 1;
+}
+
+sub run {   
+  my $self = shift @_;
+
+  my $sam_header = $self->_sam_header();
+
+  my $output_file_prefix = $self->_repository()."/".$self->_set_name();
+  my $file_prefix = $self->_output_dir()."/".$self->_set_name();
+  my $merge_cmd;
+  if($self->_nbr_replicates()>1){
+    $merge_cmd="samtools merge -h $sam_header ${file_prefix}.bam ${file_prefix}.[0-9]*.sorted.bam "; 
+  }  else {
+    $merge_cmd = "cp ${file_prefix}.[0-9]*.sorted.bam ${file_prefix}.bam";    
+  }
+  if(system($merge_cmd) != 0){ throw "Error merging replicate bam files: $merge_cmd"; }
+  
+
+  my $convert_cmd = "samtools view -h ${file_prefix}.bam > ${output_file_prefix}.samse.sam";
+  if(system($convert_cmd) != 0){ throw "Error converting merged bam to sam: $convert_cmd"; }
+
+  my $zip_cmd = "gzip ${output_file_prefix}.samse.sam";
+  if(system($zip_cmd) != 0){ warn "Error zipping sam"; }
+
+  #Merge the alignment logs too...
+  my $alignment_log = $output_file_prefix.".alignment.log";
+  my $log_cmd="echo \"Alignment QC - total reads as input: \" >> ${alignment_log}";
+  $log_cmd="${log_cmd};samtools flagstat ${file_prefix}.bam | head -n 1 >> ${alignment_log}";
+  $log_cmd="${log_cmd}; echo \"Alignment QC - mapped reads: \" >> ${alignment_log} ";
+  $log_cmd="${log_cmd};samtools view -u -F 4 ${file_prefix}.bam | samtools flagstat - | head -n 1 >> ${alignment_log}";
+  $log_cmd="${log_cmd}; echo \"Alignment QC - reliably aligned reads (mapping quality >= 1): \" >> ${alignment_log}";
+  $log_cmd="${log_cmd};samtools view -u -F 4 -q 1 ${file_prefix}.bam | samtools flagstat - | head -n 1 >> ${alignment_log}";
+    
+  if(system($log_cmd) != 0){ warn "Error making the alignment statistics"; }
+
+  my $rm_cmd="rm -f ${file_prefix}.bam";
+  if(system($rm_cmd) != 0){ warn "Error removing temp files. Remove them manually: $rm_cmd"; }
+
+  return 1;
+}
+
+
+sub write_output {  # Nothing to do here
+  my $self = shift @_;
+
+  return 1;
+
+}
+
+#Private getter / setter to the sam header
+sub _sam_header {
+  return $_[0]->_getter_setter('sam_header',$_[1]);
+}
+
+#Private getter / setter to the sam header
+sub _nbr_replicates {
+  return $_[0]->_getter_setter('nbr_replicates',$_[1]);
+}
+
+1;