0
|
1 =pod
|
|
2
|
|
3 =head1 NAME
|
|
4
|
|
5 Bio::EnsEMBL::Hive::RunnableDB::Funcgen::ConvergeReplicates
|
|
6
|
|
7 =head1 DESCRIPTION
|
|
8
|
|
9 'ConvergeReplicates' Just merges the replicates BAM and convert it to a SAM
|
|
10 into the alignments folder for the peaks pipeline...
|
|
11
|
|
12 =cut
|
|
13
|
|
14 package Bio::EnsEMBL::Funcgen::RunnableDB::ConvergeReplicates;
|
|
15
|
|
16 use warnings;
|
|
17 use strict;
|
|
18 use Bio::EnsEMBL::DBSQL::DBAdaptor;
|
|
19 use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor;
|
|
20 use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
|
|
21 #use Data::Dumper;
|
|
22
|
|
23 use base ('Bio::EnsEMBL::Funcgen::RunnableDB::Alignment');
|
|
24
|
|
25 #TODO... Maybe use and update the tracking database...
|
|
26 sub fetch_input { # fetch parameters...
|
|
27 my $self = shift @_;
|
|
28
|
|
29 $self->SUPER::fetch_input();
|
|
30
|
|
31 my $species = $self->_species();
|
|
32 my $gender = $self->_cell_type()->gender;
|
|
33 $gender = $gender ? $gender : "male";
|
|
34 my $assembly = $self->_assembly();
|
|
35
|
|
36 my $sam_header = $self->_work_dir()."/sam_header/".$species."/".$species."_".$gender."_".$assembly."_unmasked.header.sam";
|
|
37 $self->_sam_header($sam_header);
|
|
38
|
|
39 my $repository = $self->_repository();
|
|
40 if(! -d $repository){
|
|
41 system("mkdir -p $repository") && throw("Couldn't create directory $repository");
|
|
42 }
|
|
43
|
|
44 #my $nbr_replicates = $self->param('nbr_replicates') || throw "Number of replicates not given";
|
|
45 my $nbr_replicates = $self->param('nbr_replicates') || 2;
|
|
46 $self->_nbr_replicates($nbr_replicates);
|
|
47
|
|
48 return 1;
|
|
49 }
|
|
50
|
|
51 sub run {
|
|
52 my $self = shift @_;
|
|
53
|
|
54 my $sam_header = $self->_sam_header();
|
|
55
|
|
56 my $output_file_prefix = $self->_repository()."/".$self->_set_name();
|
|
57 my $file_prefix = $self->_output_dir()."/".$self->_set_name();
|
|
58 my $merge_cmd;
|
|
59 if($self->_nbr_replicates()>1){
|
|
60 $merge_cmd="samtools merge -h $sam_header ${file_prefix}.bam ${file_prefix}.[0-9]*.sorted.bam ";
|
|
61 } else {
|
|
62 $merge_cmd = "cp ${file_prefix}.[0-9]*.sorted.bam ${file_prefix}.bam";
|
|
63 }
|
|
64 if(system($merge_cmd) != 0){ throw "Error merging replicate bam files: $merge_cmd"; }
|
|
65
|
|
66
|
|
67 my $convert_cmd = "samtools view -h ${file_prefix}.bam > ${output_file_prefix}.samse.sam";
|
|
68 if(system($convert_cmd) != 0){ throw "Error converting merged bam to sam: $convert_cmd"; }
|
|
69
|
|
70 my $zip_cmd = "gzip ${output_file_prefix}.samse.sam";
|
|
71 if(system($zip_cmd) != 0){ warn "Error zipping sam"; }
|
|
72
|
|
73 #Merge the alignment logs too...
|
|
74 my $alignment_log = $output_file_prefix.".alignment.log";
|
|
75 my $log_cmd="echo \"Alignment QC - total reads as input: \" >> ${alignment_log}";
|
|
76 $log_cmd="${log_cmd};samtools flagstat ${file_prefix}.bam | head -n 1 >> ${alignment_log}";
|
|
77 $log_cmd="${log_cmd}; echo \"Alignment QC - mapped reads: \" >> ${alignment_log} ";
|
|
78 $log_cmd="${log_cmd};samtools view -u -F 4 ${file_prefix}.bam | samtools flagstat - | head -n 1 >> ${alignment_log}";
|
|
79 $log_cmd="${log_cmd}; echo \"Alignment QC - reliably aligned reads (mapping quality >= 1): \" >> ${alignment_log}";
|
|
80 $log_cmd="${log_cmd};samtools view -u -F 4 -q 1 ${file_prefix}.bam | samtools flagstat - | head -n 1 >> ${alignment_log}";
|
|
81
|
|
82 if(system($log_cmd) != 0){ warn "Error making the alignment statistics"; }
|
|
83
|
|
84 my $rm_cmd="rm -f ${file_prefix}.bam";
|
|
85 if(system($rm_cmd) != 0){ warn "Error removing temp files. Remove them manually: $rm_cmd"; }
|
|
86
|
|
87 return 1;
|
|
88 }
|
|
89
|
|
90
|
|
91 sub write_output { # Nothing to do here
|
|
92 my $self = shift @_;
|
|
93
|
|
94 return 1;
|
|
95
|
|
96 }
|
|
97
|
|
98 #Private getter / setter to the sam header
|
|
99 sub _sam_header {
|
|
100 return $_[0]->_getter_setter('sam_header',$_[1]);
|
|
101 }
|
|
102
|
|
103 #Private getter / setter to the sam header
|
|
104 sub _nbr_replicates {
|
|
105 return $_[0]->_getter_setter('nbr_replicates',$_[1]);
|
|
106 }
|
|
107
|
|
108 1;
|