0
|
1 =pod
|
|
2
|
|
3 =head1 NAME
|
|
4
|
|
5 Bio::EnsEMBL::Funcgen::RunnableDB::WrapUpAlignment;
|
|
6
|
|
7 =head1 DESCRIPTION
|
|
8
|
|
9 'WrapUpAlignment' Merges all results from alignment jobs into a single file
|
|
10
|
|
11 =cut
|
|
12
|
|
13 package Bio::EnsEMBL::Funcgen::RunnableDB::WrapUpAlignment;
|
|
14
|
|
15 use warnings;
|
|
16 use strict;
|
|
17 use Bio::EnsEMBL::Funcgen::Utils::Helper;
|
|
18 use Bio::EnsEMBL::DBSQL::DBAdaptor;
|
|
19 use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor;
|
|
20 use Bio::EnsEMBL::Funcgen::InputSet;
|
|
21 use Bio::EnsEMBL::Funcgen::DataSet;
|
|
22 use Bio::EnsEMBL::Funcgen::FeatureSet;
|
|
23 use Bio::EnsEMBL::Funcgen::AnnotatedFeature;
|
|
24
|
|
25 use base ('Bio::EnsEMBL::Funcgen::RunnableDB::Alignment');
|
|
26
|
|
27 use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
|
|
28 use Data::Dumper;
|
|
29 use File::Copy;
|
|
30
|
|
31 sub fetch_input {
|
|
32 my $self = shift @_;
|
|
33
|
|
34 $self->SUPER::fetch_input();
|
|
35
|
|
36 #my $replicate = $self->param('replicate') || throw "No replicate given";
|
|
37 #$self->_replicate($replicate);
|
|
38
|
|
39 #my $nbr_subfiles = $self->param('nbr_subfiles') || throw "Number of subfiles not given";
|
|
40 #if($nbr_subfiles<1){ throw "We need at least one subfile. Empty or invalid set given: $nbr_subfiles"; }
|
|
41 #$self->_nbr_subfiles($nbr_subfiles);
|
|
42
|
|
43 my $species=$self->_species();
|
|
44 my $gender=$self->_cell_type->gender();
|
|
45 $gender = $gender ? $gender : "male";
|
|
46 my $assembly=$self->_assembly();
|
|
47 my $sam_header = $self->_work_dir()."/sam_header/".$species."/".$species."_".$gender."_".$assembly."_unmasked.header.sam";
|
|
48 $self->_sam_header($sam_header);
|
|
49
|
|
50
|
|
51 my $repository = $self->_repository();
|
|
52 if(! -d $repository){
|
|
53 system("mkdir -p $repository") && throw("Couldn't create directory $repository");
|
|
54 }
|
|
55
|
|
56 return 1;
|
|
57 }
|
|
58
|
|
59 sub run {
|
|
60 my $self = shift @_;
|
|
61
|
|
62 # my $replicate = $self->_replicate();
|
|
63 my $sam_header = $self->_sam_header();
|
|
64
|
|
65 my $set_name = $self->_set_name();
|
|
66 my $file_prefix = $self->_output_dir()."/".$set_name;
|
|
67 my $input_file_pattern = "${file_prefix}_[0-9]*_[0-9]*_[0-9]*.sorted.bam";
|
|
68 my $merge_cmd="samtools merge -f -h $sam_header ${file_prefix}.sorted.bam $input_file_pattern";
|
|
69
|
|
70 #Maybe remove duplicates as default behavior ?.
|
|
71 #my $merge_cmd="samtools merge -h $sam_header - ${file_prefix}.[0-9]*.sorted.bam | ";
|
|
72 #$merge_cmd.=" samtools rmdup -s - ${file_prefix}.sorted.bam";
|
|
73
|
|
74 if(system($merge_cmd) != 0){ throw "Error merging file: $merge_cmd"; }
|
|
75
|
|
76 #keep end result as bam... it's more compact: only when passing to the peak caller we pass to sam or other format...
|
|
77 #merge_cmd.=" samtools view -h - | gzip -c > ${file_prefix}${align_type}.sam.gz"
|
|
78
|
|
79 my $alignment_log = $file_prefix.".alignment.log";
|
|
80
|
|
81 my $log_cmd="echo \"Alignment QC - total reads as input: \" >> ${alignment_log}";
|
|
82 $log_cmd.=";samtools flagstat ${file_prefix}.sorted.bam | head -n 1 >> ${alignment_log}";
|
|
83 $log_cmd.=";echo \"Alignment QC - mapped reads: \" >> ${alignment_log} ";
|
|
84 $log_cmd.=";samtools view -u -F 4 ${file_prefix}.sorted.bam | samtools flagstat - | head -n 1 >> ${alignment_log}";
|
|
85 $log_cmd.="; echo \"Alignment QC - reliably aligned reads (mapping quality >= 1): \" >> ${alignment_log}";
|
|
86 $log_cmd.=";samtools view -u -F 4 -q 1 ${file_prefix}.sorted.bam | samtools flagstat - | head -n 1 >> ${alignment_log}";
|
|
87 #Maybe do some percentages?
|
|
88
|
|
89 if(system($log_cmd) != 0){ warn "Error making the alignment statistics"; }
|
|
90 print STDERR "logged\n";
|
|
91 my $repository = $self->_repository();
|
|
92
|
|
93 move("${file_prefix}.sorted.bam","${repository}/${set_name}.samse.bam");
|
|
94
|
|
95 my $convert_cmd = "samtools view -h ${repository}/${set_name}.samse.bam | gzip -c - > ${repository}/${set_name}.samse.sam.gz";
|
|
96
|
|
97
|
|
98 #print STDERR "using convert cmd: ".$convert_cmd;
|
|
99
|
|
100 if(system($convert_cmd) != 0){ warn "Error converting to zipped sam"; }
|
|
101 #print STDERR "converted\n";
|
|
102 my $rm_cmd="rm -f $input_file_pattern";
|
|
103 if(system($rm_cmd) != 0){ warn "Error removing temp files. Remove them manually"; }
|
|
104 #$rm_cmd="rm -f ${file_prefix}.sorted.bam";
|
|
105 #if(system($rm_cmd) != 0){ warn "Error removing final bam files. Remove it manually"; }
|
|
106
|
|
107 return 1;
|
|
108 }
|
|
109
|
|
110
|
|
111 sub write_output {
|
|
112 my $self = shift @_;
|
|
113
|
|
114 return 1;
|
|
115
|
|
116 }
|
|
117
|
|
118 #Private getter / setter to the sam header
|
|
119 sub _sam_header {
|
|
120 return $_[0]->_getter_setter('sam_header',$_[1]);
|
|
121 }
|
|
122
|
|
123 #Private getter / setter to the replicate number
|
|
124 sub _replicate {
|
|
125 return $_[0]->_getter_setter('replicate',$_[1]);
|
|
126 }
|
|
127
|
|
128 #Private getter / setter to the replicate number
|
|
129 sub _nbr_subfiles {
|
|
130 return $_[0]->_getter_setter('nbr_subfiles',$_[1]);
|
|
131 }
|
|
132
|
|
133 1;
|