Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/WrapUpAlignment.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =pod | |
2 | |
3 =head1 NAME | |
4 | |
5 Bio::EnsEMBL::Funcgen::RunnableDB::WrapUpAlignment; | |
6 | |
7 =head1 DESCRIPTION | |
8 | |
9 'WrapUpAlignment' Merges all results from alignment jobs into a single file | |
10 | |
11 =cut | |
12 | |
13 package Bio::EnsEMBL::Funcgen::RunnableDB::WrapUpAlignment; | |
14 | |
15 use warnings; | |
16 use strict; | |
17 use Bio::EnsEMBL::Funcgen::Utils::Helper; | |
18 use Bio::EnsEMBL::DBSQL::DBAdaptor; | |
19 use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor; | |
20 use Bio::EnsEMBL::Funcgen::InputSet; | |
21 use Bio::EnsEMBL::Funcgen::DataSet; | |
22 use Bio::EnsEMBL::Funcgen::FeatureSet; | |
23 use Bio::EnsEMBL::Funcgen::AnnotatedFeature; | |
24 | |
25 use base ('Bio::EnsEMBL::Funcgen::RunnableDB::Alignment'); | |
26 | |
27 use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump); | |
28 use Data::Dumper; | |
29 use File::Copy; | |
30 | |
31 sub fetch_input { | |
32 my $self = shift @_; | |
33 | |
34 $self->SUPER::fetch_input(); | |
35 | |
36 #my $replicate = $self->param('replicate') || throw "No replicate given"; | |
37 #$self->_replicate($replicate); | |
38 | |
39 #my $nbr_subfiles = $self->param('nbr_subfiles') || throw "Number of subfiles not given"; | |
40 #if($nbr_subfiles<1){ throw "We need at least one subfile. Empty or invalid set given: $nbr_subfiles"; } | |
41 #$self->_nbr_subfiles($nbr_subfiles); | |
42 | |
43 my $species=$self->_species(); | |
44 my $gender=$self->_cell_type->gender(); | |
45 $gender = $gender ? $gender : "male"; | |
46 my $assembly=$self->_assembly(); | |
47 my $sam_header = $self->_work_dir()."/sam_header/".$species."/".$species."_".$gender."_".$assembly."_unmasked.header.sam"; | |
48 $self->_sam_header($sam_header); | |
49 | |
50 | |
51 my $repository = $self->_repository(); | |
52 if(! -d $repository){ | |
53 system("mkdir -p $repository") && throw("Couldn't create directory $repository"); | |
54 } | |
55 | |
56 return 1; | |
57 } | |
58 | |
59 sub run { | |
60 my $self = shift @_; | |
61 | |
62 # my $replicate = $self->_replicate(); | |
63 my $sam_header = $self->_sam_header(); | |
64 | |
65 my $set_name = $self->_set_name(); | |
66 my $file_prefix = $self->_output_dir()."/".$set_name; | |
67 my $input_file_pattern = "${file_prefix}_[0-9]*_[0-9]*_[0-9]*.sorted.bam"; | |
68 my $merge_cmd="samtools merge -f -h $sam_header ${file_prefix}.sorted.bam $input_file_pattern"; | |
69 | |
70 #Maybe remove duplicates as default behavior ?. | |
71 #my $merge_cmd="samtools merge -h $sam_header - ${file_prefix}.[0-9]*.sorted.bam | "; | |
72 #$merge_cmd.=" samtools rmdup -s - ${file_prefix}.sorted.bam"; | |
73 | |
74 if(system($merge_cmd) != 0){ throw "Error merging file: $merge_cmd"; } | |
75 | |
76 #keep end result as bam... it's more compact: only when passing to the peak caller we pass to sam or other format... | |
77 #merge_cmd.=" samtools view -h - | gzip -c > ${file_prefix}${align_type}.sam.gz" | |
78 | |
79 my $alignment_log = $file_prefix.".alignment.log"; | |
80 | |
81 my $log_cmd="echo \"Alignment QC - total reads as input: \" >> ${alignment_log}"; | |
82 $log_cmd.=";samtools flagstat ${file_prefix}.sorted.bam | head -n 1 >> ${alignment_log}"; | |
83 $log_cmd.=";echo \"Alignment QC - mapped reads: \" >> ${alignment_log} "; | |
84 $log_cmd.=";samtools view -u -F 4 ${file_prefix}.sorted.bam | samtools flagstat - | head -n 1 >> ${alignment_log}"; | |
85 $log_cmd.="; echo \"Alignment QC - reliably aligned reads (mapping quality >= 1): \" >> ${alignment_log}"; | |
86 $log_cmd.=";samtools view -u -F 4 -q 1 ${file_prefix}.sorted.bam | samtools flagstat - | head -n 1 >> ${alignment_log}"; | |
87 #Maybe do some percentages? | |
88 | |
89 if(system($log_cmd) != 0){ warn "Error making the alignment statistics"; } | |
90 print STDERR "logged\n"; | |
91 my $repository = $self->_repository(); | |
92 | |
93 move("${file_prefix}.sorted.bam","${repository}/${set_name}.samse.bam"); | |
94 | |
95 my $convert_cmd = "samtools view -h ${repository}/${set_name}.samse.bam | gzip -c - > ${repository}/${set_name}.samse.sam.gz"; | |
96 | |
97 | |
98 #print STDERR "using convert cmd: ".$convert_cmd; | |
99 | |
100 if(system($convert_cmd) != 0){ warn "Error converting to zipped sam"; } | |
101 #print STDERR "converted\n"; | |
102 my $rm_cmd="rm -f $input_file_pattern"; | |
103 if(system($rm_cmd) != 0){ warn "Error removing temp files. Remove them manually"; } | |
104 #$rm_cmd="rm -f ${file_prefix}.sorted.bam"; | |
105 #if(system($rm_cmd) != 0){ warn "Error removing final bam files. Remove it manually"; } | |
106 | |
107 return 1; | |
108 } | |
109 | |
110 | |
111 sub write_output { | |
112 my $self = shift @_; | |
113 | |
114 return 1; | |
115 | |
116 } | |
117 | |
118 #Private getter / setter to the sam header | |
119 sub _sam_header { | |
120 return $_[0]->_getter_setter('sam_header',$_[1]); | |
121 } | |
122 | |
123 #Private getter / setter to the replicate number | |
124 sub _replicate { | |
125 return $_[0]->_getter_setter('replicate',$_[1]); | |
126 } | |
127 | |
128 #Private getter / setter to the replicate number | |
129 sub _nbr_subfiles { | |
130 return $_[0]->_getter_setter('nbr_subfiles',$_[1]); | |
131 } | |
132 | |
133 1; |