annotate variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/ConvergeReplicates.pm @ 2:a5976b2dce6f

changing defualt values for ensembl database
author mahtabm
date Thu, 11 Apr 2013 17:15:42 +1000
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Bio::EnsEMBL::Hive::RunnableDB::Funcgen::ConvergeReplicates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 'ConvergeReplicates' Just merges the replicates BAM and convert it to a SAM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 into the alignments folder for the peaks pipeline...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 package Bio::EnsEMBL::Funcgen::RunnableDB::ConvergeReplicates;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 use Bio::EnsEMBL::DBSQL::DBAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 #use Data::Dumper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 use base ('Bio::EnsEMBL::Funcgen::RunnableDB::Alignment');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 #TODO... Maybe use and update the tracking database...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 sub fetch_input { # fetch parameters...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 my $self = shift @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 $self->SUPER::fetch_input();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 my $species = $self->_species();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 my $gender = $self->_cell_type()->gender;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 $gender = $gender ? $gender : "male";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 my $assembly = $self->_assembly();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 my $sam_header = $self->_work_dir()."/sam_header/".$species."/".$species."_".$gender."_".$assembly."_unmasked.header.sam";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 $self->_sam_header($sam_header);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 my $repository = $self->_repository();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 if(! -d $repository){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 system("mkdir -p $repository") && throw("Couldn't create directory $repository");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 #my $nbr_replicates = $self->param('nbr_replicates') || throw "Number of replicates not given";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 my $nbr_replicates = $self->param('nbr_replicates') || 2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 $self->_nbr_replicates($nbr_replicates);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 sub run {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 my $self = shift @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 my $sam_header = $self->_sam_header();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 my $output_file_prefix = $self->_repository()."/".$self->_set_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 my $file_prefix = $self->_output_dir()."/".$self->_set_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 my $merge_cmd;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 if($self->_nbr_replicates()>1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 $merge_cmd="samtools merge -h $sam_header ${file_prefix}.bam ${file_prefix}.[0-9]*.sorted.bam ";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 $merge_cmd = "cp ${file_prefix}.[0-9]*.sorted.bam ${file_prefix}.bam";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 if(system($merge_cmd) != 0){ throw "Error merging replicate bam files: $merge_cmd"; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 my $convert_cmd = "samtools view -h ${file_prefix}.bam > ${output_file_prefix}.samse.sam";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 if(system($convert_cmd) != 0){ throw "Error converting merged bam to sam: $convert_cmd"; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 my $zip_cmd = "gzip ${output_file_prefix}.samse.sam";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 if(system($zip_cmd) != 0){ warn "Error zipping sam"; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 #Merge the alignment logs too...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 my $alignment_log = $output_file_prefix.".alignment.log";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 my $log_cmd="echo \"Alignment QC - total reads as input: \" >> ${alignment_log}";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 $log_cmd="${log_cmd};samtools flagstat ${file_prefix}.bam | head -n 1 >> ${alignment_log}";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 $log_cmd="${log_cmd}; echo \"Alignment QC - mapped reads: \" >> ${alignment_log} ";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 $log_cmd="${log_cmd};samtools view -u -F 4 ${file_prefix}.bam | samtools flagstat - | head -n 1 >> ${alignment_log}";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 $log_cmd="${log_cmd}; echo \"Alignment QC - reliably aligned reads (mapping quality >= 1): \" >> ${alignment_log}";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 $log_cmd="${log_cmd};samtools view -u -F 4 -q 1 ${file_prefix}.bam | samtools flagstat - | head -n 1 >> ${alignment_log}";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 if(system($log_cmd) != 0){ warn "Error making the alignment statistics"; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 my $rm_cmd="rm -f ${file_prefix}.bam";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 if(system($rm_cmd) != 0){ warn "Error removing temp files. Remove them manually: $rm_cmd"; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 sub write_output { # Nothing to do here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 my $self = shift @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 #Private getter / setter to the sam header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 sub _sam_header {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 return $_[0]->_getter_setter('sam_header',$_[1]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 #Private getter / setter to the sam header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 sub _nbr_replicates {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 return $_[0]->_getter_setter('nbr_replicates',$_[1]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 1;