annotate computeGCBias.xml @ 24:e43b4015b4cc draft

Uploaded
author bgruening
date Fri, 13 Dec 2013 17:38:30 -0500
parents 8710187d1eb5
children 1e5fcb1555de
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
24
e43b4015b4cc Uploaded
bgruening
parents: 22
diff changeset
1 <tool id="deeptools_computeGCBias" name="computeGCBias" version="1.0.2">
10
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
2 <description>to see whether your samples should be normalized for GC bias</description>
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
3 <expand macro="requirements" />
22
8710187d1eb5 Uploaded
bgruening
parents: 21
diff changeset
4 <expand macro="stdio" />
10
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
5 <macros>
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
6 <import>deepTools_macros.xml</import>
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
7 </macros>
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
8 <command>
15
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
9 ln -s $bamInput local_bamInput.bam;
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
10 ln -s $bamInput.metadata.bam_index local_bamInput.bam.bai;
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
11
15
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
12 computeGCBias
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
13
15
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
14 @THREADS@
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
15
15
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
16 --bamfile 'local_bamInput.bam'
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
17 --GCbiasFrequenciesFile $outFileName
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
18 --fragmentLength $fragmentLength
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
19
15
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
20 @reference_genome_source@
10
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
21
15
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
22 #if $effectiveGenomeSize.effectiveGenomeSize_opt == "specific":
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
23 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
24 #else:
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
25 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize_opt
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
26 #end if
10
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
27
15
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
28 #if $advancedOpt.showAdvancedOpt == "yes":
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
29 #if str($advancedOpt.region.value) != '':
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
30 --region '$advancedOpt.region'
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
31 #end if
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
32
15
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
33 --sampleSize '$advancedOpt.sampleSize'
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
34 --regionSize '$advancedOpt.regionSize'
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
35
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
36 #if $advancedOpt.filterOut:
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
37 --filterOut $advancedOpt.filterOut
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
38 #end if
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
39
15
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
40 #if $advancedOpt.extraSampling:
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
41 --extraSampling $advancedOpt.extraSampling
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
42 #end if
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
43 #end if
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
44
15
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
45 #if $saveBiasPlot:
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
46 --biasPlot $biasPlot
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
47 #end if
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
48
15
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
49 ## #if $output.showOutputSettings == "yes"
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
50 ## #if $output.saveBiasPlot:
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
51 ## --biasPlot biasPlot.png ;
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
52 ## mv biasPlot.png $biasPlot
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
53 ## #end if
351cd1f8791b Uploaded
bgruening
parents: 10
diff changeset
54 ## #end if
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
55
10
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
56 </command>
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
57 <inputs>
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
58
21
8436c195dd6c Uploaded
bgruening
parents: 15
diff changeset
59 <param name="bamInput" format="bam" type="data" label="BAM file"
10
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
60 help="The BAM file must be sorted."/>
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
61
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
62 <expand macro="reference_genome_source" />
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
63 <expand macro="effectiveGenomeSize" />
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
64
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
65 <param name="fragmentLength" type="integer" value="300" min="1"
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
66 label="Fragment length used for the sequencing"
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
67 help ="If paired-end reads are used, the fragment length is computed from the BAM file."/>
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
68
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
69 <conditional name="advancedOpt">
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
70 <param name="showAdvancedOpt" type="select" label="Show advanced options" >
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
71 <option value="no" selected="true">no</option>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
72 <option value="yes">yes</option>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
73 </param>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
74 <when value="no" />
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
75 <when value="yes">
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
76 <param name="region" type="text" value=""
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
77 label="Region of the genome to limit the operation to"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
78 help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" />
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
79
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
80 <param name="sampleSize" type="integer" value="50000000" min="1"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
81 label="Number of sampling points to be considered" />
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
82
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
83 <param name="regionSize" type="integer" value="300" min="1"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
84 label="Region size"
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
85 help ="To plot the reads per GC over a region, the size of the region is required (see below for more details of the method). By default, the bin size is set to 300 bp, which is close to the standard fragment size for many sequencing applications. However, if the depth of sequencing is low, a larger bin size will be required, otherwise many bins will not overlap with any read."/>
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
86
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
87 <param name="filterOut" type="data" format="bed" optional="true"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
88 label="BED file containing genomic regions to be excluded from the estimation of the correction"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
89 help="Such regions usually contain repetitive regions and peaks that if included will bias the correction. It is recommended to filter out known repetitive regions if multi-reads (reads that map to more than one genomic position) were excluded. In the case of ChIP-seq data, it is recommended to first use a peak caller to identify and filter out the identified peaks." />
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
90 <param name="extraSampling" type="data" format="bed" optional="true"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
91 label="BED file containing genomic regions for which extra sampling is required because they are underrepresented in the genome"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
92 help="" />
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
93 </when>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
94 </conditional>
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
95
5
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
96 <param name="saveBiasPlot" type="boolean" truevalue="--biasPlot" falsevalue="" checked="True" label="Save a diagnostic image summarizing the GC bias found on the sample"/>
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
97 <!--
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
98 <conditional name="output" >
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
99 <param name="showOutputSettings" type="select" label="Show additional output options" >
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
100 <option value="no" selected="true">no</option>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
101 <option value="yes">yes</option>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
102 </param>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
103 <when value="no" />
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
104 <when value="yes">
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
105 <param name="saveBiasPlot" type="boolean" label="Save a diagnostic image summarizing the GC bias found on the sample"/>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
106 </when>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
107 </conditional>
5
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
108 -->
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
109 </inputs>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
110 <outputs>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
111 <data format="tabular" name="outFileName" />
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
112 <data format="png" name="biasPlot" label="${tool.name} on ${on_string}: bias plot">
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
113 <filter>saveBiasPlot is True</filter>
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
114 <!--<filter>(output['showOutputSettings'] == 'yes' and output['saveBiasPlot'] == True)</filter>-->
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
115 </data>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
116 </outputs>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
117 <help>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
118
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
119 **What it does**
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
120
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
121 This tool computes the GC bias using the method proposed by Benjamini and Speed (2012), Nucleic Acids Res. (see below for more explanations).
5
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
122 The output is used to plot the bias and can also be used later on to correct the bias with the tool correctGCbias.
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
123 There are two plots produced by the tool: a boxplot showing the absolute read numbers per genomic-GC bin and an x-y plot
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
124 depicting the ratio of observed/expected reads per genomic GC content bin.
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
125
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
126 -----
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
127
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
128 **Summary of the method used**
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
129
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
130 In order to estimate how many reads with what kind of GC content one should have sequenced, we first need to determine how many regions the specific
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
131 reference genome contains for each amount of GC content, i.e. how many regions in the genome have 50% GC (or 10% GC or 90% GC or...).
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
132 We then sample a large number of equally sized genome bins and count how many times we see a bin with 50% GC (or 10% GC or 90% or...). These EXPECTED values are independent of any
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
133 sequencing as they only depend on the respective reference genome (i.e. they will most likely vary between mouse and fruit fly due to their genomes' different GC contents).
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
134 The OBSERVED values are based on the reads from the sequenced sample. Instead of noting how many genomic regions there are per GC content, we now count the reads per GC content.
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
135 In an ideal sample without GC bias, the ratio of OBSERVED/EXPECTED values should be close to 1 regardless of the GC content. Due to PCR (over)amplifications, the majority of ChIP samples
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
136 usually show a significant bias towards reads with high GC content (>50%).
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
137
10
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
138 .. image:: $PATH_TO_IMAGES/QC_GCplots_input.png
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
139
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
140
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
141 **Output files**:
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
142
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
143 - Diagnostic plot
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
144
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
145 - box plot of absolute read numbers per genomic GC bin
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
146 - x-y plot of observed/expected read ratios per genomic GC content bin
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
147
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
148 - Data matrix
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
149
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
150 - to be used for GC correction with correctGCbias
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
151
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
152
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
153 -----
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
154
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
155 .. class:: infomark
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
156
10
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
157 @REFERENCES@
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
158
10
a68a771625d2 Uploaded
bgruening
parents: 9
diff changeset
159 </help>
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
160 </tool>