annotate computeGCBias.xml @ 0:b2886915ce68 draft default tip

Uploaded
author bgruening
date Tue, 10 Feb 2015 03:21:34 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
1 <tool id="deeptools_computeGCBias" name="computeGCBias" version="@WRAPPER_VERSION@.0">
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
2 <description>to see whether your samples should be normalized for GC bias</description>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
3 <expand macro="requirements" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
4 <expand macro="stdio" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
5 <macros>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
6 <token name="@BINARY@">computeGCBias</token>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
7 <import>deepTools_macros.xml</import>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
8 </macros>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
9 <command>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
10 <![CDATA[
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
11 ln -s '$bamInput' local_bamInput.bam &&
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
12 ln -s '$bamInput.metadata.bam_index' local_bamInput.bam.bai &&
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
13
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
14 computeGCBias
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
15 @THREADS@
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
16
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
17 --bamfile 'local_bamInput.bam'
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
18 --GCbiasFrequenciesFile '$outFileName'
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
19 --fragmentLength $fragmentLength
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
20
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
21 @reference_genome_source@
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
22
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
23 #if $effectiveGenomeSize.effectiveGenomeSize_opt == "specific":
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
24 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
25 #else:
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
26 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize_opt
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
27 #end if
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
28
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
29 #if str($region).strip() != '':
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
30 --region '$region'
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
31 #end if
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
32
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
33 #if $advancedOpt.showAdvancedOpt == "yes":
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
34
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
35 --sampleSize '$advancedOpt.sampleSize'
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
36 --regionSize '$advancedOpt.regionSize'
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
37
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
38 #if $advancedOpt.filterOut:
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
39 --filterOut '$advancedOpt.filterOut'
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
40 #end if
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
41
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
42 #if $advancedOpt.extraSampling:
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
43 --extraSampling '$advancedOpt.extraSampling'
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
44 #end if
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
45 #end if
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
46
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
47 #if str($image_format) != 'none':
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
48 --biasPlot '$outImageName'
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
49 --plotFileFormat $image_format
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
50 #end if
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
51 ]]>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
52 </command>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
53 <inputs>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
54 <param name="bamInput" format="bam" type="data" label="BAM file"
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
55 help="The BAM file must be sorted."/>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
56
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
57 <expand macro="reference_genome_source" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
58 <expand macro="effectiveGenomeSize" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
59 <expand macro="fragmentLength" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
60 <expand macro="region_limit_operation" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
61
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
62 <conditional name="advancedOpt">
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
63 <param name="showAdvancedOpt" type="select" label="Show advanced options" >
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
64 <option value="no" selected="true">no</option>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
65 <option value="yes">yes</option>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
66 </param>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
67 <when value="no" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
68 <when value="yes">
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
69 <param name="sampleSize" type="integer" value="50000000" min="1"
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
70 label="Number of sampling points to be considered" help="(--sampleSize)" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
71 <param name="regionSize" type="integer" value="300" min="1"
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
72 label="Region size"
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
73 help ="To plot the reads per GC over a region, the size of the region is required (see below for more details of the method). By default, the bin size is set to 300 bp, which is close to the standard fragment size of many sequencing applications. However, if the depth of sequencing is low, a larger bin size will be required, otherwise many bins will not overlap with any read. (--regionSize)"/>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
74 <param name="filterOut" type="data" format="bed" optional="true"
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
75 label="BED file containing genomic regions to be excluded from the estimation of the correction"
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
76 help="Such regions usually contain repetitive regions and peaks that if included will bias the correction. It is recommended to filter out known repetitive regions if multi-reads (reads that map to more than one genomic position) were excluded. In the case of ChIP-seq data, it is recommended to first use a peak caller to identify and filter out the identified peaks. (--filterOut)" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
77 <param name="extraSampling" type="data" format="bed" optional="true"
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
78 label="BED file containing genomic regions for which extra sampling is required because they are underrepresented in the genome"
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
79 help="(--extraSampling)" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
80 </when>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
81 </conditional>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
82 <param name="image_format" type="select"
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
83 label="GC bias plot"
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
84 help="If given, a diagnostic image summarizing the GC bias found on the sample will be created. (--plotFileFormat)">
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
85 <option value="none">No image</option>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
86 <option value="png" selected="true">Image in png format</option>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
87 <option value="pdf">Image in pdf format</option>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
88 <option value="svg">Image in svg format</option>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
89 <option value="eps">Image in eps format</option>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
90 <option value="emf">Image in emf format</option>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
91 </param>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
92 </inputs>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
93 <outputs>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
94 <data name="outFileName" format="tabular" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
95 <data name="outImageName" format="png" label="${tool.name} GC-bias Plot">
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
96 <filter>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
97 ((
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
98 image_format != 'none'
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
99 ))
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
100 </filter>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
101 <change_format>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
102 <when input="image_format" value="pdf" format="pdf" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
103 <when input="image_format" value="svg" format="svg" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
104 <when input="image_format" value="eps" format="eps" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
105 <when input="image_format" value="emf" format="emf" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
106 </change_format>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
107 </data>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
108 </outputs>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
109 <tests>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
110 <test>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
111 <param name="bamInput" value="phiX.bam" ftype="bam" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
112 <param name="image_format" value="png" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
113 <param name="showAdvancedOpt" value="yes" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
114 <param name="regionSize" value="1" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
115 <param name="fragmentLength" value="100" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
116 <param name="ref_source" value="history" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
117 <param name="input1" value="phiX.2bit" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
118 <output name="outFileName" file="computeGCBias_result1.tabular" ftype="tabular" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
119 <output name="outImageName" file="computeGCBias_result1.png" ftype="png" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
120 </test>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
121 </tests>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
122 <help>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
123 <![CDATA[
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
124 **What it does**
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
125
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
126 This tool computes the GC bias using the method proposed by Benjamini and Speed (2012) Nucleic Acids Res. (see below for more explanations).
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
127 The output is used to plot the bias and can also be used later on to correct the bias with the tool correctGCbias.
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
128 There are two plots produced by the tool: a boxplot showing the absolute read numbers per genomic-GC bin and an x-y plot
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
129 depicting the ratio of observed/expected reads per genomic GC content bin.
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
130
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
131 -----
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
132
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
133 **Summary of the method used**
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
134
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
135 In order to estimate how many reads with what kind of GC content one should have sequenced, we first need to determine how many regions the specific
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
136 reference genome contains for each amount of GC content, i.e. how many regions in the genome have 50% GC (or 10% GC or 90% GC or...).
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
137 We then sample a large number of equally sized genome bins and count how many times we see a bin with 50% GC (or 10% GC or 90% or...). These EXPECTED values are independent of any
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
138 sequencing as they only depend on the respective reference genome (i.e. they will most likely vary between mouse and fruit fly due to the different GC contents of their genomes).
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
139 The OBSERVED values are based on the reads from the sequenced sample. Instead of noting how many genomic regions there are per GC content, we now count the reads per GC content.
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
140 In an ideal sample without GC bias, the ratio of OBSERVED/EXPECTED values should be close to 1 regardless of the GC content. Due to PCR (over)amplifications, the majority of ChIP samples
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
141 usually show a significant bias towards reads with high GC content (>50%).
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
142
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
143 .. image:: $PATH_TO_IMAGES/QC_GCplots_input.png
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
144
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
145
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
146 You can find more details on the computeGCBias wiki page: https://github.com/fidelram/deepTools/wiki/QC#wiki-computeGCbias
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
147
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
148
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
149 **Output files**:
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
150
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
151 - Diagnostic plot
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
152
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
153 - box plot of absolute read numbers per genomic GC bin
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
154 - x-y plot of observed/expected read ratios per genomic GC content bin
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
155
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
156 - Data matrix
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
157
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
158 - to be used for GC correction with correctGCbias
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
159
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
160
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
161 -----
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
162
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
163 @REFERENCES@
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
164 ]]>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
165 </help>
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
166 <expand macro="citations" />
b2886915ce68 Uploaded
bgruening
parents:
diff changeset
167 </tool>