annotate computeGCBias.xml @ 80:be73bb8a1303 draft default tip

planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit 3bc1d1c6f4e28ac7ff8df79fe4e3f00a195938e6-dirty
author bgruening
date Wed, 21 Oct 2015 02:51:49 -0400
parents 1dbd76a58d8b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
61
f3140d17939e Uploaded
bgruening
parents:
diff changeset
1 <tool id="deeptools_computeGCBias" name="computeGCBias" version="@WRAPPER_VERSION@.0">
f3140d17939e Uploaded
bgruening
parents:
diff changeset
2 <description>to see whether your samples should be normalized for GC bias</description>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
3 <macros>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
4 <token name="@BINARY@">computeGCBias</token>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
5 <import>deepTools_macros.xml</import>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
6 </macros>
65
9bee2c86eeb1 planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit ab1ab06323702186cf0c883d5774720cbb822cb5-dirty
iuc
parents: 61
diff changeset
7 <expand macro="requirements" />
61
f3140d17939e Uploaded
bgruening
parents:
diff changeset
8 <command>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
9 <![CDATA[
f3140d17939e Uploaded
bgruening
parents:
diff changeset
10 ln -s $bamInput local_bamInput.bam;
f3140d17939e Uploaded
bgruening
parents:
diff changeset
11 ln -s $bamInput.metadata.bam_index local_bamInput.bam.bai;
f3140d17939e Uploaded
bgruening
parents:
diff changeset
12
f3140d17939e Uploaded
bgruening
parents:
diff changeset
13 computeGCBias
f3140d17939e Uploaded
bgruening
parents:
diff changeset
14 @THREADS@
f3140d17939e Uploaded
bgruening
parents:
diff changeset
15
f3140d17939e Uploaded
bgruening
parents:
diff changeset
16 --bamfile 'local_bamInput.bam'
f3140d17939e Uploaded
bgruening
parents:
diff changeset
17 --GCbiasFrequenciesFile $outFileName
f3140d17939e Uploaded
bgruening
parents:
diff changeset
18 --fragmentLength $fragmentLength
f3140d17939e Uploaded
bgruening
parents:
diff changeset
19
f3140d17939e Uploaded
bgruening
parents:
diff changeset
20 @reference_genome_source@
f3140d17939e Uploaded
bgruening
parents:
diff changeset
21
f3140d17939e Uploaded
bgruening
parents:
diff changeset
22 #if $effectiveGenomeSize.effectiveGenomeSize_opt == "specific":
f3140d17939e Uploaded
bgruening
parents:
diff changeset
23 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize
f3140d17939e Uploaded
bgruening
parents:
diff changeset
24 #else:
f3140d17939e Uploaded
bgruening
parents:
diff changeset
25 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize_opt
f3140d17939e Uploaded
bgruening
parents:
diff changeset
26 #end if
f3140d17939e Uploaded
bgruening
parents:
diff changeset
27
f3140d17939e Uploaded
bgruening
parents:
diff changeset
28 #if str($region).strip() != '':
f3140d17939e Uploaded
bgruening
parents:
diff changeset
29 --region '$region'
f3140d17939e Uploaded
bgruening
parents:
diff changeset
30 #end if
f3140d17939e Uploaded
bgruening
parents:
diff changeset
31
f3140d17939e Uploaded
bgruening
parents:
diff changeset
32 #if $advancedOpt.showAdvancedOpt == "yes":
f3140d17939e Uploaded
bgruening
parents:
diff changeset
33 --sampleSize '$advancedOpt.sampleSize'
f3140d17939e Uploaded
bgruening
parents:
diff changeset
34 --regionSize '$advancedOpt.regionSize'
f3140d17939e Uploaded
bgruening
parents:
diff changeset
35
f3140d17939e Uploaded
bgruening
parents:
diff changeset
36 #if $advancedOpt.filterOut:
f3140d17939e Uploaded
bgruening
parents:
diff changeset
37 --filterOut $advancedOpt.filterOut
f3140d17939e Uploaded
bgruening
parents:
diff changeset
38 #end if
f3140d17939e Uploaded
bgruening
parents:
diff changeset
39
f3140d17939e Uploaded
bgruening
parents:
diff changeset
40 #if $advancedOpt.extraSampling:
f3140d17939e Uploaded
bgruening
parents:
diff changeset
41 --extraSampling $advancedOpt.extraSampling
f3140d17939e Uploaded
bgruening
parents:
diff changeset
42 #end if
f3140d17939e Uploaded
bgruening
parents:
diff changeset
43 #end if
f3140d17939e Uploaded
bgruening
parents:
diff changeset
44
f3140d17939e Uploaded
bgruening
parents:
diff changeset
45 #if str($image_format) != 'none':
f3140d17939e Uploaded
bgruening
parents:
diff changeset
46 --biasPlot $outImageName
f3140d17939e Uploaded
bgruening
parents:
diff changeset
47 --plotFileFormat $image_format
f3140d17939e Uploaded
bgruening
parents:
diff changeset
48 #end if
f3140d17939e Uploaded
bgruening
parents:
diff changeset
49 ]]>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
50 </command>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
51 <inputs>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
52 <param name="bamInput" format="bam" type="data" label="BAM file"
f3140d17939e Uploaded
bgruening
parents:
diff changeset
53 help="The BAM file must be sorted."/>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
54
f3140d17939e Uploaded
bgruening
parents:
diff changeset
55 <expand macro="reference_genome_source" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
56 <expand macro="effectiveGenomeSize" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
57 <expand macro="fragmentLength" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
58 <expand macro="region_limit_operation" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
59
f3140d17939e Uploaded
bgruening
parents:
diff changeset
60 <conditional name="advancedOpt">
f3140d17939e Uploaded
bgruening
parents:
diff changeset
61 <param name="showAdvancedOpt" type="select" label="Show advanced options" >
f3140d17939e Uploaded
bgruening
parents:
diff changeset
62 <option value="no" selected="true">no</option>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
63 <option value="yes">yes</option>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
64 </param>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
65 <when value="no" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
66 <when value="yes">
f3140d17939e Uploaded
bgruening
parents:
diff changeset
67 <param name="sampleSize" type="integer" value="50000000" min="1"
f3140d17939e Uploaded
bgruening
parents:
diff changeset
68 label="Number of sampling points to be considered" help="(--sampleSize)" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
69 <param name="regionSize" type="integer" value="300" min="1"
f3140d17939e Uploaded
bgruening
parents:
diff changeset
70 label="Region size"
65
9bee2c86eeb1 planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit ab1ab06323702186cf0c883d5774720cbb822cb5-dirty
iuc
parents: 61
diff changeset
71 help ="To plot the reads per GC over a region, the size of the region is
9bee2c86eeb1 planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit ab1ab06323702186cf0c883d5774720cbb822cb5-dirty
iuc
parents: 61
diff changeset
72 required (see below for more details of the method). By default, the bin size
9bee2c86eeb1 planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit ab1ab06323702186cf0c883d5774720cbb822cb5-dirty
iuc
parents: 61
diff changeset
73 is set to 300 bp, which is close to the standard fragment size for many sequencing
9bee2c86eeb1 planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit ab1ab06323702186cf0c883d5774720cbb822cb5-dirty
iuc
parents: 61
diff changeset
74 applications. However, if the depth of sequencing is low, a larger bin size will
9bee2c86eeb1 planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit ab1ab06323702186cf0c883d5774720cbb822cb5-dirty
iuc
parents: 61
diff changeset
75 be required, otherwise many bins will not overlap with any read. (--regionSize)"/>
61
f3140d17939e Uploaded
bgruening
parents:
diff changeset
76 <param name="filterOut" type="data" format="bed" optional="true"
f3140d17939e Uploaded
bgruening
parents:
diff changeset
77 label="BED file containing genomic regions to be excluded from the estimation of the correction"
65
9bee2c86eeb1 planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit ab1ab06323702186cf0c883d5774720cbb822cb5-dirty
iuc
parents: 61
diff changeset
78 help="Such regions usually contain repetitive regions and peaks that if included will
9bee2c86eeb1 planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit ab1ab06323702186cf0c883d5774720cbb822cb5-dirty
iuc
parents: 61
diff changeset
79 bias the correction. It is recommended to filter out known repetitive regions if multi-reads
9bee2c86eeb1 planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit ab1ab06323702186cf0c883d5774720cbb822cb5-dirty
iuc
parents: 61
diff changeset
80 (reads that map to more than one genomic position) were excluded. In the case of ChIP-seq data,
9bee2c86eeb1 planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit ab1ab06323702186cf0c883d5774720cbb822cb5-dirty
iuc
parents: 61
diff changeset
81 it is recommended to first use a peak caller to identify and filter out the identified peaks. (--filterOut)" />
61
f3140d17939e Uploaded
bgruening
parents:
diff changeset
82 <param name="extraSampling" type="data" format="bed" optional="true"
f3140d17939e Uploaded
bgruening
parents:
diff changeset
83 label="BED file containing genomic regions for which extra sampling is required because they are underrepresented in the genome"
f3140d17939e Uploaded
bgruening
parents:
diff changeset
84 help="(--extraSampling)" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
85 </when>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
86 </conditional>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
87 <param name="image_format" type="select"
f3140d17939e Uploaded
bgruening
parents:
diff changeset
88 label="GC bias plot"
f3140d17939e Uploaded
bgruening
parents:
diff changeset
89 help="If given, a diagnostic image summarizing the GC bias found on the sample will be created. (--plotFileFormat)">
f3140d17939e Uploaded
bgruening
parents:
diff changeset
90 <option value="none">No image</option>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
91 <option value="png" selected="true">Image in png format</option>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
92 <option value="pdf">Image in pdf format</option>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
93 <option value="svg">Image in svg format</option>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
94 <option value="eps">Image in eps format</option>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
95 <option value="emf">Image in emf format</option>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
96 </param>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
97 </inputs>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
98 <outputs>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
99 <data name="outFileName" format="tabular" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
100 <data name="outImageName" format="png" label="${tool.name} GC-bias Plot">
f3140d17939e Uploaded
bgruening
parents:
diff changeset
101 <filter>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
102 ((
f3140d17939e Uploaded
bgruening
parents:
diff changeset
103 image_format != 'none'
f3140d17939e Uploaded
bgruening
parents:
diff changeset
104 ))
f3140d17939e Uploaded
bgruening
parents:
diff changeset
105 </filter>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
106 <change_format>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
107 <when input="image_format" value="pdf" format="pdf" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
108 <when input="image_format" value="svg" format="svg" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
109 <when input="image_format" value="eps" format="eps" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
110 <when input="image_format" value="emf" format="emf" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
111 </change_format>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
112 </data>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
113 </outputs>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
114 <tests>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
115 <test>
66
1dbd76a58d8b planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit f720d773e34ad90d14d7cae77c1a0bb5d126102c-dirty
galaxyp
parents: 65
diff changeset
116 <param name="bamInput" value="paired_chr2L.bam" ftype="bam" />
61
f3140d17939e Uploaded
bgruening
parents:
diff changeset
117 <param name="image_format" value="png" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
118 <param name="showAdvancedOpt" value="yes" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
119 <param name="regionSize" value="1" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
120 <param name="fragmentLength" value="100" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
121 <param name="ref_source" value="history" />
66
1dbd76a58d8b planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit f720d773e34ad90d14d7cae77c1a0bb5d126102c-dirty
galaxyp
parents: 65
diff changeset
122 <param name="input1" value="sequence.2bit" />
1dbd76a58d8b planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit f720d773e34ad90d14d7cae77c1a0bb5d126102c-dirty
galaxyp
parents: 65
diff changeset
123 <param name="sampleSize" value="10" />
1dbd76a58d8b planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit f720d773e34ad90d14d7cae77c1a0bb5d126102c-dirty
galaxyp
parents: 65
diff changeset
124 <param name="effectiveGenomeSize_opt" value="specific" />
1dbd76a58d8b planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit f720d773e34ad90d14d7cae77c1a0bb5d126102c-dirty
galaxyp
parents: 65
diff changeset
125 <param name="effectiveGenomeSize" value="23011544" />
1dbd76a58d8b planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit f720d773e34ad90d14d7cae77c1a0bb5d126102c-dirty
galaxyp
parents: 65
diff changeset
126 <param name="region" value="chr2L" />
1dbd76a58d8b planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit f720d773e34ad90d14d7cae77c1a0bb5d126102c-dirty
galaxyp
parents: 65
diff changeset
127 <param name="image_format" value="none" />
61
f3140d17939e Uploaded
bgruening
parents:
diff changeset
128 <output name="outFileName" file="computeGCBias_result1.tabular" ftype="tabular" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
129 </test>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
130 </tests>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
131 <help>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
132 <![CDATA[
f3140d17939e Uploaded
bgruening
parents:
diff changeset
133 **What it does**
f3140d17939e Uploaded
bgruening
parents:
diff changeset
134
f3140d17939e Uploaded
bgruening
parents:
diff changeset
135 This tool computes the GC bias using the method proposed by Benjamini and Speed (2012) Nucleic Acids Res. (see below for more explanations).
f3140d17939e Uploaded
bgruening
parents:
diff changeset
136 The output is used to plot the bias and can also be used later on to correct the bias with the tool correctGCbias.
f3140d17939e Uploaded
bgruening
parents:
diff changeset
137 There are two plots produced by the tool: a boxplot showing the absolute read numbers per genomic-GC bin and an x-y plot
f3140d17939e Uploaded
bgruening
parents:
diff changeset
138 depicting the ratio of observed/expected reads per genomic GC content bin.
f3140d17939e Uploaded
bgruening
parents:
diff changeset
139
f3140d17939e Uploaded
bgruening
parents:
diff changeset
140 -----
f3140d17939e Uploaded
bgruening
parents:
diff changeset
141
f3140d17939e Uploaded
bgruening
parents:
diff changeset
142 **Summary of the method used**
f3140d17939e Uploaded
bgruening
parents:
diff changeset
143
f3140d17939e Uploaded
bgruening
parents:
diff changeset
144 In order to estimate how many reads with what kind of GC content one should have sequenced, we first need to determine how many regions the specific
f3140d17939e Uploaded
bgruening
parents:
diff changeset
145 reference genome contains for each amount of GC content, i.e. how many regions in the genome have 50% GC (or 10% GC or 90% GC or...).
f3140d17939e Uploaded
bgruening
parents:
diff changeset
146 We then sample a large number of equally sized genome bins and count how many times we see a bin with 50% GC (or 10% GC or 90% or...). These EXPECTED values are independent of any
f3140d17939e Uploaded
bgruening
parents:
diff changeset
147 sequencing as they only depend on the respective reference genome (i.e. they will most likely vary between mouse and fruit fly due to their genomes' different GC contents).
f3140d17939e Uploaded
bgruening
parents:
diff changeset
148 The OBSERVED values are based on the reads from the sequenced sample. Instead of noting how many genomic regions there are per GC content, we now count the reads per GC content.
f3140d17939e Uploaded
bgruening
parents:
diff changeset
149 In an ideal sample without GC bias, the ratio of OBSERVED/EXPECTED values should be close to 1 regardless of the GC content. Due to PCR (over)amplifications, the majority of ChIP samples
f3140d17939e Uploaded
bgruening
parents:
diff changeset
150 usually show a significant bias towards reads with high GC content (>50%).
f3140d17939e Uploaded
bgruening
parents:
diff changeset
151
f3140d17939e Uploaded
bgruening
parents:
diff changeset
152 .. image:: $PATH_TO_IMAGES/QC_GCplots_input.png
f3140d17939e Uploaded
bgruening
parents:
diff changeset
153
f3140d17939e Uploaded
bgruening
parents:
diff changeset
154
f3140d17939e Uploaded
bgruening
parents:
diff changeset
155 You can find more details on the computeGCBias wiki page: https://github.com/fidelram/deepTools/wiki/QC#wiki-computeGCbias
f3140d17939e Uploaded
bgruening
parents:
diff changeset
156
f3140d17939e Uploaded
bgruening
parents:
diff changeset
157
f3140d17939e Uploaded
bgruening
parents:
diff changeset
158 **Output files**:
f3140d17939e Uploaded
bgruening
parents:
diff changeset
159
f3140d17939e Uploaded
bgruening
parents:
diff changeset
160 - Diagnostic plot
f3140d17939e Uploaded
bgruening
parents:
diff changeset
161
f3140d17939e Uploaded
bgruening
parents:
diff changeset
162 - box plot of absolute read numbers per genomic GC bin
f3140d17939e Uploaded
bgruening
parents:
diff changeset
163 - x-y plot of observed/expected read ratios per genomic GC content bin
f3140d17939e Uploaded
bgruening
parents:
diff changeset
164
f3140d17939e Uploaded
bgruening
parents:
diff changeset
165 - Data matrix
f3140d17939e Uploaded
bgruening
parents:
diff changeset
166
f3140d17939e Uploaded
bgruening
parents:
diff changeset
167 - to be used for GC correction with correctGCbias
f3140d17939e Uploaded
bgruening
parents:
diff changeset
168
f3140d17939e Uploaded
bgruening
parents:
diff changeset
169
f3140d17939e Uploaded
bgruening
parents:
diff changeset
170 -----
f3140d17939e Uploaded
bgruening
parents:
diff changeset
171
f3140d17939e Uploaded
bgruening
parents:
diff changeset
172 @REFERENCES@
f3140d17939e Uploaded
bgruening
parents:
diff changeset
173 ]]>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
174 </help>
f3140d17939e Uploaded
bgruening
parents:
diff changeset
175 <expand macro="citations" />
f3140d17939e Uploaded
bgruening
parents:
diff changeset
176 </tool>