annotate computeGCBias.xml @ 0:03e5e7b2cedd draft default tip

Uploaded
author devteam
date Thu, 14 Nov 2013 15:58:04 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
1 <tool id="deeptools_computeGCBias" name="computeGCBias" version="1.0.1">
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
2 <description>to see whether your samples should be normalized for GC bias</description>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
3 <expand macro="requirements" />
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
4 <stdio>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
5 <exit_code range="0" level="warning" description="Warning" />
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
6 </stdio>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
7 <macros>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
8 <import>deepTools_macros.xml</import>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
9 </macros>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
10 <command>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
11 #import tempfile
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
12 #set $temp_dir = os.path.abspath(tempfile.mkdtemp())
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
13
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
14 #set $temp_bam_handle = tempfile.NamedTemporaryFile( dir=$temp_dir )
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
15 #set $temp_bam_path = $temp_bam_handle.name + '.bam'
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
16 #silent $temp_bam_handle.close()
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
17 #silent os.system("ln -s %s %s" % (str($bamInput), $temp_bam_path))
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
18 #silent os.system("ln -s %s %s.bai" % (str($bamInput.metadata.bam_index), $temp_bam_path))
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
19
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
20 computeGCBias
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
21
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
22 @THREADS@
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
23
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
24 --bamfile '$temp_bam_path'
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
25 --GCbiasFrequenciesFile $outFileName
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
26 --fragmentLength $fragmentLength
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
27
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
28 @reference_genome_source@
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
29
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
30
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
31 #if $effectiveGenomeSize.effectiveGenomeSize_opt == "specific":
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
32 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
33 #else:
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
34 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize_opt
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
35 #end if
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
36
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
37
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
38 #if $advancedOpt.showAdvancedOpt == "yes":
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
39 #if str($advancedOpt.region.value) != '':
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
40 --region '$advancedOpt.region'
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
41 #end if
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
42
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
43 --sampleSize '$advancedOpt.sampleSize'
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
44 --regionSize '$advancedOpt.regionSize'
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
45
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
46 #if $advancedOpt.filterOut:
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
47 --filterOut $advancedOpt.filterOut
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
48 #end if
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
49
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
50 #if $advancedOpt.extraSampling:
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
51 --extraSampling $advancedOpt.extraSampling
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
52 #end if
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
53
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
54 #end if
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
55
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
56 #if $saveBiasPlot:
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
57 --biasPlot $biasPlot
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
58 #end if
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
59
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
60 ## #if $output.showOutputSettings == "yes"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
61 ## #if $output.saveBiasPlot:
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
62 ## --biasPlot biasPlot.png ;
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
63 ## mv biasPlot.png $biasPlot
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
64 ## #end if
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
65 ## #end if
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
66
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
67 ; rm -rf $temp_dir
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
68
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
69 </command>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
70 <inputs>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
71
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
72 <param name="bamInput" format="bam" type="data" label="Input BAM file"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
73 help="The BAM file must be sorted."/>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
74
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
75 <expand macro="reference_genome_source" />
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
76 <expand macro="effectiveGenomeSize" />
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
77
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
78 <param name="fragmentLength" type="integer" value="300" min="1"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
79 label="Fragment length used for the sequencing"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
80 help ="If paired-end reads are used, the fragment length is computed from the BAM file."/>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
81
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
82 <conditional name="advancedOpt">
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
83 <param name="showAdvancedOpt" type="select" label="Show advanced options" >
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
84 <option value="no" selected="true">no</option>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
85 <option value="yes">yes</option>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
86 </param>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
87 <when value="no" />
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
88 <when value="yes">
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
89 <param name="region" type="text" value=""
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
90 label="Region of the genome to limit the operation to"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
91 help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" />
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
92
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
93 <param name="sampleSize" type="integer" value="50000000" min="1"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
94 label="Number of sampling points to be considered" />
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
95
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
96 <param name="regionSize" type="integer" value="300" min="1"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
97 label="Region size"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
98 help ="To plot the reads per GC over a region, the size of the region is required (see below for more details of the method). By default, the bin size is set to 300 bp, which is close to the standard fragment size for many sequencing applications. However, if the depth of sequencing is low, a larger bin size will be required, otherwise many bins will not overlap with any read."/>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
99
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
100 <param name="filterOut" type="data" format="bed" optional="true"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
101 label="BED file containing genomic regions to be excluded from the estimation of the correction"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
102 help="Such regions usually contain repetitive regions and peaks that if included will bias the correction. It is recommended to filter out known repetitive regions if multi-reads (reads that map to more than one genomic position) were excluded. In the case of ChIP-seq data, it is recommended to first use a peak caller to identify and filter out the identified peaks." />
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
103 <param name="extraSampling" type="data" format="bed" optional="true"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
104 label="BED file containing genomic regions for which extra sampling is required because they are underrepresented in the genome"
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
105 help="" />
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
106 </when>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
107 </conditional>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
108
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
109 <param name="saveBiasPlot" type="boolean" truevalue="--biasPlot" falsevalue="" checked="True" label="Save a diagnostic image summarizing the GC bias found on the sample"/>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
110 <!--
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
111 <conditional name="output" >
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
112 <param name="showOutputSettings" type="select" label="Show additional output options" >
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
113 <option value="no" selected="true">no</option>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
114 <option value="yes">yes</option>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
115 </param>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
116 <when value="no" />
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
117 <when value="yes">
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
118 <param name="saveBiasPlot" type="boolean" label="Save a diagnostic image summarizing the GC bias found on the sample"/>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
119 </when>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
120 </conditional>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
121 -->
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
122 </inputs>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
123 <outputs>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
124 <data format="tabular" name="outFileName" />
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
125 <data format="png" name="biasPlot" label="${tool.name} on ${on_string}: bias plot">
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
126 <filter>saveBiasPlot is True</filter>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
127 <!--<filter>(output['showOutputSettings'] == 'yes' and output['saveBiasPlot'] == True)</filter>-->
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
128 </data>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
129 </outputs>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
130 <help>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
131
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
132 **What it does**
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
133
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
134 This tool computes the GC bias using the method proposed by Benjamini and Speed (2012), Nucleic Acids Res. (see below for more explanations).
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
135 The output is used to plot the bias and can also be used later on to correct the bias with the tool correctGCbias.
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
136 There are two plots produced by the tool: a boxplot showing the absolute read numbers per genomic-GC bin and an x-y plot
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
137 depicting the ratio of observed/expected reads per genomic GC content bin.
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
138
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
139 -----
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
140
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
141 **Summary of the method used**
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
142
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
143 In order to estimate how many reads with what kind of GC content one should have sequenced, we first need to determine how many regions the specific
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
144 reference genome contains for each amount of GC content, i.e. how many regions in the genome have 50% GC (or 10% GC or 90% GC or...).
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
145 We then sample a large number of equally sized genome bins and count how many times we see a bin with 50% GC (or 10% GC or 90% or...). These EXPECTED values are independent of any
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
146 sequencing as they only depend on the respective reference genome (i.e. they will most likely vary between mouse and fruit fly due to their genomes' different GC contents).
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
147 The OBSERVED values are based on the reads from the sequenced sample. Instead of noting how many genomic regions there are per GC content, we now count the reads per GC content.
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
148 In an ideal sample without GC bias, the ratio of OBSERVED/EXPECTED values should be close to 1 regardless of the GC content. Due to PCR (over)amplifications, the majority of ChIP samples
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
149 usually show a significant bias towards reads with high GC content (>50%).
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
150
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
151 .. image:: $PATH_TO_IMAGES/QC_GCplots_input.png
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
152
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
153
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
154 **Output files**:
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
155
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
156 - Diagnostic plot
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
157
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
158 - box plot of absolute read numbers per genomic GC bin
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
159 - x-y plot of observed/expected read ratios per genomic GC content bin
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
160
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
161 - Data matrix
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
162
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
163 - to be used for GC correction with correctGCbias
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
164
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
165
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
166 -----
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
167
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
168 .. class:: infomark
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
169
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
170 @REFERENCES@
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
171
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
172 </help>
03e5e7b2cedd Uploaded
devteam
parents:
diff changeset
173 </tool>