annotate computeGCBias.xml @ 6:c5847db0cb41 draft

Uploaded
author bgruening
date Wed, 14 Aug 2013 07:18:18 -0400
parents 1f312af2f8db
children 73761f33f198
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
1 <tool id="deeptools_computeGCBias" name="computeGCBias" version="1.0.1">
5
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
2 <description>to see whether your samples should be normalized for GC bias</description>
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
3
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
4 <requirements>
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
5 <requirement type="package" version="1.5.1_df852fa1ef13251a17274ee18fbf919fbc515079">deepTools</requirement>
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
6 <requirement type="package" >deepTools</requirement>
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
7 </requirements>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
8 <stdio>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
9 <exit_code range="0" level="warning" description="Warning" />
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
10 </stdio>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
11 <command>
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
12 #import tempfile
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
13 #set $temp_dir = os.path.abspath(tempfile.mkdtemp())
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
14
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
15 #set $temp_bam_handle = tempfile.NamedTemporaryFile( dir=$temp_dir )
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
16 #set $temp_bam_path = $temp_bam_handle.name + '.bam'
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
17 #silent $temp_bam_handle.close()
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
18 #silent os.system("ln -s %s %s" % (str($bamInput), $temp_bam_path))
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
19 #silent os.system("ln -s %s %s.bai" % (str($bamInput.metadata.bam_index), $temp_bam_path))
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
20
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
21 computeGCBias
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
22
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
23 ##ToDo
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
24 --numberOfProcessors 4
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
25
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
26 --bamfile '$temp_bam_path'
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
27 --species '$species'
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
28 --GCbiasFrequenciesFile $outFileName
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
29 --fragmentLength $fragmentLength
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
30
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
31 #if $source.ref_source=="history":
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
32 --genome $source.input1
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
33 #else:
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
34 --genome "${source.input1_2bit.fields.path}"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
35 #end if
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
36
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
37 #if $advancedOpt.showAdvancedOpt == "yes":
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
38 #if str($advancedOpt.region.value) != '':
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
39 --region '$advancedOpt.region'
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
40 #end if
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
41
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
42 --binSize '$advancedOpt.binSize'
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
43 --sampleSize '$advancedOpt.sampleSize'
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
44 --regionSize '$advancedOpt.regionSize'
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
45
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
46 #if $advancedOpt.filterOut:
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
47 --filterOut $advancedOpt.filterOut
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
48 #end if
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
49
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
50 #if $advancedOpt.extraSampling:
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
51 --extraSampling $advancedOpt.extraSampling
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
52 #end if
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
53
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
54 #end if
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
55
5
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
56 #if $saveBiasPlot:
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
57 --biasPlot $biasPlot
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
58 #end if
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
59
5
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
60 ## #if $output.showOutputSettings == "yes"
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
61 ## #if $output.saveBiasPlot:
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
62 ## --biasPlot biasPlot.png ;
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
63 ## mv biasPlot.png $biasPlot
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
64 ## #end if
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
65 ## #end if
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
66
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
67 ; rm $temp_dir -rf
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
68
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
69 </command>
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
70 <inputs>
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
71
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
72 <param name="bamInput" format="bam" type="data" label="Input BAM file"
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
73 help="The BAM file must be sorted."/>
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
74 <!--<param name="species" type="text" value="" label="Species name abbreviation" />-->
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
75
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
76 <param name="species" type="select" label="Species name abbreviation">
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
77 <option value="hg19">hg19</option>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
78 <option value="ce10">ce10</option>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
79 <option value="dm3">dm3</option>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
80 <option value="mm9">mm9</option>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
81 </param>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
82
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
83 <conditional name="source">
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
84 <param name="ref_source" type="select" label="Reference genome">
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
85 <option value="cached">locally cached</option>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
86 <option value="history">in your history</option>
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
87 </param>
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
88 <when value="cached">
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
89 <param name="input1_2bit" type="select" label="Using reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
90 <options from_data_table="deepTools_seqs" />
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
91 </param>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
92 </when>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
93 <when value="history">
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
94 <param name="input1" type="data" format="twobit" label="Select a reference dataset in 2bit format" />
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
95 </when>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
96 </conditional>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
97 <param name="fragmentLength" type="integer" value="300" min="1"
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
98 label="Fragment length used for the sequencing"
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
99 help ="If paired-end reads are used, the fragment length is computed from the BAM file."/>
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
100
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
101 <conditional name="advancedOpt">
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
102 <param name="showAdvancedOpt" type="select" label="Show advanced options" >
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
103 <option value="no" selected="true">no</option>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
104 <option value="yes">yes</option>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
105 </param>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
106 <when value="no" />
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
107 <when value="yes">
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
108 <param name="region" type="text" value=""
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
109 label="Region of the genome to limit the operation to"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
110 help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" />
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
111
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
112 <param name="binSize" type="integer" value="50" min="1"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
113 label="Bin size in bp"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
114 help="Size of the bins in bp for the output of the bigwig/bedgraph file."/>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
115
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
116 <param name="sampleSize" type="integer" value="50000000" min="1"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
117 label="Number of sampling points to be considered" />
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
118
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
119 <param name="regionSize" type="integer" value="300" min="1"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
120 label="Region size"
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
121 help ="To plot the reads per GC over a region, the size of the region is required (see below for more details of the method). By default, the bin size is set to 300 bp, which is close to the standard fragment size for many sequencing applications. However, if the depth of sequencing is low, a larger bin size will be required, otherwise many bins will not overlap with any read."/>
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
122
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
123 <param name="filterOut" type="data" format="bed" optional="true"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
124 label="BED file containing genomic regions to be excluded from the estimation of the correction"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
125 help="Such regions usually contain repetitive regions and peaks that if included will bias the correction. It is recommended to filter out known repetitive regions if multi-reads (reads that map to more than one genomic position) were excluded. In the case of ChIP-seq data, it is recommended to first use a peak caller to identify and filter out the identified peaks." />
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
126 <param name="extraSampling" type="data" format="bed" optional="true"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
127 label="BED file containing genomic regions for which extra sampling is required because they are underrepresented in the genome"
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
128 help="" />
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
129 </when>
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
130 </conditional>
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
131
5
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
132 <param name="saveBiasPlot" type="boolean" truevalue="--biasPlot" falsevalue="" checked="True" label="Save a diagnostic image summarizing the GC bias found on the sample"/>
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
133 <!--
1
21d563d5f2b2 Uploaded
bgruening
parents: 0
diff changeset
134 <conditional name="output" >
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
135 <param name="showOutputSettings" type="select" label="Show additional output options" >
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
136 <option value="no" selected="true">no</option>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
137 <option value="yes">yes</option>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
138 </param>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
139 <when value="no" />
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
140 <when value="yes">
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
141 <param name="saveBiasPlot" type="boolean" label="Save a diagnostic image summarizing the GC bias found on the sample"/>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
142 </when>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
143 </conditional>
5
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
144 -->
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
145 </inputs>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
146 <outputs>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
147 <data format="tabular" name="outFileName" />
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
148 <data format="png" name="biasPlot" label="${tool.name} on ${on_string}: bias plot">
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
149 <filter>saveBiasPlot is True</filter>
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
150 <!--<filter>(output['showOutputSettings'] == 'yes' and output['saveBiasPlot'] == True)</filter>-->
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
151 </data>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
152 </outputs>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
153 <help>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
154
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
155 **What it does**
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
156
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
157 This tool computes the GC bias using the method proposed by Benjamini and Speed (2012), Nucleic Acids Res. (see below for more explanations).
5
1f312af2f8db Uploaded
bgruening
parents: 1
diff changeset
158 The output is used to plot the bias and can also be used later on to correct the bias with the tool correctGCbias.
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
159 There are two plots produced by the tool: a boxplot showing the absolute read numbers per genomic-GC bin and an x-y plot
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
160 depicting the ratio of observed/expected reads per genomic GC content bin.
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
161
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
162 -----
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
163
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
164 **Summary of the method used**
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
165
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
166 In order to estimate how many reads with what kind of GC content one should have sequenced, we first need to determine how many regions the specific
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
167 reference genome contains for each amount of GC content, i.e. how many regions in the genome have 50% GC (or 10% GC or 90% GC or...).
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
168 We then sample a large number of equally sized genome bins and count how many times we see a bin with 50% GC (or 10% GC or 90% or...). These EXPECTED values are independent of any
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
169 sequencing as they only depend on the respective reference genome (i.e. they will most likely vary between mouse and fruit fly due to their genomes' different GC contents).
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
170 The OBSERVED values are based on the reads from the sequenced sample. Instead of noting how many genomic regions there are per GC content, we now count the reads per GC content.
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
171 In an ideal sample without GC bias, the ratio of OBSERVED/EXPECTED values should be close to 1 regardless of the GC content. Due to PCR (over)amplifications, the majority of ChIP samples
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
172 usually shows a significant bias towards reads with high GC content (>50%).
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
173
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
174 -----
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
175
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
176 .. class:: infomark
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
177
6
c5847db0cb41 Uploaded
bgruening
parents: 5
diff changeset
178 If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com
0
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
179
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
180 This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
181
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
182 .. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
183 .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
184
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
185 </help>
09b65c12a75a Uploaded
bgruening
parents:
diff changeset
186 </tool>