comparison computeMatrix.xml @ 61:f3140d17939e draft

Uploaded
author bgruening
date Fri, 23 Jan 2015 16:53:31 -0500
parents
children 9bee2c86eeb1
comparison
equal deleted inserted replaced
60:c699d2b577f8 61:f3140d17939e
1 <tool id="deeptools_computeMatrix" name="computeMatrix" version="@WRAPPER_VERSION@.0">
2 <description>summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile</description>
3 <expand macro="requirements" />
4 <expand macro="stdio" />
5 <macros>
6 <token name="@BINARY@">computeMatrix</token>
7 <import>deepTools_macros.xml</import>
8 </macros>
9 <command>
10 <![CDATA[
11 #import tempfile
12 ## Build one temporary regions file: each input file is appended, followed by a "#label" line.
13 #set $temp_input_handle = tempfile.NamedTemporaryFile()
14 #set $temp_input_path = $temp_input_handle.name
15 #silent $temp_input_handle.close()
16 ## The handle is closed immediately; only its path is reused by the shell commands below.
17 #for $rf in $regionsFiles:
18 cat "$rf.regionsFile" >> $temp_input_path;
19 #if str($rf.label.value).strip():
20 echo "\#$rf.label.value" >> $temp_input_path;
21 #else:
22 echo "\#$rf.regionsFile.name" >> $temp_input_path;
23 #end if
24 #end for
25 ## Main invocation: selected mode, the concatenated regions file, the score file and the matrix output.
26 computeMatrix
27
28 $mode.mode_select
29 --regionsFileName '$temp_input_path'
30 --scoreFileName '$scoreFile'
31 --outFileName '$outFileName'
32
33 @THREADS@
34 ## Extra output files are requested only when the output settings section is switched on.
35 #if $output.showOutputSettings == "yes":
36 #if $output.saveData:
37 --outFileNameData '$outFileNameData'
38 #end if
39 #if $output.saveMatrix:
40 --outFileNameMatrix '$outFileNameMatrix'
41 #end if
42
43 #if $output.saveSortedRegions:
44 --outFileSortedRegions '$outFileSortedRegions'
45 #end if
46 #end if
47 ## Mode-specific arguments: reference-point always passes both distances, scale-regions only on request.
48 #if $mode.mode_select == "reference-point":
49 --referencePoint $mode.referencePoint
50 $mode.nanAfterEnd
51 --beforeRegionStartLength $mode.beforeRegionStartLength
52 --afterRegionStartLength $mode.afterRegionStartLength
53 #else:
54 --regionBodyLength $mode.regionBodyLength
55 --startLabel "$mode.startLabel"
56 --endLabel "$mode.endLabel"
57 #if $mode.regionStartLength.regionStartLength_select == "yes":
58 --beforeRegionStartLength $mode.regionStartLength.beforeRegionStartLength
59 --afterRegionStartLength $mode.regionStartLength.afterRegionStartLength
60 #end if
61 #end if
62 ## Advanced options are emitted only when the advanced section is switched on.
63 #if $advancedOpt.showAdvancedOpt == "yes":
64 --sortRegions '$advancedOpt.sortRegions'
65 --sortUsing '$advancedOpt.sortUsing'
66 --averageTypeBins '$advancedOpt.averageTypeBins'
67 $advancedOpt.missingDataAsZero
68 $advancedOpt.skipZeros
69 --binSize $advancedOpt.binSize
70 ## Optional floats are tested by their string form so that an explicit 0 is still passed on; a plain truth test would drop it.
71 #if str($advancedOpt.minThreshold).strip() not in ("", "None"):
72 --minThreshold $advancedOpt.minThreshold
73 #end if
74 #if str($advancedOpt.maxThreshold).strip() not in ("", "None"):
75 --maxThreshold $advancedOpt.maxThreshold
76 #end if
77 #if str($advancedOpt.scale).strip() not in ("", "None"):
78 --scale $advancedOpt.scale
79 #end if
80 ## NOTE(review): the final "; rm" below determines the exit status of the whole command line - confirm that computeMatrix failures are still detected.
81 #end if
82 ; rm $temp_input_path
83 ]]>
84 </command>
85 <inputs>
86 <!-- One or more BED region files, each with an optional label used in the output. -->
87 <repeat min="1" name="regionsFiles" title="regions to plot">
88 <param type="data" name="regionsFile" format="bed" label="Regions to plot" help="File, in BED format, containing the regions to plot."/>
89 <param type="text" name="label" optional="true" value="" size="30" label="Label" help="Label to use in the output."/>
90 </repeat>
91 <!-- bigWig file holding the scores that are associated with the regions. -->
92 <param type="data" name="scoreFile" format="bigwig"
93 help="Should be a bigWig file (containing a score, usually covering the whole genome). You can generate a bigWig file either from a bedGraph or WIG file using UCSC tools or from a BAM file using the deepTool bamCoverage. (-scoreFile)"
94 label="Score file"/>
95
96 <conditional name="mode" > <!-- selects between scale-regions and reference-point; each branch has its own parameters -->
97 <param name="mode_select" type="select"
98 label="computeMatrix has two main output options"
99 help="In the scale-regions mode, all regions in the BED file are stretched or shrunk to the same length (bp) that is indicated by the user. Reference-point refers to a position within the BED regions (e.g start of region). In the reference-point mode only those genomic positions before (upstream) and/or after (downstream) the reference point will be plotted.">
100 <option value="scale-regions" selected="true">scale-regions</option>
101 <option value="reference-point">reference-point</option>
102 </param>
103 <!-- scale-regions: every region is fitted to regionBodyLength; flanking distances are optional. -->
104 <when value="scale-regions" >
105 <param name="regionBodyLength" type="integer" value="500"
106 label="Distance in bp to which all regions are going to be fitted" help="(--regionBodyLength)"/>
107 <param name="startLabel" type="text" value="TSS" size="10"
108 label="Label for the region start"
109 help="Label shown in the plot for the start of the region. Default is TSS (transcription start site), but could be changed to anything, e.g. &quot;peak start&quot;. (--startLabel)" />
110 <param name="endLabel" type="text" value="TES" size="10"
111 label="Label for the region end"
112 help="Label shown in the plot for the region end. Default is TES (transcription end site). (--endLabel)"/>
113 <conditional name="regionStartLength">
114 <param name="regionStartLength_select" type="select" label="Set distance up- and downstream of the given regions">
115 <option value="no" selected="true">no</option>
116 <option value="yes">yes</option>
117 </param>
118 <when value="no" />
119 <when value="yes">
120 <param name="beforeRegionStartLength" type="integer" value="1000" min="1"
121 label="Distance upstream of the start site of the regions defined in the region file"
122 help="If the regions are genes, this would be the distance upstream of the transcription start site. (--beforeRegionStartLength)"/>
123 <param name="afterRegionStartLength" type="integer" value="1000" min="1"
124 label="Distance downstream of the end site of the given regions"
125 help="If the regions are genes, this would be the distance downstream of the transcription end site. (--afterRegionStartLength)"/>
126 </when>
127 </conditional>
128 </when>
129 <when value="reference-point"> <!-- reference-point: both flanking distances are always part of the form -->
130 <param name="referencePoint" type="select" label="The reference point for the plotting">
131 <option value="TSS" selected="true">beginning of region (e.g. TSS)</option>
132 <option value="TES">end of region (e.g. TES)</option>
133 <option value="center">center of region</option>
134 </param>
135 <param name="nanAfterEnd" type="boolean" truevalue="--nanAfterEnd" falsevalue=""
136 label="Discard any values after the region end"
137 help="This is useful to visualize the region end when not using the scale-regions mode and when the reference-point is set to the TSS. (--nanAfterEnd)"/>
138 <param name="beforeRegionStartLength" type="integer" value="1000" min="1"
139 label="Distance upstream of the start site of the regions defined in the region file"
140 help="If the regions are genes, this would be the distance upstream of the transcription start site. (--beforeRegionStartLength)"/>
141 <param name="afterRegionStartLength" type="integer" value="1000" min="1"
142 label="Distance downstream of the end site of the given regions"
143 help="If the regions are genes, this would be the distance downstream of the transcription end site. (--afterRegionStartLength)"/>
144 </when>
145 </conditional>
146
147 <expand macro="input_graphic_output_settings">
148 <expand macro="input_save_matrix_values" />
149 </expand>
150
151 <conditional name="advancedOpt" > <!-- optional section: binning, sorting, statistics and filtering -->
152 <param name="showAdvancedOpt" type="select" label="Show advanced options" >
153 <option value="no" selected="true">no</option>
154 <option value="yes">yes</option>
155 </param>
156 <when value="no" />
157 <when value="yes">
158 <param name="binSize" type="integer" value="10" min="1"
159 label="Length, in base pairs, of the non-overlapping bin for averaging the score over the regions length"
160 help="(--binSize)"/>
161 <param name="sortRegions" type="select" label="Sort regions"
162 help="Whether the output file should present the regions sorted.">
163 <option value="no" selected="true">no ordering</option>
164 <option value="descend">descending order</option>
165 <option value="ascend">ascending order</option>
166 </param>
167 <!-- Value computed per row that the sort order is based on. -->
168 <param name="sortUsing" type="select" label="Method used for sorting"
169 help="The value is computed for each row. (--sortUsing)" >
170 <option value="mean" selected="true">mean</option>
171 <option value="median">median</option>
172 <option value="min">min</option>
173 <option value="max">max</option>
174 <option value="sum">sum</option>
175 <option value="region_length">region length</option>
176 </param>
177 <!-- Statistic computed for each bin. -->
178 <param name="averageTypeBins" type="select"
179 label="Define the type of statistic that should be displayed."
180 help="The value is computed for each bin. (--averageTypeBins)">
181 <option value="mean" selected="true">mean</option>
182 <option value="median">median</option>
183 <option value="min">min</option>
184 <option value="max">max</option>
185 <option value="sum">sum</option>
186 <option value="std">std</option>
187 </param>
188 <!-- Boolean flags followed by optional numeric settings; the optional ones may be left empty. -->
189 <param name="missingDataAsZero" type="boolean" truevalue="--missingDataAsZero" falsevalue=""
190 label="Indicate missing data as zero"
191 help="Set to &quot;yes&quot;, if missing data should be indicated as zeros. Default is to ignore such cases which will be depicted as black areas in the heatmap. (see &quot;Missing data color&quot; options of the heatmapper for additional options). (--missingDataAsZero)"/>
192 <param name="skipZeros" type="boolean" truevalue="--skipZeros" falsevalue=""
193 label="Skip zeros"
194 help="Whether regions with only scores of zero should be included or not. Default is to include them. (--skipZeros)"/>
195 <param name="minThreshold" type="float" optional="true"
196 label="Minimum threshold"
197 help="Any region containing a value that is equal to or less than this numeric value will be skipped. This is useful to skip, for example, genes where the read count is zero for any of the bins. This could be the result of unmappable areas and can bias the overall results. (--minThreshold)"/>
198 <param name="maxThreshold" type="float" optional="true"
199 label="Maximum threshold"
200 help="Any region containing a value that is equal to or higher than this numeric value will be skipped. The max threshold is useful to skip those few regions with very high read counts (e.g. major satellites) that may bias the average values. (--maxThreshold)"/>
201 <param name="scale" type="float" optional="true" label="Scaling factor"
202 help="If set, all values are multiplied by this number. (--scale)"/>
203 </when>
204 </conditional>
205 </inputs>
206 <outputs> <!-- primary matrix output, followed by outputs defined in the shared macros -->
207 <data name="outFileName" format="bgzip" label="${tool.name} on ${on_string}: Matrix" />
208 <expand macro="output_graphic_outputs" />
209 <expand macro="output_save_matrix_values" />
210 </outputs>
211 <!--
212 computeMatrix -S test.bw -R test2.bed -a 100 -b 100 -bs 1
213 -->
214 <tests>
215 <test> <!-- reference-point mode, sum statistics, missing data treated as zero -->
216 <param name="regionsFile" ftype="bed" value="computeMatrix1.bed" />
217 <param name="scoreFile" ftype="bigwig" value="bamCoverage_result4.bw" />
218 <param name="showAdvancedOpt" value="yes" />
219 <param name="mode_select" value="reference-point" />
220 <param name="binSize" value="10" />
221 <param name="sortUsing" value="sum" />
222 <param name="averageTypeBins" value="sum" />
223 <param name="missingDataAsZero" value="True" />
224 <param name="beforeRegionStartLength" value="10" />
225 <param name="afterRegionStartLength" value="10" />
226 <output name="outFileName" ftype="bgzip" file="computeMatrix_result1.gz" compare="sim_size" />
227 </test>
228 <test> <!-- reference-point mode with the remaining advanced options left at their defaults -->
229 <param name="regionsFile" ftype="bed" value="computeMatrix2.bed" />
230 <param name="scoreFile" ftype="bigwig" value="computeMatrix2.bw" />
231 <param name="showAdvancedOpt" value="yes" />
232 <param name="mode_select" value="reference-point" />
233 <param name="binSize" value="10" />
234 <param name="beforeRegionStartLength" value="10" />
235 <param name="afterRegionStartLength" value="10" />
236 <output name="outFileName" ftype="bgzip" file="computeMatrix_result2.gz" compare="sim_size" />
237 </test>
238 </tests>
239 <help>
240 <![CDATA[
241 **What it does**
242
243 This tool prepares an intermediary file (a gzipped table of values)
244 that contains scores associated with genomic regions that can be used
245 afterwards to plot a heatmap or a profile.
246
247 Genomic regions can really be anything - genes, parts of genes, ChIP-seq
248 peaks, favorite genome regions... as long as you provide a proper file
249 in BED or INTERVAL format. If you would like to compare different groups of regions
250 (e.g. genes from chromosome 2 and 3), you can supply more than one BED file, one for each group.
251
252 computeMatrix can also be used to filter and sort
253 regions according to their score by making use of its advanced output options.
254
255
256 .. image:: $PATH_TO_IMAGES/flowChart_computeMatrixetc.png
257 :alt: Relationship between computeMatrix, heatmapper and profiler
258
259
260 You can find more details on the computeMatrix wiki page: https://github.com/fidelram/deepTools/wiki/Visualizations#wiki-computeMatrix
261
262
263 -----
264
265 @REFERENCES@
266 ]]>
267 </help>
268 <expand macro="citations" />
269 </tool>