comparison computeMatrix.xml @ 0:09b65c12a75a draft

Uploaded
author bgruening
date Sat, 03 Aug 2013 13:42:12 -0400
parents
children 21d563d5f2b2
comparison
equal deleted inserted replaced
-1:000000000000 0:09b65c12a75a
1 <tool id="computeMatrix" name="computeMatrix" version="1.0">
2 <description>summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile</description>
3 <requirements>
4 <requirement type="package" version="1.5.1_98e5d8a61431ea8605c0643d991a1a5d8999b4dc">deepTools</requirement>
5 </requirements>
6 <command>
7 computeMatrix
8 $mode.mode_select
9 --regionsFileName '$regionsFile'
10 --scoreFileName '$scoreFile'
11 --outFileName '$outFileName'
12
13 #if $output.showOutputSettings == "yes":
14 #if $output.saveData:
15 --outFileNameData '$outFileNameData'
16 #end if
17 #if $output.saveMatrix:
18 --outFileNameMatrix '$outFileNameMatrix'
19 #end if
20
21 #if $output.saveSortedRegions:
22 --outFileSortedRegions '$outFileSortedRegions'
23 #end if
24 #end if
25
26 #if $mode.mode_select == "reference-point":
27 --referencePoint $mode.referencePoint
28 $mode.nanAfterEnd
29 --beforeRegionStartLength $mode.beforeRegionStartLength
30 --afterRegionStartLength $mode.afterRegionStartLength
31 #else
32 --regionBodyLength $mode.regionBodyLength
33 --startLabel $mode.startLabel
34 --endLabel $mode.endLabel
35 #if $mode.regionStartLength.regionStartLength_select == "yes":
36 --beforeRegionStartLength $mode.regionStartLength.beforeRegionStartLength
37 --afterRegionStartLength $mode.regionStartLength.afterRegionStartLength
38 #end if
39 #end if
40
41 #if $advancedOpt.showAdvancedOpt == "yes":
42 --sortRegions '$advancedOpt.sortRegions'
43 --sortUsing '$advancedOpt.sortUsing'
44 --averageTypeBins '$advancedOpt.averageTypeBins'
45 $advancedOpt.missingDataAsZero
46 $advancedOpt.skipZeros
47 -bs $advancedOpt.binSize
48
49 #if str($advancedOpt.minThreshold).strip() not in ("", "None"):
50 --minThreshold $advancedOpt.minThreshold
51 #end if
52 #if str($advancedOpt.maxThreshold).strip() not in ("", "None"):
53 --maxThreshold $advancedOpt.maxThreshold
54 #end if
55 #if str($advancedOpt.scale).strip() not in ("", "None"):
56 --scale $advancedOpt.scale
57 #end if
58
59 #end if
60 --numberOfProcessors "\${GALAXY_SLOTS:-4}"
61 </command>
62 <inputs>
63 <param name="regionsFile" format="bed,gff" type="data" label="Regions to plot" help="File, in BED or GFF format, containing the regions to plot."/>
64 <param name="scoreFile" format="bigwig,bam" type="data" label="Score file" help="Either a bigWig file (containing a score, usually covering the whole genome) or a BAM file. For this last case, coverage counts will be used for the heatmap."/>
65
66 <conditional name="mode" >
67 <param name="mode_select" type="select" label="computeMatrix has two main output options" help="In the scale-regions mode, all regions in the BED/GFF file are stretched or shrunk to the same length (bp) that is indicated by the user. Reference-point refers to a position within the BED/GFF regions (e.g start of region). In the reference-point mode only those genomic positions before (upstream) and/or after (downstream) the reference point will be plotted.">
68 <option value="scale-regions" selected="true">scale-regions</option>
69 <option value="reference-point">reference-point</option>
70 </param>
71
72 <when value="scale-regions" >
73 <param name="regionBodyLength" type="integer" value="500" label="Distance in bp to which all regions are going to be fitted"/>
74 <param name="startLabel" type="text" value="TSS" size="10" label="Label for the region start" help ="Label shown in the plot for the start of the region. Default is TSS (transcription start site), but could be changed to anything, e.g. &quot;peak start&quot;." />
75 <param name="endLabel" type="text" value="TES" size="10" label="Label for the region end" help="Label shown in the plot for the region end. Default is TES (transcription end site)."/>
76 <conditional name="regionStartLength">
77 <param name="regionStartLength_select" type="select" label="Set distance up- and downstream of the given regions">
78 <option value="no" selected="true">no</option>
79 <option value="yes">yes</option>
80 </param>
81 <when value="no" />
82 <when value="yes">
83 <param name="beforeRegionStartLength" type="integer" value="1000" min="1" optional="true" label="Distance upstream of the start site of the regions defined in the region file" help="If the regions are genes, this would be the distance upstream of the transcription start site."/>
84
85 <param name="afterRegionStartLength" type="integer" value="1000" min="1" optional="true" label="Distance downstream of the end site of the given regions" help="If the regions are genes, this would be the distance downstream of the transcription end site."/>
86 </when>
87 </conditional>
88 </when>
89
90 <when value="reference-point">
91 <param name="referencePoint" type="select" label="The reference point for the plotting">
92 <option value="TSS" selected="true">region start (TSS)</option>
93 <option value="TES">region end (TES)</option>
94 <option value="center">center of the region</option>
95 </param>
96 <param name="nanAfterEnd" type="boolean" truevalue="--nanAfterEnd" falsevalue="" label="Discard any values after the region end" help="This is useful to visualize the region end when not using the scale-regions mode and when the reference-point is set to the TSS."/>
97 <param name="beforeRegionStartLength" type="integer" value="1000" min="1" label="Distance upstream of the start site of the regions defined in the region file" help="If the regions are genes, this would be the distance upstream of the transcription start site."/>
98
99 <param name="afterRegionStartLength" type="integer" value="1000" min="1" label="Distance downstream of the end site of the given regions" help="If the regions are genes, this would be the distance downstream of the transcription end site."/>
100 </when>
101 </conditional>
102
103 <conditional name="output" >
104 <param name="showOutputSettings" type="select" label="Show additional output options" >
105 <option value="no" selected="true">no</option>
106 <option value="yes">yes</option>
107 </param>
108 <when value="no" />
109 <when value="yes">
110 <param name="saveData" type="boolean" label="Save the averages per matrix column into a text file" help="This corresponds to the underlying data used to plot a summary profile."/>
111 <param name="saveMatrix" type="boolean" label="Save the matrix of values underlying the heatmap" help="This matrix can easily be loaded into R or other programs."/>
112 <param name="saveSortedRegions" type="boolean" label="Save the regions after skipping zeros or min/max threshold values" help="The order of the regions in the file follows the sorting order selected. This is useful, for example, to generate other heatmaps keeping the sorting of the first heatmap."/>
113 </when>
114 </conditional>
115
116
117 <conditional name="advancedOpt" >
118 <param name="showAdvancedOpt" type="select" label="Show advanced options" >
119 <option value="no" selected="true">no</option>
120 <option value="yes">yes</option>
121 </param>
122 <when value="no" />
123 <when value="yes">
124
125 <param name="binSize" type="integer" value="100" min="1" optional="true" label="Length, in base pairs, of the non-overlapping bin for averaging the score over the regions length" />
126
127 <param name="sortRegions" type="select" label="Sort regions"
128 help="Whether the output file should present the regions sorted.">
129 <option value="no" selected="true">no ordering</option>
130 <option value="descend">descending order</option>
131 <option value="ascend">ascending order</option>
132 </param>
133
134 <param name="sortUsing" type="select" label="Method used for sorting." help="The value is computed for each row." >
135 <option value="mean" selected="true">mean</option>
136 <option value="median">median</option>
137 <option value="min">min</option>
138 <option value="max">max</option>
139 <option value="sum">sum</option>
140 <option value="region_length">region length</option>
141 </param>
142
143 <param name="averageTypeBins" type="select" label="Define the type of statistic that should be used over the bin size range">
144 <option value="mean" selected="true">mean</option>
145 <option value="median">median</option>
146 <option value="min">min</option>
147 <option value="max">max</option>
148 <option value="sum">sum</option>
149 <option value="std">std</option>
150 </param>
151
152 <param name="missingDataAsZero" type="boolean" truevalue="--missingDataAsZero" falsevalue="" label="Indicate missing data as zero" help="Only for bigwig input! Set to &quot;yes&quot;, if missing data should be indicated as zeros. Default is to ignore such cases which will be depicted as black areas in the heatmap. (see &quot;Missing data color&quot; options of the heatmapper for additional options)."/>
153
154 <param name="skipZeros" type="boolean" truevalue="--skipZeros" falsevalue="" label="Skip zeros" help="Whether regions with only scores of zero should be included or not. Default is to include them."/>
155
156 <param name="minThreshold" type="float" optional="true" label="Minimum threshold" help="Any region containing a value that is equal or less than this numeric value will be skipped. This is useful to skip, for example, genes where the read count is zero for any of the bins. This could be the result of unmappable areas and can bias the overall results."/>
157 <param name="maxThreshold" type="float" optional="true" label="Maximum threshold" help="Any region containing a value that is equal or higher than this numeric value will be skipped. The max threshold is useful to skip those few regions with very high read counts (e.g. major satellites) that may bias the average values."/>
158 <param name="scale" type="float" optional="true" label="Scale" help="If set, all values are multiplied by this number."/>
159 </when>
160 </conditional>
161
162 </inputs>
163 <outputs>
164 <data format="bgzip" name="outFileName" label="${tool.name} on ${on_string}: matrix">
165 </data>
166 <data format="tabular" name="outFileNameData" label="${tool.name} on ${on_string}: raw data">
167 <filter>(output['showOutputSettings'] == 'yes' and output['saveData'] == True)</filter>
168 </data>
169 <data format="tabular" name="outFileNameMatrix" label="${tool.name} on ${on_string}: matrix of values">
170 <filter>(output['showOutputSettings'] == 'yes' and output['saveMatrix'] == True)</filter>
171 </data>
172 <data format="bed" name="outFileSortedRegions" label="${tool.name} on ${on_string}: sorted/filtered regions">
173 <filter>(output['showOutputSettings'] == 'yes' and output['saveSortedRegions'] == True)</filter>
174 </data>
175 </outputs>
176 <!--
177 computeMatrix -S test.bw -R test2.bed -a 100 -b 100 -bs 1
178 -->
179 <tests>
180 <test>
181 <param name="regionsFile" value="test2.bed" ftype="bed" />
182 <param name="scoreFile" value="test.bw" ftype="bigwig" />
183 <param name="advancedOpt.binSize" value="1" />
184 <param name="mode.beforeRegionStartLength" value="100" />
185 <param name="mode.afterRegionStartLength" value="100" />
186 <output name="outFileName" file="master.mat.gz" ftype="bgzip" compare="sim_size" delta="100" />
187 </test>
188 </tests>
189 <help>
190 **What it does**
191
192 This tool summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile. Typically, these genomic regions are genes, but any other regions defined in a BED or GFF format can be used. This tool can also be used to filter and sort regions according to their score.
193
194 -----
195
196 .. class:: infomark
197
198 Please note that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.
199
200 This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
201
202
203 .. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
204 .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
205 .. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
206 </help>
207
208 </tool>