comparison w4mclassfilter.xml @ 15:08d4ca8bc6dd draft

"planemo upload for repository https://github.com/HegemanLab/w4mclassfilter_galaxy_wrapper/tree/master commit 9639dde5737c9aa2330bb603c2299345939407cf"
author eschen42
date Thu, 11 Mar 2021 20:46:26 +0000
parents
children
comparison
equal deleted inserted replaced
14:1d36ecf93e67 15:08d4ca8bc6dd
1 <tool id="w4mclassfilter" name="W4m Data Subset" version="0.98.19">
2 <description>Filter W4M data by values or metadata</description>
3 <requirements> <!-- packages that must be available when the command below runs -->
4 <requirement type="package" version="4.0.3">r-base</requirement> <!-- supplies the Rscript executable invoked by the command -->
5 <requirement type="package" version="1.1_5">r-batch</requirement> <!-- NOTE(review): presumably used by w4mclassfilter_wrapper.R to parse the name/value arguments; confirm against the wrapper script -->
6 <requirement type="package" version="0.98.19">w4mclassfilter</requirement> <!-- R package that the tool is built on; version is the same as the tool version attribute -->
7 </requirements>
8 <command detect_errors="aggressive"><![CDATA[
9 unset R_HOME;
10 if [ '$centering' = 'medoid' ] && [ '$imputation' = 'none' ]; then ## reject the one unsupported option pair, using POSIX test syntax
11 echo "'medoid' centering may not be chosen with imputation 'none'" 1>&2; ## double quotes keep the single quotes in the message
12 false; ## leave the if-statement with a nonzero status without exiting the enclosing shell
13 else
14 Rscript
15 '$__tool_directory__/w4mclassfilter_wrapper.R'
16 dataMatrix_in '$dataMatrix_in'
17 sampleMetadata_in '$sampleMetadata_in'
18 variableMetadata_in '$variableMetadata_in'
19 sampleclassNames '$sampleclassNames'
20 inclusive '$inclusive'
21 wildcards '$wildcards'
22 classnameColumn '$classnameColumn'
23 samplenameColumn 'sampleMetadata'
24 variable_range_filter '$variableRangeFilter'
25 transformation '$transformation'
26 imputation '$imputation'
27 dataMatrix_out '$dataMatrix_out'
28 sampleMetadata_out '$sampleMetadata_out'
29 variableMetadata_out '$variableMetadata_out'
30 order_vrbl '$order_vrbl'
31 order_smpl '$order_smpl'
32 centering '$centering';
33 fi
34 ]]></command> <!-- runs w4mclassfilter_wrapper.R with name/value argument pairs unless 'medoid' centering is combined with imputation 'none' -->
35 <inputs> <!-- three input datasets followed by the filtering, transformation, ordering, and centering options -->
36 <param name="dataMatrix_in" type="data" format="tabular" label="Data matrix"
37 help="Choose data-matrix file (tab-separated values with sample names in first row and feature names in first column)."/>
38 <param name="sampleMetadata_in" type="data" format="tabular" label="Sample metadata"
39 help="Choose sample-metadata file (tab-separated values with one row per sample, sample name in first column)."/>
40 <param name="variableMetadata_in" type="data" format="tabular" label="Variable metadata"
41 help="Choose variable-metadata file (tab-separated values with one row per feature, feature name in first column)."/>
42 <param name="classnameColumn" type="text" value="class" label="Column containing the sample-class names (or treatment names)"
43 help="Name the column in 'Sample metadata' that has the values to be referenced by 'Sample-class names' and 'Compute centers for classes'. [default: 'class']">
44 <sanitizer>
45 <valid initial="string.letters">
46 <add preset="string.digits"/>
47 <add value="&#46;"/> <!-- '.' period -->
48 <add value="&#95;"/> <!-- '_' underscore -->
49 </valid>
50 </sanitizer>
51 </param>
52 <param name="sampleclassNames" type="text" value="" label="Sample-class names (or patterns)"
53 help="List of names (or patterns to match names) of sample classes to include or exclude. List should be comma-separated with no stray space characters. (Leave this empty to match no names.) [default: empty]">
54 <sanitizer>
55 <valid initial="string.letters">
56 <add preset="string.digits"/>
57 <add value="&#123;"/> <!-- '{' left curly bracket -->
58 <add value="&#124;"/> <!-- '|' pipe -->
59 <add value="&#125;"/> <!-- '}' right curly bracket -->
60 <add value="&#36;"/> <!-- '$' dollar sign -->
61 <add value="&#40;"/> <!-- '(' left parenthesis -->
62 <add value="&#41;"/> <!-- ')' right parenthesis -->
63 <add value="&#42;"/> <!-- '*' asterisk -->
64 <add value="&#43;"/> <!-- '+' plus -->
65 <add value="&#45;"/> <!-- '-' hyphen -->
66 <add value="&#44;"/> <!-- ',' comma -->
67 <add value="&#46;"/> <!-- '.' period -->
68 <add value="&#58;"/> <!-- ':' colon -->
69 <add value="&#59;"/> <!-- ';' semicolon -->
70 <add value="&#63;"/> <!-- '?' question mark -->
71 <add value="&#91;"/> <!-- '[' left square bracket -->
72 <add value="&#92;"/> <!-- '\' backslash -->
73 <add value="&#93;"/> <!-- ']' right square bracket -->
74 <add value="&#94;"/> <!-- '^' caret -->
75 <add value="&#95;"/> <!-- '_' underscore -->
76 </valid>
77 </sanitizer>
78 </param>
79 <param name="inclusive" type="select" label="Exclude/include named (or matched) sample classes" help="Indicate meaning of preceding list: either to identify classes to exclude from output or to identify classes to include in output. [default: 'filter-out']">
80 <option value="TRUE">filter-in: &#160;&#160; Include only the named sample classes.</option>
81 <option selected="true" value="FALSE">filter-out: &#160;&#160; Exclude only the named sample classes.</option>
82 </param>
83 <param name="wildcards" type="select" label="Use 'wild card patterns' or 'regular expression patterns' to match sample-class names"
84 help="See '&lt;i&gt;Wild-card patterns to match class names&lt;/i&gt;' and '&lt;i&gt;Regular-expression patterns to match sample-class names&lt;/i&gt;' sections below. [default: 'wild-card patterns']">
85 <option selected="true" value="TRUE">wild-card patterns: &#160;&#160; Use '*' and '?' to match sample-class names.</option>
86 <option value="FALSE">regular-expression patterns: &#160;&#160; Use regular expressions to match sample-class names.</option>
87 </param>
88 <param name="variableRangeFilter" type="text" value="" label="Variable-range filters"
89 help="List of filters, each specifying the range of permitted values in a column of 'Variable metadata' (specified as 'column:min:max'), as described in '&lt;i&gt;Variable-range filters&lt;/i&gt;' section below. List should be comma-separated with no stray space characters. (Leave this empty for no filtering.) [default: empty]">
90 <sanitizer>
91 <valid initial="string.letters">
92 <add preset="string.digits"/>
93 <add value="&#44;"/> <!-- ',' comma -->
94 <add value="&#46;"/> <!-- '.' period -->
95 <add value="&#58;"/> <!-- ':' colon -->
96 <add value="&#95;"/> <!-- '_' underscore -->
97 </valid>
98 </sanitizer>
99 </param>
100 <param name="transformation" type="select" label="Data transformation"
101 help="Choose transformation. In all cases, negative intensities become missing values. See '&lt;i&gt;Data transformation and imputation&lt;/i&gt;' section below. [default: 'none']">
102 <option selected="true" value="none">none: &#160;&#160; Do not transform data.</option>
103 <option value="log2">log2: &#160;&#160; Perform log base 2 transformation of data.</option>
104 <option value="log10">log10: &#160;&#160; Perform log base 10 transformation of data.</option>
105 </param>
106 <param name="imputation" type="select" label="Imputation of missing values"
107 help="Choose imputation for missing values. See '&lt;i&gt;Data transformation and imputation&lt;/i&gt;' section below. [default: 'zero']">
108 <option selected="true" value="zero">zero: &#160;&#160; Replace missing values with zero.</option>
109 <option value="center">center: &#160;&#160; Replace missing values with feature-median.</option>
110 <option value="none">none: &#160;&#160; Perform no imputation. Note that 'compute centers' cannot be set to 'medoid'.</option>
111 </param>
112 <param name="order_smpl" type="text" value="sampleMetadata" label="Columns that specify order for samples"
113 help="List of sample-metadata column names for sorting samples. List should be comma-separated with no stray space characters. (This is ignored when 'Compute centers for classes' is set to either 'centroid' or 'median'.) [default: 'sampleMetadata']">
114 <sanitizer>
115 <valid initial="string.letters">
116 <add preset="string.digits"/>
117 <add value="&#46;"/> <!-- '.' period -->
118 <add value="&#95;"/> <!-- '_' underscore -->
119 <add value="&#44;"/> <!-- ',' comma -->
120 </valid>
121 </sanitizer>
122 </param>
123 <param name="order_vrbl" type="text" value="variableMetadata" label="Columns that specify order for features"
124 help="List of feature-metadata column names for sorting features. List should be comma-separated with no stray space characters. [default: 'variableMetadata']">
125 <sanitizer>
126 <valid initial="string.letters">
127 <add preset="string.digits"/>
128 <add value="&#46;"/> <!-- '.' period -->
129 <add value="&#95;"/> <!-- '_' underscore -->
130 <add value="&#44;"/> <!-- ',' comma -->
131 </valid>
132 </sanitizer>
133 </param>
134 <param name="centering" type="select" label="Compute centers for classes (e.g., treatments)" help="[default: 'none']">
135 <option selected="true" value="none">none: &#160;&#160; Do not compute centers for classes/treatments.</option>
136 <option value="centroid">centroid: &#160;&#160; For each class, compute the mean for each feature.</option>
137 <option value="median">median: &#160;&#160; For each class, compute the median for each feature.</option>
138 <option value="medoid">medoid: &#160;&#160; For each class, select only the most central member. Note that 'Imputation of missing values' cannot be 'none'.</option>
139 </param>
140 </inputs>
141 <outputs> <!-- one tabular output per input dataset, labelled with the input name plus ".subset" -->
142 <data name="dataMatrix_out" label="${dataMatrix_in.name}.subset" format="tabular"/>
143 <data name="sampleMetadata_out" label="${sampleMetadata_in.name}.subset" format="tabular"/>
144 <data name="variableMetadata_out" label="${variableMetadata_in.name}.subset" format="tabular"/>
145 </outputs>
146 <tests>
147 <!-- test 1 - filter-in class 'M' of column 'gender' (regular-expression matching) with variable-range filters on FEATMAX, mz, and rt; no transformation -->
148 <test>
149 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
150 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
151 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
152 <param name="classnameColumn" value="gender"/>
153 <param name="sampleclassNames" value="M"/>
154 <param name="wildcards" value="FALSE"/>
155 <param name="inclusive" value="TRUE"/>
156 <param name="variableRangeFilter" value="FEATMAX:2e6:,mz:200:,rt::800"/>
157 <param name="transformation" value="none"/>
158 <output name="dataMatrix_out">
159 <assert_contents>
160 <has_text text="747080" />
161 <not_has_text text="13420742" />
162 <not_has_text text="47259" />
163 </assert_contents>
164 </output>
165 <output name="sampleMetadata_out">
166 <assert_contents>
167 <has_text text="HU_017" />
168 <has_text text="HU_034" />
169 <has_text text="HU_078" />
170 <has_text text="HU_091" />
171 <has_text text="HU_093" />
172 <has_text text="HU_099" />
173 <has_text text="HU_130" />
174 <has_text text="HU_134" />
175 <has_text text="HU_138" />
176 <not_has_text text="HU_028" />
177 <not_has_text text="HU_051" />
178 <not_has_text text="HU_060" />
179 <not_has_text text="HU_110" />
180 <not_has_text text="HU_149" />
181 <not_has_text text="HU_152" />
182 <not_has_text text="HU_175" />
183 <not_has_text text="HU_178" />
184 <not_has_text text="HU_185" />
185 <not_has_text text="HU_204" />
186 <not_has_text text="HU_208" />
187 </assert_contents>
188 </output>
189 <output name="variableMetadata_out">
190 <assert_contents>
191 <has_text text="HMDB00208" />
192 <has_text text="HMDB01032" />
193 <has_text text="HMDB01101.1" />
194 <has_text text="HMDB13189" />
195 <not_has_text text="HMDB00191" />
196 <not_has_text text="HMDB00251" />
197 <not_has_text text="HMDB00299" />
198 <not_has_text text="HMDB00512" />
199 <not_has_text text="HMDB00518" />
200 <not_has_text text="HMDB00715" />
201 <not_has_text text="HMDB00822" />
202 <not_has_text text="HMDB03193" />
203 <not_has_text text="HMDB04824" />
204 <not_has_text text="HMDB10348" />
205 <not_has_text text="HMDB59717" />
206 </assert_contents>
207 </output>
208 </test>
209 <!-- test 2 - regular-expression filter-in on column 'sampleMetadata' combined with log10 transformation, zero imputation, and variable-range filters -->
210 <test>
211 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
212 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
213 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
214 <!-- test that hyphens in regular expressions work -->
215 <param name="sampleclassNames" value="HU_[0-9][0-9][0-9]"/>
216 <param name="inclusive" value="TRUE"/>
217 <param name="wildcards" value="FALSE"/>
218 <param name="classnameColumn" value="sampleMetadata"/>
219 <!-- test that variableRangeFilter works with transformation -->
220 <param name="variableRangeFilter" value="FEATMAX:6.30103:,mz:200:,rt::800"/>
221 <param name="transformation" value="log10"/>
222 <param name="imputation" value="zero"/>
223 <output name="dataMatrix_out" md5="5644d2ea01d072ee1d0c40e29e9d0089">
224 <assert_contents>
225 <has_text text="5.8733671" />
226 </assert_contents>
227 </output>
228 <output name="sampleMetadata_out">
229 <assert_contents>
230 <has_text text="HU_017" />
231 <has_text text="HU_028" />
232 <has_text text="HU_034" />
233 <has_text text="HU_051" />
234 <has_text text="HU_060" />
235 <has_text text="HU_078" />
236 <has_text text="HU_091" />
237 <has_text text="HU_093" />
238 <has_text text="HU_099" />
239 <has_text text="HU_110" />
240 <has_text text="HU_130" />
241 <has_text text="HU_134" />
242 <has_text text="HU_138" />
243 <has_text text="HU_149" />
244 <has_text text="HU_152" />
245 <has_text text="HU_175" />
246 <has_text text="HU_178" />
247 <has_text text="HU_185" />
248 <has_text text="HU_208" />
249 <not_has_text text="HU_204" />
250 </assert_contents>
251 </output>
252 <output name="variableMetadata_out">
253 <assert_contents>
254 <has_text text="HMDB00191" />
255 <has_text text="HMDB00208" />
256 <has_text text="HMDB01032" />
257 <has_text text="HMDB01101.1" />
258 <has_text text="HMDB13189" />
259 <not_has_text text="HMDB00251" />
260 <not_has_text text="HMDB00299" />
261 <not_has_text text="HMDB00512" />
262 <not_has_text text="HMDB00518" />
263 <not_has_text text="HMDB00715" />
264 <not_has_text text="HMDB00822" />
265 <not_has_text text="HMDB03193" />
266 <not_has_text text="HMDB04824" />
267 <not_has_text text="HMDB10348" />
268 <not_has_text text="HMDB59717" />
269 </assert_contents>
270 </output>
271 </test>
272 <!-- test 3 - filter-in class 'M' of column 'gender' without setting 'wildcards' (tool default applies); checks sample and feature names in the data matrix -->
273 <test>
274 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
275 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
276 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
277 <param name="classnameColumn" value="gender"/>
278 <param name="sampleclassNames" value="M"/>
279 <param name="inclusive" value="TRUE"/>
280 <param name="transformation" value="none"/>
281 <output name="dataMatrix_out">
282 <assert_contents>
283 <not_has_text text="HU_028" />
284 <not_has_text text="HU_051" />
285 <not_has_text text="HU_060" />
286 <not_has_text text="HU_110" />
287 <not_has_text text="HU_149" />
288 <not_has_text text="HU_152" />
289 <not_has_text text="HU_175" />
290 <not_has_text text="HU_178" />
291 <not_has_text text="HU_185" />
292 <not_has_text text="HU_204" />
293 <not_has_text text="HU_208" />
294 <has_text text="HU_017" />
295 <has_text text="HU_034" />
296 <has_text text="HU_078" />
297 <has_text text="HU_091" />
298 <has_text text="HU_093" />
299 <has_text text="HU_099" />
300 <has_text text="HU_130" />
301 <has_text text="HU_134" />
302 <has_text text="HU_138" />
303 <has_text text="HMDB03193" />
304 <not_has_text text="HMDB00822" />
305 <has_text text="HMDB01101" />
306 <has_text text="HMDB01101.1" />
307 <has_text text="HMDB10348" />
308 <has_text text="HMDB59717" />
309 <has_text text="HMDB13189" />
310 <has_text text="HMDB00299" />
311 <has_text text="HMDB00191" />
312 <has_text text="HMDB00518" />
313 <has_text text="HMDB00715" />
314 <has_text text="HMDB01032" />
315 <has_text text="HMDB00208" />
316 <has_text text="HMDB04824" />
317 <has_text text="HMDB00512" />
318 <has_text text="HMDB00251" />
319 </assert_contents>
320 </output>
321 </test>
322 <!-- test 4 - wild-card '*' filter-in on column 'gender' with zero imputation; the data matrix must contain no 'NA' -->
323 <test>
324 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
325 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
326 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
327 <param name="classnameColumn" value="gender"/>
328 <param name="sampleclassNames" value="*"/>
329 <param name="wildcards" value="TRUE"/>
330 <param name="inclusive" value="TRUE"/>
331 <param name="imputation" value="zero"/>
332 <output name="dataMatrix_out" md5="b2eac4946d3803a07606286b50451af4">
333 <assert_contents>
334 <not_has_text text="NA" />
335 </assert_contents>
336 </output>
337 <output name="sampleMetadata_out">
338 <assert_contents>
339 <not_has_text text="HU_204" />
340 <has_text text="HU_028" />
341 <has_text text="HU_051" />
342 <has_text text="HU_060" />
343 <has_text text="HU_110" />
344 <has_text text="HU_149" />
345 <has_text text="HU_152" />
346 <has_text text="HU_175" />
347 <has_text text="HU_178" />
348 <has_text text="HU_185" />
349 <has_text text="HU_208" />
350 <has_text text="HU_017" />
351 <has_text text="HU_034" />
352 <has_text text="HU_078" />
353 <has_text text="HU_091" />
354 <has_text text="HU_093" />
355 <has_text text="HU_099" />
356 <has_text text="HU_130" />
357 <has_text text="HU_134" />
358 <has_text text="HU_138" />
359 </assert_contents>
360 </output>
361 </test>
362 <!-- test 5 - filter-in class 'M' of column 'gender' (regular-expression matching); checks which samples remain in the sample metadata -->
363 <test>
364 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
365 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
366 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
367 <param name="classnameColumn" value="gender"/>
368 <param name="sampleclassNames" value="M"/>
369 <param name="wildcards" value="FALSE"/>
370 <param name="inclusive" value="TRUE"/>
371 <output name="sampleMetadata_out">
372 <assert_contents>
373 <not_has_text text="HU_028" />
374 <not_has_text text="HU_051" />
375 <not_has_text text="HU_060" />
376 <not_has_text text="HU_110" />
377 <not_has_text text="HU_149" />
378 <not_has_text text="HU_152" />
379 <not_has_text text="HU_175" />
380 <not_has_text text="HU_178" />
381 <not_has_text text="HU_185" />
382 <not_has_text text="HU_204" />
383 <not_has_text text="HU_208" />
384 <has_text text="HU_017" />
385 <has_text text="HU_034" />
386 <has_text text="HU_078" />
387 <has_text text="HU_091" />
388 <has_text text="HU_093" />
389 <has_text text="HU_099" />
390 <has_text text="HU_130" />
391 <has_text text="HU_134" />
392 <has_text text="HU_138" />
393 </assert_contents>
394 </output>
395 </test>
396 <!-- test 6 - same parameters as test 5; checks which features remain in the variable metadata -->
397 <test>
398 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
399 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
400 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
401 <param name="classnameColumn" value="gender"/>
402 <param name="sampleclassNames" value="M"/>
403 <param name="wildcards" value="FALSE"/>
404 <param name="inclusive" value="TRUE"/>
405 <output name="variableMetadata_out">
406 <assert_contents>
407 <has_text text="HMDB03193" />
408 <not_has_text text="HMDB00822" />
409 <has_text text="HMDB01101" />
410 <has_text text="HMDB01101.1" />
411 <has_text text="HMDB10348" />
412 <has_text text="HMDB59717" />
413 <has_text text="HMDB13189" />
414 <has_text text="HMDB00299" />
415 <has_text text="HMDB00191" />
416 <has_text text="HMDB00518" />
417 <has_text text="HMDB00715" />
418 <has_text text="HMDB01032" />
419 <has_text text="HMDB00208" />
420 <has_text text="HMDB04824" />
421 <has_text text="HMDB00512" />
422 <has_text text="HMDB00251" />
423 </assert_contents>
424 </output>
425 </test>
426 <!-- test 7 - as test 6 but with input_nofilter_dataMatrix.tsv as the data matrix; HMDB13189 is expected to be absent from the variable metadata -->
427 <test>
428 <param name="dataMatrix_in" value="input_nofilter_dataMatrix.tsv"/>
429 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
430 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
431 <param name="classnameColumn" value="gender"/>
432 <param name="sampleclassNames" value="M"/>
433 <param name="wildcards" value="FALSE"/>
434 <param name="inclusive" value="TRUE"/>
435 <output name="variableMetadata_out">
436 <assert_contents>
437 <has_text text="HMDB03193" />
438 <not_has_text text="HMDB00822" />
439 <has_text text="HMDB01101" />
440 <has_text text="HMDB01101.1" />
441 <has_text text="HMDB10348" />
442 <has_text text="HMDB59717" />
443 <not_has_text text="HMDB13189" />
444 <has_text text="HMDB00299" />
445 <has_text text="HMDB00191" />
446 <has_text text="HMDB00518" />
447 <has_text text="HMDB00715" />
448 <has_text text="HMDB01032" />
449 <has_text text="HMDB00208" />
450 <has_text text="HMDB04824" />
451 <has_text text="HMDB00512" />
452 <has_text text="HMDB00251" />
453 </assert_contents>
454 </output>
455 </test>
456 <!-- test 8 - comma-separated list of regular expressions ('[Mm],[fF]') as filter-in on column 'gender'; every sample except HU_204 is expected in the sample metadata -->
457 <test>
458 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
459 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
460 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
461 <param name="classnameColumn" value="gender"/>
462 <param name="sampleclassNames" value="[Mm],[fF]"/>
463 <param name="wildcards" value="FALSE"/>
464 <param name="inclusive" value="TRUE"/>
465 <output name="sampleMetadata_out">
466 <assert_contents>
467 <has_text text="HU_028" />
468 <has_text text="HU_051" />
469 <has_text text="HU_060" />
470 <has_text text="HU_110" />
471 <has_text text="HU_149" />
472 <has_text text="HU_152" />
473 <has_text text="HU_175" />
474 <has_text text="HU_178" />
475 <has_text text="HU_185" />
476 <not_has_text text="HU_204" />
477 <has_text text="HU_208" />
478 <has_text text="HU_017" />
479 <has_text text="HU_034" />
480 <has_text text="HU_078" />
481 <has_text text="HU_091" />
482 <has_text text="HU_093" />
483 <has_text text="HU_099" />
484 <has_text text="HU_130" />
485 <has_text text="HU_134" />
486 <has_text text="HU_138" />
487 </assert_contents>
488 </output>
489 </test>
490 <!-- test 9 - empty 'classnameColumn' with class name 'M' given; every sample except HU_204 is expected in the sample metadata -->
491 <test>
492 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
493 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
494 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
495 <param name="classnameColumn" value=""/>
496 <param name="sampleclassNames" value="M"/>
497 <param name="wildcards" value="FALSE"/>
498 <param name="inclusive" value="TRUE"/>
499 <output name="sampleMetadata_out">
500 <assert_contents>
501 <has_text text="HU_028" />
502 <has_text text="HU_051" />
503 <has_text text="HU_060" />
504 <has_text text="HU_110" />
505 <has_text text="HU_149" />
506 <has_text text="HU_152" />
507 <has_text text="HU_175" />
508 <has_text text="HU_178" />
509 <has_text text="HU_185" />
510 <not_has_text text="HU_204" />
511 <has_text text="HU_208" />
512 <has_text text="HU_017" />
513 <has_text text="HU_034" />
514 <has_text text="HU_078" />
515 <has_text text="HU_091" />
516 <has_text text="HU_093" />
517 <has_text text="HU_099" />
518 <has_text text="HU_130" />
519 <has_text text="HU_134" />
520 <has_text text="HU_138" />
521 </assert_contents>
522 </output>
523 </test>
524 <!-- test 10 - extends test4 with no imputation rather than zero imputation; 'NA' is expected to remain in the data matrix -->
525 <test>
526 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
527 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
528 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
529 <param name="classnameColumn" value="gender"/>
530 <param name="sampleclassNames" value="*"/>
531 <param name="wildcards" value="TRUE"/>
532 <param name="inclusive" value="TRUE"/>
533 <param name="imputation" value="none"/>
534 <output name="dataMatrix_out" md5="6200dfa77d09c56e434f80b1a23b3393">
535 <assert_contents>
536 <not_has_text text="HU_204" />
537 <has_text text="NA" />
538 <has_text text="HU_028" />
539 </assert_contents>
540 </output>
541 <output name="sampleMetadata_out">
542 <assert_contents>
543 <not_has_text text="HU_204" />
544 <has_text text="HU_028" />
545 </assert_contents>
546 </output>
547 </test>
548 <!-- test 11 - extends test4 with center imputation rather than zero imputation; no 'NA' is expected in the data matrix -->
549 <test>
550 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
551 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
552 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
553 <param name="classnameColumn" value="gender"/>
554 <param name="sampleclassNames" value="*"/>
555 <param name="wildcards" value="TRUE"/>
556 <param name="inclusive" value="TRUE"/>
557 <param name="imputation" value="center"/>
558 <output name="dataMatrix_out" md5="a404278c5c9ffd5bdadf346c4f8a0184">
559 <assert_contents>
560 <not_has_text text="HU_204" />
561 <not_has_text text="NA" />
562 <has_text text="HU_028" />
563 </assert_contents>
564 </output>
565 <output name="sampleMetadata_out">
566 <assert_contents>
567 <not_has_text text="HU_204" />
568 <has_text text="HU_028" />
569 </assert_contents>
570 </output>
571 </test>
572 <!-- test 12 - select medoid for class: column 'gender', empty filter-out list, zero imputation, samples ordered by 'gender' and features by 'rt' -->
573 <test>
574 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
575 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
576 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
577 <param name="classnameColumn" value="gender"/>
578 <param name="sampleclassNames" value=""/>
579 <param name="wildcards" value="TRUE"/>
580 <param name="inclusive" value="FALSE"/>
581 <param name="imputation" value="zero"/>
582 <param name="order_vrbl" value="rt"/>
583 <param name="order_smpl" value="gender"/>
584 <param name="centering" value="medoid"/>
585 <output name="dataMatrix_out" md5="c91bbfbf30004fa24b05a67ec479bfb1">
586 <assert_contents>
587 <not_has_text text="1013302" />
588 <has_text text="4763576" />
589 <has_text text="2003278" />
590 <has_text text="26222916" />
591 </assert_contents>
592 </output>
593 <output name="sampleMetadata_out">
594 <assert_contents>
595 <not_has_text text="HU_099" />
596 <not_has_text text="HU_185" />
597 <has_text text="HU_110" />
598 <has_text text="HU_078" />
599 </assert_contents>
600 </output>
601 </test>
602 <!-- test 13 - medoid centering on column 'pcgroup' with zero imputation, using the input_med1_* test files -->
603 <test>
604 <param name="dataMatrix_in" value="input_med1_dm.tsv"/>
605 <param name="sampleMetadata_in" value="input_med1_sm.tsv"/>
606 <param name="variableMetadata_in" value="input_med1_vm.tsv"/>
607 <param name="classnameColumn" value="pcgroup"/>
608 <param name="sampleclassNames" value=""/>
609 <param name="wildcards" value="TRUE"/>
610 <param name="inclusive" value="FALSE"/>
611 <param name="imputation" value="zero"/>
612 <param name="order_vrbl" value="sample"/>
613 <param name="order_smpl" value="pcgroup"/>
614 <param name="centering" value="medoid"/>
615 <output name="sampleMetadata_out">
616 <assert_contents>
617 <has_text text="X10" />
618 <has_text text="M70T1043" />
619 <has_text text="70.01229" />
620 <has_text text="1042.837" />
621 <has_text text="X345" />
622 <has_text text="M74T33" />
623 <not_has_text text="M70T1042_1" />
624 <not_has_text text="M74T34" />
625 </assert_contents>
626 </output>
627 <output name="dataMatrix_out">
628 <assert_contents>
629 <has_text text="X10" />
630 <has_text text="X345" />
631 </assert_contents>
632 </output>
633 </test>
634 </tests>
635 <!-- Here is the hyphenation standard that I *try* to apply consistently in my documentation:
636 https://web.archive.org/web/20161014025757/http://www.sandranoonan.com/dont-let-hyphenation-drive-crazy/
637 -->
638 <help><![CDATA[
639
640
641 **Author** Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu)
642
643 --------------------------------------------------------------------------
644
645
646 **R package**
647
648 The *w4mclassfilter* package (which is used by the W4M Data Subset tool) is available from the Hegeman lab GitHub repository (https://github.com/HegemanLab/w4mclassfilter/releases).
649
650 -----------------------------------------------------------------------------------------------------------------------------------------
651
652
653 **Tool updates**
654
655 See https://github.com/HegemanLab/w4mclassfilter_galaxy_wrapper#news
656
657 ---------------------------------------------------
658
659 ======================================================
660 "W4M Data Subset" - Filter Workflow4Metabolomics data
661 ======================================================
662
663 ----------
664 Motivation
665 ----------
666
667 LC-MS metabolomics experiments seek to resolve "features", i.e., species that have distinct chromatographic retention time ("rt") and (after ionization) mass-to-charge ratio ("*m/z*" or "mz").
668 (If a chemical is fragmented or may have a variety of adducts, several features will result.)
669 Data for a sample are collected as mass-spectral intensities, each of which is associated with a position on a 2D plane with dimensions of rt and *m/z*.
670 Ideally, features would be sufficiently reproducible among sample-runs to distinguish features that are similar among samples from those that differ.
671
672 For liquid chromatography, the retention time for a species can vary considerably from one chromatography run to the next.
673 The Workflow4Metabolomics suite of Galaxy tools (W4M, [Giacomoni *et al.*, 2014; Guitton *et al.*, 2017])
674 uses the XCMS preprocessing tools [Smith *et al.*, 2006]
675 for "retention-time correction" to align features among samples.
676 Features may be better aligned if pooled samples and blanks are included.
677
678 Multivariate statistical tools may be used to discover clusters of similar samples [Th]]>&#233;<![CDATA[venot *et al.*, 2015].
679 However, once retention-time alignment of features has been achieved among samples in LC-MS datasets:
680
681 - The presence of pools and blanks may confound identification and separation of sample clusters.
682 - Multivariate statistical algorithms may be impacted by missing values or dimensions that have zero variance.
683
684 -----------
685 Description
686 -----------
687
688 The **W4M Data Subset** tool **selects subsets of samples, features, or data values** and **conditions the data** for further analysis.
689
690 - The tool takes as input the *dataMatrix*, *sampleMetadata*, and *variableMetadata* datasets produced by W4M's XCMS and CAMERA [Kuhl *et al.*, 2012] tools.
691 - The tool produces the same trio of output datasets, modified as described below.
692
693 This tool can perform several operations to reduce the number of samples or features to be analyzed (although *this should be done only in a statistically sound manner* consistent with the nature of the experiment):
694
695 - *Sample filtering:* Samples may be selected by designating a "sample class" column in *sampleMetadata* and specifying criteria to include or exclude samples based on the contents of this column.
696 - *Feature filtering:* Features may be selected by specifying minimum or maximum value (or both) allowable in columns of *variableMetadata*.
697 - *Intensity filtering:* To exclude minimal features from consideration, a lower bound may be specified for the maximum intensity for a feature across all samples (i.e., for a row in *dataMatrix*).
698
699 This tool also conditions data for statistical analysis:
700
701 - Samples that are missing from either *sampleMetadata* or *dataMatrix* are eliminated.
702 - Features that are missing from either *variableMetadata* or *dataMatrix* are eliminated.
703 - Features and samples that have zero variance are eliminated.
704 - Samples and features are ordered consistently in *variableMetadata*, *sampleMetadata*, and *dataMatrix*.
705 (The columns for sorting *variableMetadata* or *sampleMetadata* may be specified.)
706 - The names of the first columns of *variableMetadata* and *sampleMetadata* are set respectively to "variableMetadata" and "sampleMetadata".
707 - If desired, the values in *dataMatrix* may be log-transformed.
708 - Negative intensities become missing values (before missing-value replacement is performed).
709 - If desired, each missing value in *dataMatrix* may be replaced with zero or the median value observed for the corresponding feature.
710 - If desired, a "center" for each treatment can be computed in lieu of the samples for that treatment.
711
712 This tool may be applied several times sequentially, which may be useful for:
713
714 - analyzing subsets of samples for progressively smaller sets of treatment levels, or
715 - choosing subsets of samples or features, respectively based on criteria in columns of *sampleMetadata* or *variableMetadata*.
716
717 -----------------
718 Workflow Position
719 -----------------
720
721 This tool can be used at any point downstream of Preprocessing.
722
723 - Possible upstream tool categories: Preprocessing, Quality Control, Statistical Analysis, Filter and Sort
724 - Possible downstream tool categories: Normalisation, Statistical Analysis, Quality Control, Filter and Sort
725
726 -----------
727 Input files
728 -----------
729
730 +------------------------+---------------------------------------+------------+
731 | File | Contents | Format |
732 +========================+=======================================+============+
733 | Data matrix | per-feature, per-sample intensities | tabular |
734 +------------------------+---------------------------------------+------------+
735 | Sample metadata | metadata for samples | tabular |
736 +------------------------+---------------------------------------+------------+
737 | Variable metadata | metadata for features | tabular |
738 +------------------------+---------------------------------------+------------+
739
740
741 ----------
742 Parameters
743 ----------
744
745 Data matrix
746 | feature x sample **dataMatrix** (tab-separated values) file of the numeric data matrix, with period-character ('.') as decimal, and 'NA' for missing values.
747 | The file must not contain metadata apart from the required row and column names.
748 |
749
750 Sample metadata
751 | sample x metadata **sampleMetadata** (tab-separated values) file of the numeric and/or character sample metadata, with period-character ('.') as decimal, and 'NA' for missing values.
752 |
753
754 Variable metadata
755 | variable x metadata **variableMetadata** (tab-separated values) file of the numeric and/or character variable metadata, with period-character ('.') as decimal, and 'NA' for missing values.
756 |
757
758 Column containing the sample-class names (default = '``class``')
759 | name of the column in **sampleMetadata** that has the values to be tested against the '``Sample-class names``' input parameter or to be referenced by the '``Compute centers for classes``' input parameter.
760 | Only letters, digits, periods, and underscores are permitted.
761 |
762
763 Sample-class names (default = no names)
764 | names (or regular expressions to match names) of sample-classes to include or exclude
765 | (Separate names with commas, without any extra space characters.)
766 |
767
768 Exclude/include named (or matched) classes (default = '``filter-out``')
769 | '``filter-in``' - include only the named sample-classes
770 | '``filter-out``' - exclude only the named sample-classes
771 |
772
773 Use 'wild card patterns' or 'regular expression patterns' (default = '``wild-card patterns``')
774 | '``wild-card patterns``' - use wild cards to match names of sample-classes (see the *'Wild-card patterns to match class names'* section below.)
775 | '``regular-expression patterns``' - use regular expressions to match the named sample-classes (see the *'Regular-expression patterns to match class names'* section below.)
776 |
777
778 Variable-range filters (default = no filters)
779 | variable-range filters (see the *'Variable-range filters'* section below)
780 | (Separate filter expressions with commas, without any extra space characters.)
781 |
782
783 Data transformation (default = '``none``')
784 | '``none``' - Do not transform data matrix values.
785 | '``log2``' - Take the log base 2 of the values in the data matrix.
786 | '``log10``' - Take the log base 10 of the values in the data matrix.
787 |
788 | Note that negative intensities become missing values regardless of the choice made here.
789 |
790
791 Imputation of missing values (default = '``zero``')
792 | '``none``' - Do not impute data matrix values.
793 | '``zero``' - Negative and missing values are imputed to zero.
794 | '``center``' - For each feature, negative and missing values are imputed to the median of other values.
795 |
796 | Note well: For '``none``' option, '``Compute centers for classes``' cannot be set to '``medoid``'.
797 |
798
799 Columns that specify order for samples (default = '``sampleMetadata``')
800 | names of the columns in **sampleMetadata** that are used to sort samples; only letters, digits, periods, and underscores are permitted.
801 | (Separate column names with commas, without any extra space characters.)
802 |
803
804 Columns that specify order for features (default = '``variableMetadata``')
805 | names of the columns in **variableMetadata** that are used to sort features; only letters, digits, periods, and underscores are permitted.
806 | (Separate column names with commas, without any extra space characters.)
807 |
808
809 Compute centers for classes, e.g., treatments (default = '``none``')
810 | '``none``' - Return all samples; do not compute centers for classes/treatments.
811 | '``centroid``' - For each treatment, return only the centroid (the treatment-center computed as the mean intensity for each feature).
812 | '``median``' - For each treatment, return only the treatment-center computed as the median intensity for each feature.
813 | '``medoid``' - For each treatment, return only the medoid (the sample most similar to the other samples for that treatment).
814 |
815 | Note well: For '``medoid``' option, '``Imputation of missing values``' cannot be set to '``none``'.
816 |
817
818 ------------
819 Output files
820 ------------
821
822 sampleMetadata
823 | (tab-separated values) file.
824 | If centering is '``none``' or '``medoid``', this will be identical to the **sampleMetadata** file given as an input argument, except that it will lack rows for samples that have been filtered out (by the sample-class filter, or because of zero variance, or because they were missing in the input data matrix).
825 | If centering is '``centroid``' or '``median``', most columns will be replaced with the treatment name and the number of samples for that treatment.
826 |
827
828 variableMetadata
829 | (tab-separated values) file identical to the **variableMetadata** file given as an input argument, except that it will lack rows for variables (LC-MS features) that have been filtered out (by the variable-range filter, or because of zero variance, or because they were missing in the input data matrix).
830 |
831
832 dataMatrix
833 | (tab-separated values) file identical to the **dataMatrix** file given as an input argument, except that it will lack the rows and columns for variables and samples, respectively, that have been filtered out.
834 |
835
836
837 -----------------------------------------
838 Wild-card patterns to match class names
839 -----------------------------------------
840
841 W4M Data Subset supports use of "wild card" patterns to select class-names.
842
843 - use '``?``' to match a single character
844 - use '``*``' to match zero or more characters
845 - the pattern must match the entire class name
846
847 For example
848
849 - '``??.samp*``' matches '``my.sample``' but not '``my.own.sample``'
850 - '``*.sample``' matches '``my.sample``' and '``my.own.sample``'
851 - '``*.sampl``' matches neither '``my.sample``' nor '``my.own.sample``'
852
853 --------------------------------------------------
854 Regular-expression patterns to match class names
855 --------------------------------------------------
856
857 W4M Data Subset supports use of R "extended regular expression" patterns to select class-names.
858
859 R extended regular expressions allow precise pattern-matching and are exhaustively defined at
860 https://stat.ethz.ch/R-manual/R-devel/library/base/html/regex.html
861
862 However, only a few basic building blocks of regular expressions need to be mastered for most cases:
863
864 - '``^``' matches the beginning of a class-name
865 - '``$``' matches the end of a class-name
866 - '``.``' outside of square brackets matches a single character
867 - '``*``' matches the character specified immediately before it, zero or more times
868 - square brackets specify a set of characters to be matched.
869
870 Within square brackets
871
872 - '``^``' as the first character specifies that the list of characters are those that should **not** be matched.
873 - '``-``' is used to specify ranges of characters
874
875 Caveat: The tool wrapper uses the comma ('``,``') to split a list of sample-class names, so **commas may not be used within regular expressions for this tool**
876
877 First Example: Consider a field of class-names consisting of '``marq3,marq6,marq9,marq12,front3,front6,front9,front12``'
878
879 - The regular expression '``^front[0-9][0-9]*$``' will match the same sample-classes as '``front3,front6,front9,front12``'
880 - The regular expression '``^[a-z][a-z]*3$``' will match the same sample-classes as '``front3,marq3``'
881 - The regular expression '``^[a-z][a-z]*12$``' will match the same sample-classes as '``front12,marq12``'
882 - The regular expression '``^[a-z][a-z]*[0-9]$``' will match the same sample-classes as '``front3,front6,front9,marq3,marq6,marq9``'
883
884 Second Example: Consider these regular expression patterns as possible matches to a sample-class name '``AB0123``':
885
886 - '``^[A-Z][A-Z][0-9][0-9]*$``' MATCHES '``**^AB0123$**``'
887 - '``^[A-Z][A-Z]*[0-9][0-9]*$``' MATCHES '``**^AB0123$**``'
888 - '``^[A-Z][0-9]*``' MATCHES '``**^A** B0123$``' - first character is a letter, '``*``' can specify zero characters, and end of line did not need to be matched.
889 - '``^[A-Z][A-Z][0-9]``' MATCHES '``**^AB0** 123$``' - first two characters are letters and the third is a digit.
890 - '``^[A-Z][A-Z]*[0-9][0-9]$``' DOES NOT MATCH - the name does not end with the pattern '``[A-Z][0-9][0-9]$``', i.e., it ends with four digits, not two.
891 - '``^[A-Z][0-9]*$``' DOES NOT MATCH - the pattern specifies that second character and all those that follow, if present, must be digits.
892
893 ----------------------
894 Variable-range filters
895 ----------------------
896
897 An array of range-specification strings may be supplied in the '``Variable-range filters``'
898 argument. If supplied, only features having numerical values in the specified column
899 of **variableMetadata** that fall within the specified ranges will be retained
900 in the output. Each range is a string of three colon-separated values (e.g., '``mz:200:800``') in the
901 following order:
902
903 - the **name of a column** of **variableMetadata** which must have numerical data (only letters, digits, periods, and underscores are permitted in the name itself), e.g., '``mz``';
904 - the **minimum allowed value** in that column for the feature to be retained, e.g., '``200``';
905 - the **maximum allowed value**, e.g., '``800``'.
906
907 Note for the range specification strings:
908
909 - **If the "maximum" is less than the "minimum", then the range is exclusive** (e.g., '``mz:800:200``' means retain only features whose mz is NOT in the range 200-800)
910 - **If the name supplied in the first field** is '``FEATMAX``', then the string is defining the **threshold for the maximum intensity** for each feature in the dataMatrix.
911
912 - For example, '``FEATMAX:1e6:``' would specify that any feature would be excluded if no sample had an intensity for that feature greater than 1,000,000.
913 - Although a maximum may be specified, it seems unlikely that this would be useful. Note that when the "maximum" is less than the "minimum" for the FEATMAX range specification, then the specification is ignored.
914
915 ----------------------------------
916 Data transformation and imputation
917 ----------------------------------
918
919 Data may optionally be log2- or log10-transformed.
920
921 Negative intensities are always substituted with missing values before imputation, even when no transformation is chosen.
922
923 Missing intensity data values may optionally be imputed. Missing values may be substituted:
924
925 - with zeros (as may be appropriate for univariate analysis)
926 - with the median for the feature (as may be appropriate for multivariate analysis).
927
928 - Note that the median feature-intensity is computed for the samples *before* variable-range filters are applied.
929
930 -----------------------------------------
931 Optional Computation of Treatment Centers
932 -----------------------------------------
933
934 A "center" for each treatment may be computed in lieu of all the samples for each treatment.
935
936 - '``none``' - Return all samples; do not compute centers.
937 - '``centroid``' - For each treatment, return only the centroid (the treatment-center computed as the mean intensity for each feature).
938 - '``median``' - For each treatment, return only the treatment-center computed as the median intensity for each feature.
939 - '``medoid``' - For each treatment, return only the medoid (the sample most similar to the other samples for that treatment). This choice requires that the '``Imputation of missing values``' argument not be set to '``none``'.
940
941 The medoid is the sample having the smallest sum of its distances from other samples in the treatment:
942
943 - Because principal components are uncorrelated, distances are computed in the space defined by the principal-component scores to minimize the distortion of computed distances by correlated features.
944 - Because principal components are used to compute distances, no missing values are permitted, which is why the '``Imputation of missing values``' argument must not be set to '``none``'.
945 - The distances are used to identify the medoid using code adapted from https://web.archive.org/web/20191231012914/https://www.biostars.org/p/11987/#11989
946
947 -----------------------------------------------------------------------------
948
949 ----------------
950 WORKING EXAMPLES
951 ----------------
952
953 -----------
954 Input Files
955 -----------
956
957 +------------------------------------------------------------------------------------------------------------------------------------------------------+
958 | Input File URL |
959 +======================================================================================================================================================+
960 | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/input_dataMatrix.tsv |
961 +------------------------------------------------------------------------------------------------------------------------------------------------------+
962 | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/input_sampleMetadata.tsv |
963 +------------------------------------------------------------------------------------------------------------------------------------------------------+
964 | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/input_variableMetadata.tsv |
965 +------------------------------------------------------------------------------------------------------------------------------------------------------+
966
967 -------------------------------
968 Example without Range-Filtering
969 -------------------------------
970
971 This example retains only samples whose '``gender``' attribute is '``M``'.
972
973 **Input parameters**
974
975 +---------------------------------------------+-------------------------------+
976 | Input Parameter | Value |
977 +=============================================+===============================+
978 | Column that names the sample class | gender |
979 +---------------------------------------------+-------------------------------+
980 | Sample-class names | M |
981 +---------------------------------------------+-------------------------------+
982 | Exclude/include named classes | filter-in |
983 +---------------------------------------------+-------------------------------+
984 | Use 'wild-cards' or 'regular expressions' | wild-cards |
985 +---------------------------------------------+-------------------------------+
986 | Variable range-filters | (Leave this field empty.) |
987 +---------------------------------------------+-------------------------------+
988 | Data transformation | none |
989 +---------------------------------------------+-------------------------------+
990 | Missing-value imputation | center |
991 +---------------------------------------------+-------------------------------+
992 | Sample-sort column | sampleMetadata |
993 +---------------------------------------------+-------------------------------+
994 | Feature-sort column | variableMetadata |
995 +---------------------------------------------+-------------------------------+
996 | Compute centers for classes | none |
997 +---------------------------------------------+-------------------------------+
998
999 **Expected outputs**
1000
1001 +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+
1002 | Expected Output | Download from URL |
1003 +===================+=================================================================================================================================================+
1004 | Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/expected_dataMatrix.tsv |
1005 +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+
1006 | Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/expected_sampleMetadata.tsv |
1007 +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+
1008 | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/expected_variableMetadata.tsv |
1009 +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+
1010
1011 ----------------------------
1012 Example with Range-Filtering
1013 ----------------------------
1014
1015 This example retains only features whose ``mz`` is greater than 200, whose ``rt`` is less than 800, and whose maximum intensity across all samples is at least 2,000,000.
1016 This example retains all samples (except those having zero variance across all features), although it would be possible to filter on samples as well.
1017
1018 **Input parameters**
1019
1020 +---------------------------------------------+-----------------------------------+
1021 | Input Parameter | Value |
1022 +=============================================+===================================+
1023 | Column that names the sample class | sampleMetadata |
1024 +---------------------------------------------+-----------------------------------+
1025 | Sample-class names | HU_13[48] |
1026 +---------------------------------------------+-----------------------------------+
1027 | Exclude/include named classes | filter-out |
1028 +---------------------------------------------+-----------------------------------+
1029 | Use 'wild-cards' or 'regular expressions' | regular-expressions |
1030 +---------------------------------------------+-----------------------------------+
1031 | Variable range-filters | FEATMAX:20.93157:,mz:200:,rt::800 |
1032 +---------------------------------------------+-----------------------------------+
1033 | Data transformation | log2 |
1034 +---------------------------------------------+-----------------------------------+
1035 | Missing-value imputation | zero |
1036 +---------------------------------------------+-----------------------------------+
1037 | Sample-sort column | sampleMetadata |
1038 +---------------------------------------------+-----------------------------------+
1039 | Feature-sort column | variableMetadata |
1040 +---------------------------------------------+-----------------------------------+
1041 | Compute centers for classes | none |
1042 +---------------------------------------------+-----------------------------------+
1043
1044 **Expected outputs**
1045
1046 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+
1047 | Expected Output | Download from URL |
1048 +===================+===================================================================================================================================================+
1049 | Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/rangefilter_dataMatrix.tsv |
1050 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+
1051 | Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/rangefilter_sampleMetadata.tsv |
1052 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+
1053 | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/rangefilter_variableMetadata.tsv |
1054 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+
1055
1056 --------------------------------
1057 Example with Treatment-Centering
1058 --------------------------------
1059
1060 This example retains only the samples that are medoids for their gender.
1061
1062 **Input parameters**
1063
1064 +---------------------------------------------+-----------------------------------+
1065 | Input Parameter | Value |
1066 +=============================================+===================================+
1067 | Column that names the sample class | gender |
1068 +---------------------------------------------+-----------------------------------+
1069 | Sample-class names | (Leave this field empty.) |
1070 +---------------------------------------------+-----------------------------------+
1071 | Exclude/include named classes | filter-out |
1072 +---------------------------------------------+-----------------------------------+
1073 | Use 'wild-cards' or 'regular expressions' | wild-cards |
1074 +---------------------------------------------+-----------------------------------+
1075 | Variable range-filters | (Leave this field empty.) |
1076 +---------------------------------------------+-----------------------------------+
1077 | Data transformation | none |
1078 +---------------------------------------------+-----------------------------------+
1079 | Missing-value imputation | zero |
1080 +---------------------------------------------+-----------------------------------+
1081 | Sample-sort column | gender |
1082 +---------------------------------------------+-----------------------------------+
1083 | Feature-sort column | rt |
1084 +---------------------------------------------+-----------------------------------+
1085 | Compute centers for classes | medoid |
1086 +---------------------------------------------+-----------------------------------+
1087
1088 **Expected outputs**
1089
1090 +-------------------+----------------------------------------------------------------------------------------------------------+
1091 | Expected Output | Download from URL |
1092 +===================+==========================================================================================================+
1093 | Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/exp_cent_medoid_dm.tsv |
1094 +-------------------+----------------------------------------------------------------------------------------------------------+
1095 | Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/exp_cent_medoid_sm.tsv |
1096 +-------------------+----------------------------------------------------------------------------------------------------------+
1097 | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/exp_cent_medoid_vm.tsv |
1098 +-------------------+----------------------------------------------------------------------------------------------------------+
1099 ]]></help>
1100 <citations>
1101 <!-- Giacomoni_2014 W4M 2.5 -->
1102 <citation type="doi">10.1093/bioinformatics/btu813</citation>
1103 <!-- Guitton_2017 W4M 3.0 -->
1104 <citation type="doi">10.1016/j.biocel.2017.07.002</citation>
1105 <!-- Kuhl_2012 CAMERA -->
1106 <citation type="doi">10.1021/ac202450g</citation>
1107 <!-- Smith_2006 XCMS -->
1108 <citation type="doi">10.1021/ac051437y</citation>
1109 <!-- Thevenot_2015 Urinary metabolome statistics -->
1110 <citation type="doi">10.1021/acs.jproteome.5b00354</citation>
1111 </citations>
1112 <!--
1113 vim:noet:sw=4:ts=4
1114 --> </tool>