Mercurial > repos > ethevenot > batchcorrection
comparison BC/batch_correction.xml @ 3:2e3a23dd6c24 draft default tip
Uploaded
author | melpetera |
---|---|
date | Thu, 28 Feb 2019 05:12:34 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:57edfd3943ab | 3:2e3a23dd6c24 |
---|---|
1 <tool id="Batch_correction" name="Batch_correction" version="2.2.4"> | |
2 <description>Corrects intensities for signal drift and batch-effects</description> | |
3 | |
4 <requirements> <!-- Package dependencies of the R wrappers, each pinned to an exact version --> | |
5 <requirement type="package" version="1.1_4">r-batch</requirement> | |
6 <requirement type="package" version="1.7_8">r-ade4</requirement> | |
7 <requirement type="package" version="1.70.0">bioconductor-pcamethods</requirement> | |
8 <requirement type="package" version="1.10.0">bioconductor-ropls</requirement> | |
9 </requirements> | |
10 | |
11 <stdio> <!-- Any exit code of 1 or above is reported as a fatal error --> | |
12 <exit_code range="1:" level="fatal" /> | |
13 </stdio> | |
14 | |
15 <!-- Builds a single Rscript call: batch_correction_all_loess_wrapper.R for the two all_loess methods, batch_correction_wrapper.R for linear, lowess and loess; the output paths and sampleMetadata column settings after the final "end if" are appended for every method. The script path is single-quoted so that an installation directory containing spaces stays one shell argument. --> <command><![CDATA[ | |
16 #if str($span_condition.method) == 'all_loess_pool': | |
17 Rscript '$__tool_directory__/batch_correction_all_loess_wrapper.R' | |
18 dataMatrix "$dataMatrix" | |
19 sampleMetadata "$sampleMetadata" | |
20 variableMetadata "$variableMetadata" | |
21 method "all_loess_pool" | |
22 span "${span_condition.span}" | |
23 | |
24 #elif str($span_condition.method) == 'all_loess_sample': | |
25 Rscript '$__tool_directory__/batch_correction_all_loess_wrapper.R' | |
26 dataMatrix "$dataMatrix" | |
27 sampleMetadata "$sampleMetadata" | |
28 variableMetadata "$variableMetadata" | |
29 method "all_loess_sample" | |
30 span "${span_condition.span}" | |
31 #else: | |
32 Rscript '$__tool_directory__/batch_correction_wrapper.R' | |
33 analyse "batch_correction" | |
34 dataMatrix "$dataMatrix" | |
35 sampleMetadata "$sampleMetadata" | |
36 variableMetadata "$variableMetadata" | |
37 method "${span_condition.method}" | |
38 #if str($span_condition.method) == 'linear': | |
39 span "none" | |
40 #else: | |
41 span "${span_condition.span}" | |
42 #end if | |
43 valnull "${span_condition.valnull}" | |
44 ref_factor "${span_condition.ref_factor}" | |
45 detail "${span_condition.detail}" | |
46 #end if | |
47 dataMatrix_out "$dataMatrix_out" variableMetadata_out "$variableMetadata_out" | |
48 graph_output "$graph_output" rdata_output "$rdata_output" | |
49 batch_col_name "$batch_col_name" injection_order_col_name "$injection_order_col_name" | |
50 sample_type_col_name "$sample_type_col_name" | |
51 sample_type_tags "blank=$sampleTypeTagBlank,pool=$sampleTypeTagPool,sample=$sampleTypeTagSample" | |
52 ]]></command> | |
53 | |
54 <inputs> <!-- Three tabular input files, then the sampleMetadata column names and sample type tags, then the options that depend on the regression method --> | |
55 <param name="dataMatrix" label="Data Matrix file " format="tabular" type="data" /> | |
56 <param name="sampleMetadata" label="Sample metadata file " format="tabular" type="data" help="must contain at least the three following columns: 'batch' + 'injectionOrder' + 'sampleType'"/> | |
57 <param name="variableMetadata" label="Variable metadata file " format="tabular" type="data" /> | |
58 | |
59 <param name="batch_col_name" label="Batch column name" type="text" size="64" value="batch" help="The name of the column containing the batch values."/> | |
60 <param name="injection_order_col_name" label="Injection order column name" type="text" size="64" value="injectionOrder" help="The name of the column containing the injection order values."/> | |
61 <param name="sample_type_col_name" label="Sample type column name" type="text" size="64" value="sampleType" help="The name of the column containing the sample type values."/> | |
62 <param name="sampleTypeTagPool" type="text" size="64" value="pool" label="Set the name used to tag samples as pool."/> | |
63 <param name="sampleTypeTagBlank" type="text" size="64" value="blank" label="Set the name used to tag samples as blank."/> | |
64 <param name="sampleTypeTagSample" type="text" size="64" value="sample" label="Set the name used to tag samples as real sample."/> | |
65 | |
66 <conditional name="span_condition"> <!-- The parameters offered depend on the selected method; the command template reads them as span_condition.method, span_condition.span, etc. --> | |
67 <param name="method" label="Type of regression model " type="select" help="To select between linear or non-linear (lowess or loess) methods to be used in Van der Kloet algorithm ; when using loess, you can choose to use pools or samples to model batch effect."> | |
68 <option value="linear">linear</option> | |
69 <option value="lowess">lowess</option> | |
70 <option value="loess">loess</option> | |
71 <option value="all_loess_pool">all loess pool</option> | |
72 <option value="all_loess_sample">all loess sample</option> | |
73 </param> | |
74 <when value="linear"> <!-- No span parameter in this branch: the command template passes span "none" for the linear method --> | |
75 <param name="valnull" label="Null values" type="select" display="radio" help="What to do of generated negative or infinite values"> | |
76 <option value="0">consider it as a null intensity</option> | |
77 <option value="NA">consider it as a missing value</option> | |
78 </param> | |
79 <param name="ref_factor" label="Factor of interest " type="text" value="batch" help="column name of factor of interest (often a biological factor); if none, leave 'batch'" /> | |
80 <param name="detail" label="Level of details for plots " type="select" help="Amount of plots in the pdf file output. See Help section for more details."> | |
81 <option value="no">basic</option> | |
82 <option value="plot">standard</option> | |
83 <option value="reg">complete</option> | |
84 </param> | |
85 </when> | |
86 <when value="lowess"> <!-- Same options as the linear branch plus span --> | |
87 <param name="span" type="float" value="0.85" label="span" help="it is a advanced option. Must be less than or equal to 1"/> <!-- NOTE(review): the upper bound stated in help is not enforced here (no min/max attribute or validator is declared) --> | |
88 <param name="valnull" label="Null values" type="select" display="radio" help="What to do of generated negative or infinite values"> | |
89 <option value="0">consider it as a null intensity</option> | |
90 <option value="NA">consider it as a missing value</option> | |
91 </param> | |
92 <param name="ref_factor" label="Factor of interest " type="text" value="batch" help="column name of factor of interest (often a biological factor); if none, leave 'batch'" /> | |
93 <param name="detail" label="Level of details for plots " type="select" help="Amount of plots in the pdf file output. See Help section for more details."> | |
94 <option value="no">basic</option> | |
95 <option value="plot">standard</option> | |
96 <option value="reg">complete</option> | |
97 </param> | |
98 </when> | |
99 <when value="loess"> <!-- Same options as the lowess branch, with a default span of 1 --> | |
100 <param name="span" type="float" value="1" label="span" help="it is a advanced option. Must be strictly greater than 0"/> <!-- NOTE(review): the lower bound stated in help is not enforced here (no min/max attribute or validator is declared) --> | |
101 <param name="valnull" label="Null values" type="select" display="radio" help="What to do of generated negative or infinite values"> | |
102 <option value="0">consider it as a null intensity</option> | |
103 <option value="NA">consider it as a missing value</option> | |
104 </param> | |
105 <param name="ref_factor" label="Factor of interest " type="text" value="batch" help="column name of factor of interest (often a biological factor); if none, leave 'batch'" /> | |
106 <param name="detail" label="Level of details for plots " type="select" help="Amount of plots in the pdf file output. See Help section for more details."> | |
107 <option value="no">basic</option> | |
108 <option value="plot">standard</option> | |
109 <option value="reg">complete</option> | |
110 </param> | |
111 </when> | |
112 <when value="all_loess_pool"> <!-- The all_loess branches expose span only; valnull, ref_factor and detail are not defined for them --> | |
113 <param name="span" type="float" value="1" label="span" help="smoothing parameter; must be > 0"/> | |
114 </when> | |
115 <when value="all_loess_sample"> | |
116 <param name="span" type="float" value="1" label="span" help="smoothing parameter; must be > 0"/> | |
117 </when> | |
118 </conditional> | |
119 </inputs> | |
120 | |
121 <outputs> <!-- Four datasets (two tabular, one pdf, one rdata); every label embeds the tool name and the selected method --> | |
122 <data name="dataMatrix_out" label="${tool.name}_${span_condition.method}_${dataMatrix.name}" format="tabular"/> | |
123 <data name="variableMetadata_out" label="${tool.name}_${span_condition.method}_${variableMetadata.name}" format="tabular"/> | |
124 <data name="graph_output" label="${tool.name}_${span_condition.method}_graph" format="pdf"/> | |
125 <data name="rdata_output" label="${tool.name}_${span_condition.method}_rdata" format="rdata"/> | |
126 </outputs> | |
127 | |
128 <tests> <!-- Both tests run all_loess_pool with span 1 and compare dataMatrix_out with the same expected file. NOTE(review): no test here covers the linear, lowess, loess or all_loess_sample methods. --> | |
129 <test> <!-- Default column names and sample type tags --> | |
130 <param name="dataMatrix" value="input-batchcorrection-dataMatrix.tsv"/> | |
131 <param name="sampleMetadata" value="input-batchcorrection-sampleMetadata.tsv"/> | |
132 <param name="variableMetadata" value="input-batchcorrection-variableMetadata.tsv"/> | |
133 <param name="method" value="all_loess_pool"/> | |
134 <param name="span" value="1"/> | |
135 <output name="dataMatrix_out" file="output-batchcorrection-dataMatrix.tsv"/> | |
136 </test> | |
137 <test> <!-- Custom sample type column name and custom pool/blank/sample tags --> | |
138 <param name="dataMatrix" value="input-batchcorrection-dataMatrix.tsv"/> | |
139 <param name="sampleMetadata" value="input-batchcorrection-sampleMetadata-customSampleType.tsv"/> | |
140 <param name="variableMetadata" value="input-batchcorrection-variableMetadata.tsv"/> | |
141 <param name="method" value="all_loess_pool"/> | |
142 <param name="span" value="1"/> | |
143 <param name="sample_type_col_name" value="MySampType"/> | |
144 <param name="sampleTypeTagPool" value="lot"/> | |
145 <param name="sampleTypeTagBlank" value="blanc"/> | |
146 <param name="sampleTypeTagSample" value="echant"/> | |
147 <output name="dataMatrix_out" file="output-batchcorrection-dataMatrix.tsv"/> | |
148 </test> | |
149 </tests> | |
150 | |
151 | |
152 <help> | |
153 | |
154 .. class:: infomark | |
155 | |
156 **Authors** | |
157 | Jean-Francois Martin - PF MetaToul-AXIOM ; INRA ; MetaboHUB (for original version of this tool and overall development of the R script) | |
158 | Melanie Petera - PFEM ; INRA ; MetaboHUB (for R wrapper and R script improvement) | |
159 | Marion Landi - FLAME ; PFEM ; INRA ; MetaboHUB (for xml interface and R wrapper) | |
160 | Franck Giacomoni - PFEM ; INRA ; MetaboHUB (for xml interface and R wrapper) | |
161 | Etienne Thevenot - LIST/LADIS ; CEA ; MetaboHUB (for R script and wrapper regarding "all loess pool" and "all loess sample" methods) | |
162 | |
163 --------------------------------------------------- | |
164 | |
165 .. class:: infomark | |
166 | |
167 **Please cite** If you use this tool, please cite: | |
168 | |
169 when using the **linear**, **lowess** or **loess** methods: | |
170 | `F.M. Van Der Kloet, I. Bobeldijk, E.R. Verheij, R.H. Jellema. (2009). "Analytical error reduction using single point calibration for accurate and precise metabolomic phenotyping." Journal of Proteome Research p5132-5141 <http://www.ncbi.nlm.nih.gov/pubmed/19754161>`_ | |
171 | |
172 when using the **all loess pool** or **all loess sample** method: | |
173 | `Dunn et al (2011). Procedures for large-scale metabolic profiling of serum and plasma using gas chromatography and liquid chromatography coupled to mass spectrometry. Nature Protocols, 6:1060-1083 <http://dx.doi.org/10.1038/nprot.2011.335>`_ | |
174 | Cleveland et al (1997). In Statistical Models in S; Chambers JM. and Hastie TJ. Ed.; Chapman et Hall: London; pp. 309-376 | |
175 | Etienne A. Thevenot, Aurelie Roux, Ying Xu, Eric Ezan, and Christophe Junot (2015). Analysis of the human adult urinary metabolome variations with age, body mass index and gender by implementing a comprehensive workflow for univariate and OPLS statistical analyses. *Journal of Proteome Research*, **14**:3322-3335 (http://dx.doi.org/10.1021/acs.jproteome.5b00354). | |
176 | |
177 --------------------------------------------------- | |
178 | |
179 .. class:: infomark | |
180 | |
181 **Tool updates** | |
182 | |
183 See the **NEWS** section at the bottom of this page | |
184 | |
185 --------------------------------------------------- | |
186 | |
187 ================ | |
188 Batch_correction | |
189 ================ | |
190 | |
191 ----------- | |
192 Description | |
193 ----------- | |
194 | |
195 | **Instrumental drift** and **offset differences** between batches have been described in **LC-MS** experiments when the number of samples is large and/or multiple batches of acquisition are needed. | |
196 | Recently a normalization strategy relying on the measurements of a **pooled** (or QC) sample injected periodically has been described: for each variable, a **regression model** is fitted to the values of the **pool** and subsequently used to adjust the intensities of the samples of interest (van der Kloet et al, 2009; Dunn et al, 2011). | |
197 | | |
198 | The current tool implements **two strategies** which differ in the way the regression model is applied to the variables (either depending on variable quality metrics, or 'loess' model for all variables) and also in the generated figure. | |
199 | | |
200 | |
201 | |
202 ----------------- | |
203 Workflow position | |
204 ----------------- | |
205 | |
206 .. image:: batch_correction.png | |
207 :width: 800 | |
208 | |
209 | |
210 ----------- | |
211 Input files | |
212 ----------- | |
213 | |
214 +----------------------------+------------+ | |
215 | Parameter : num + label | Format | | |
216 +============================+============+ | |
217 | 1 : Data Matrix file | tabular | | |
218 +----------------------------+------------+ | |
219 | 2 : Sample metadata file | tabular | | |
220 +----------------------------+------------+ | |
221 | 3 : Variable metadata file | tabular | | |
222 +----------------------------+------------+ | |
223 | |
224 | |
225 Data Matrix file must contain the intensity values of variables. | |
226 | First line must contain all the samples' names | |
227 | First column must contain all the variables' ID | |
228 | | |
229 | |
230 Sample metadata file must contain at least the three following columns: | |
231 | "batch" to identify the batches of analyses | |
232 | "injectionOrder" (integers) defining the injection order of all samples (QC-pools as well as analysed samples) | |
233 | "sampleType" indicating whether each observation is a sample ("sample") or a QC-pool ("pool"); each batch needs | |
234 | at least 3 QC-pools for intra-batch linear adjustment and 8 for lo(w)ess adjustment (5 for **all loess** methods) | |
235 | |
236 | |
237 .. class:: warningmark | |
238 | |
239 MISSING DATA are allowed only with the **all loess** methods | |
240 | |
241 | |
242 ---------- | |
243 Parameters | |
244 ---------- | |
245 | |
246 Type of regression model | |
247 | To choose between *linear*, *lowess*, *loess*, *all loess pool*, and *all loess sample* strategies | |
248 | **- Option 1** (**linear**, **lowess**, and **loess** methods): before the normalisation of each variable, some quality metrics are computed (see the "Determine Batch Correction" module); depending on the result, the variable can be normalized or not, with either the **linear**, **lowess** or **loess** model. | |
249 | **- Option 2** (**all loess pool** and **all loess sample**): each variable is normalized by using the 'loess' model; | |
250 | in the case **all loess pool** is chosen and the number of pool observations is below 5, the linear method is used (for all variables) and a warning is generated; | |
251 | if the pool intensities are not representative of the samples (which can be viewed on the figure where both trends are shown), the case **all loess sample** enables using the sample intensities (instead of the pool intensities) as the reference for the loess curve. | |
252 | In all "option 2" cases: the **median intensity of the reference observations** (either 'pool' or 'sample') is used as the scaling factor after the initial intensities have been divided by the loess predictions. | |
253 | | |
254 | |
255 Span | |
256 | Smoothing parameter, advanced option for *lo(w)ess* and *all loess* methods | |
257 | In case of a loess fit, the **span** parameter (between 0 and 1) controls the smoothing | |
258 | (the higher the smoother; higher values are preferred to avoid overfitting; Cleveland et al, 1997). | |
259 | | |
260 | |
261 Null values | |
262 | available for regression model *linear*, *lowess* and *loess* | |
263 | Controls what is done regarding negative or infinite values that can be generated during regression estimation. | |
264 | *consider it as a missing value* will switch concerned intensities to NA; | |
265 | this option implies that concerned ions will not be considered in PCA display. | |
266 | *consider it as a null intensity* will switch concerned intensities to 0 for lo(w)ess | |
267 | or correct them by the batch mean instead of regression estimate for linear. | |
268 | | |
269 | |
270 Factor of interest | |
271 | available for regression model *linear*, *lowess* and *loess* | |
272 | Name of the factor (column header) in Sample metadata file that will be used as a categorical variable for plots and PCA. | |
273 | (often a biological factor ; if none, leave "batch") | |
274 | This factor does not affect correction calculation. | |
275 | | |
276 | |
277 Level of details for plots | |
278 | available for regression model *linear*, *lowess* and *loess* | |
279 | *basic*: PCA + CV boxplot (before and after correction) | |
280 | *standard*: 'basic' plots + before/after-correction plots of intensities over injection order, and design effects for each ion | |
281 | *complete*: 'standard' plots + QC-pool regression plots per batch with samples' intensities over injection order | |
282 | This parameter is not used by the *all loess* methods where a unique figure is generated showing the sum of intensities along injection order, and the first 4 PCA scores. | |
283 | | |
284 | |
285 | |
286 ------------ | |
287 Output files | |
288 ------------ | |
289 | |
290 Batch_correction_$method_rdata.rdata | |
291 | binary data | |
292 | Download, open R and use the 'load' function; objects are in the 'res' list | |
293 | | |
294 | |
295 Batch_correction_$method_graph.pdf | |
296 | graphical output | |
297 | For the *linear* and *lo(w)ess* methods, content depends on level of details chosen | |
298 | | |
299 | |
300 Batch_correction_$method_variableMetadata.tabular | |
301 | tsv output | |
302 | Identical to the Variable metadata input file, with x more columns (where x is the number of batches) in case of *linear*, *lowess* and *loess* methods | |
303 | | |
304 | |
305 Batch_correction_$method_dataMatrix.tabular | |
306 | tsv output (tabulated) | |
307 | Same formatting as Data Matrix file; contains corrected intensities | |
308 | | |
309 | |
310 | |
311 --------------------------------------------------- | |
312 | |
313 --------------- | |
314 Working example | |
315 --------------- | |
316 | |
317 .. class:: infomark | |
318 | |
319 Refer to the corresponding "W4M HowTo" page: | |
320 | `MS data processing - Filters and normalisation <http://workflow4metabolomics.org/sites/workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf>`_ | |
321 | | |
322 | | |
323 | |
324 See also the reference history: | |
325 | `W4M00001_Sacurine-statistics (DOI:10.15454/1.4811121736910142E12) <http://dx.doi.org/10.15454/1.4811121736910142E12>`_ | |
326 | | |
327 | |
328 --------------------------------------------------- | |
329 | |
330 ---- | |
331 NEWS | |
332 ---- | |
333 | |
334 CHANGES IN VERSION 2.2.4 | |
335 ======================== | |
336 | |
337 INTERNAL MODIFICATIONS | |
338 | |
339 Fixed bug for pool selection ("all_loess" methods) | |
340 | |
341 CHANGES IN VERSION 2.2.2 | |
342 ======================== | |
343 | |
344 INTERNAL MODIFICATIONS | |
345 | |
346 Fixed bug for color plot ("all_loess" methods) | |
347 | |
348 CHANGES IN VERSION 2.2.0 | |
349 ======================== | |
350 | |
351 NEW FEATURE | |
352 | |
353 Specific names for the 'sampleType', 'injectionOrder', and 'batch' from sampleMetadata can be selected by the user (for compatibility with the MTBLS downloader) | |
354 | |
355 CHANGES IN VERSION 2.1.2 | |
356 ======================== | |
357 | |
358 INTERNAL MODIFICATIONS | |
359 | |
360 Minor modifications in config file | |
361 | |
362 CHANGES IN VERSION 2.1.0 | |
363 ======================== | |
364 | |
365 INTERNAL MODIFICATIONS | |
366 | |
367 For PCA figure display only (**all_loess** options): missing values are set to the minimum value before PCA computation is performed (with svd) | |
368 | |
369 Additional running and installation tests added with planemo, conda, and travis | |
370 | |
371 BUG FIX | |
372 | |
373 Variables with NA or 0 values in all reference samples are discarded before applying the **all_loess** normalization | |
374 | |
375 INTERNAL MODIFICATIONS | |
376 | |
377 Modifications of the **all_loess_wrapper** file to handle the recent **ropls** package versions (i.e. 1.3.15 and above) which use S4 classes | |
378 | |
379 </help> | |
380 <!-- [RECOMMENDED] All citations associated with this tool (the main citation given above and other references). Can be extracted from the history panel --> | |
381 <citations> | |
382 <!-- [HELP] As DOI or BibTeX entry --> <!-- NOTE(review): the BibTeX entry below gives the year 1991 while the help section cites the same chapter as "Cleveland et al (1997)"; confirm which year is correct --> | |
383 <citation type="doi">10.1021/pr900499r</citation> | |
384 <citation type="doi">10.1038/nprot.2011.335</citation> | |
385 <citation type="bibtex">@ARTICLE{Cleveland91, | |
386 author = {Cleveland et al}, | |
387 year = {1991}, | |
388 journal = {Statistical Models in S, Chambers JM. and Hastie TJ. Ed., Chapman et Hall: London}, | |
389 title = {Local Regression Models}, | |
390 pages = {309-376}, | |
391 editor = {Chambers JM. and Hastie TJ. Ed.}, | |
392 publisher = {Chapman et Hall: London}, | |
393 chapter = {8} | |
394 }</citation> | |
395 <citation type="doi">10.1021/acs.jproteome.5b00354</citation> | |
396 </citations> | |
397 | |
398 | |
399 </tool> |