Mercurial > repos > lecorguille > xcms_group
view abims_xcms_group.xml @ 29:020d065edd9e draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit e384d6dd5f410799ec211f73bca0b5d5d7bc651e
author | lecorguille |
---|---|
date | Thu, 01 Mar 2018 04:16:06 -0500 |
parents | a18fc7554c6d |
children | f248fd3b89d6 |
line wrap: on
line source
<tool id="abims_xcms_group" name="xcms groupChromPeaks (group)" version="@WRAPPER_VERSION@.0">
    <!--
        Galaxy tool wrapper around xcms_group.r: runs the xcms groupChromPeaks
        correspondence step (grouping of chromatographic peaks within and
        between samples) on an RData file produced by a previous xcms step.

        The @...@ tokens and the macros expanded below are not defined in this
        file; they are presumably provided by the imported macros.xml.
    -->

    <description>Perform the correspondence, the grouping of chromatographic peaks within and between samples.</description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements"/>
    <expand macro="stdio"/>

    <!--
        Command template. Arguments are passed to the R script as "key value"
        pairs; only the parameters of the selected grouping method are sent.
        The #else branch covers the remaining select option, NearestPeaks.
    -->
    <command><![CDATA[
        @COMMAND_XCMS_SCRIPT@/xcms_group.r
        image '$image'

        method $methods.method
        #if $methods.method == "PeakDensity":
            bw $methods.bw
            minFraction $methods.minFraction
            minSamples $methods.minSamples
            binSize $methods.binSize
            ## Advanced
            maxFeatures $methods.PeakDensityAdv.maxFeatures
        #elif $methods.method == "MzClust":
            ppm $methods.ppm
            absMz $methods.absMz
            minFraction $methods.minFraction
            minSamples $methods.minSamples
        #else:
            mzVsRtBalance $methods.mzVsRtBalance
            absMz $methods.absMz
            absRt $methods.absRt
            kNN $methods.kNN
        #end if

        @COMMAND_PEAKLIST@

        @COMMAND_FILE_LOAD@

        @COMMAND_LOG_EXIT@
    ]]></command>

    <inputs>
        <param name="image" type="data" format="rdata.xcms.raw,rdata.xcms.group,rdata.xcms.retcor,rdata" label="xset RData file" help="output file from another function xcms (xcmsSet, retcor etc.)" />

        <!-- One <when> per grouping method; each exposes only that method's parameters. -->
        <conditional name="methods">
            <param name="method" type="select" label="Method to use for grouping" help="See the help section below">
                <option value="PeakDensity" selected="true">PeakDensity - peak grouping based on time dimension peak densities</option>
                <option value="MzClust">MzClust - high resolution peak grouping for single spectra (direct infusion) MS data</option>
                <option value="NearestPeaks">NearestPeaks - chromatographic peak grouping based on their proximity in the mz-rt space</option>
            </param>
            <when value="PeakDensity">
                <param argument="bw" type="float" value="30" label="Bandwidth" help="bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel to apply to the peak density chromatogram" />
                <param argument="minFraction" type="float" value="0.5" label="Minimum fraction of samples" help="in at least one sample group in which the peaks have to be present to be considered as a peak group (feature)" />
                <param argument="minSamples" type="integer" value="1" label="Minimum number of samples" help="in at least one sample group in which the peaks have to be detected to be considered a peak group (feature)" />
                <param argument="binSize" type="float" value="0.25" label="Width of overlapping m/z slices" help="to use for creating peak density chromatograms and grouping peaks across samples (previously mzdiff)" />
                <section name="PeakDensityAdv" title="Advanced Options" expanded="False">
                    <param argument="maxFeatures" type="integer" value="50" label="Maximum number of groups to identify in a single m/z slice" />
                </section>
            </when>
            <when value="MzClust">
                <param argument="ppm" type="integer" value="20" label="Relative mz error for clustering/grouping in ppm" />
                <param argument="absMz" type="float" value="0" label="Absolute mz error for clustering/grouping" />
                <param argument="minFraction" type="float" value="0.5" label="Minimum fraction of samples" help="in at least one sample group in which the peaks have to be present to be considered as a peak group (feature)" />
                <param argument="minSamples" type="integer" value="1" label="Minimum number of samples" help="in at least one sample group in which the peaks have to be detected to be considered a peak group (feature)" />
            </when>
            <when value="NearestPeaks">
                <param argument="mzVsRtBalance" type="integer" value="10" label="Factor by which mz values are multiplied before calculating the (euclidean) distance between two peaks" />
                <param argument="absMz" type="float" value="0.2" label="Maximum tolerated distance for mz values" />
                <param argument="absRt" type="integer" value="15" label="Maximum tolerated distance for RT values" />
                <param argument="kNN" type="integer" value="10" label="Number of nearest Neighbours to check" />
            </when>
        </conditional>

        <expand macro="input_peaklist"/>

        <expand macro="input_file_load"/>
    </inputs>

    <!--
        Outputs are picked up from files written in the job working directory.
        Labels drop the last 6 characters of the input dataset name
        (presumably the ".RData" extension) before appending the new suffix.
    -->
    <outputs>
        <data name="xsetRData" format="rdata.xcms.group" label="${image.name[:-6]}.group.RData" from_work_dir="group.RData"/>
        <data name="plotChromPeakDensity" format="pdf" label="${image.name[:-6]}.group.plotChromPeakDensity.pdf" from_work_dir="plotChromPeakDensity.pdf"/>
        <expand macro="output_peaklist" function="group"/>
    </outputs>

    <tests>
        <!-- from xcmsSet -->
        <test>
            <param name="image" value="faahKO-single-class.xset.merged.RData"/>
            <conditional name="methods">
                <param name="method" value="PeakDensity"/>
                <param name="bw" value="5"/>
                <param name="minFraction" value="0.3"/>
                <param name="binSize" value="0.01"/>
                <section name="PeakDensityAdv">
                    <param name="maxFeatures" value="50"/>
                </section>
            </conditional>
            <conditional name="peaklist">
                <param name="peaklistBool" value="true" />
                <param name="convertRTMinute" value="false" />
                <param name="numDigitsMZ" value="4" />
                <param name="numDigitsRT" value="1" />
            </conditional>
            <expand macro="test_file_load_single"/>
            <assert_stdout>
                <has_text text="bw: 5" />
                <has_text text="minFraction: 0.3" />
                <has_text text="maxFeatures: 50" />
                <has_text text="object with 4 samples" />
                <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
                <has_text text="Mass range: 200.1-600 m/z" />
                <has_text text="Peaks: 9251 (about 2313 per sample)" />
                <has_text text="Peak Groups: 8372" />
                <has_text text="Sample classes: KO, WT" />
            </assert_stdout>
            <output name="variableMetadata" file="faahKO.xset.group.variableMetadata.tsv" />
            <output name="dataMatrix" file="faahKO.xset.group.dataMatrix.tsv" />
        </test>
        <!-- DISABLE FOR TRAVIS
        <test>
            <param name="image" value="faahKO-single.xset.merged.RData"/>
            <conditional name="methods">
                <param name="method" value="PeakDensity"/>
                <param name="bw" value="5"/>
                <param name="minFraction" value="0.3"/>
                <param name="binSize" value="0.01"/>
                <conditional name="density_options">
                    <param name="option" value="show"/>
                    <param name="maxFeatures" value="50"/>
                </conditional>
            </conditional>
            <expand macro="test_file_load_single"/>
            <assert_stdout>
                <has_text text="object with 4 samples" />
                <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
                <has_text text="Mass range: 200.1-600 m/z" />
                <has_text text="Peaks: 9251 (about 2313 per sample)" />
                <has_text text="Peak Groups: 611" />
                <has_text text="Sample classes: ." />
            </assert_stdout>
        </test>
        -->
        <!-- DISABLE FOR TRAVIS
        Zip from xcmsSet
        <test>
            <param name="image" value="faahKO.xset.RData"/>
            <conditional name="methods">
                <param name="method" value="PeakDensity"/>
                <param name="bw" value="5"/>
                <param name="minFraction" value="0.3"/>
                <param name="binSize" value="0.01"/>
                <conditional name="density_options">
                    <param name="option" value="show"/>
                    <param name="maxFeatures" value="50"/>
                </conditional>
            </conditional>
            <conditional name="peaklist">
                <param name="convertRTMinute" value="false" />
                <param name="peaklistBool" value="true" />
                <param name="numDigitsMZ" value="4" />
                <param name="numDigitsRT" value="1" />
            </conditional>
            <expand macro="test_file_load_zip"/>
            <assert_stdout>
                <has_text text="object with 4 samples" />
                <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
                <has_text text="Mass range: 200.1-600 m/z" />
                <has_text text="Peaks: 9251 (about 2313 per sample)" />
                <has_text text="Peak Groups: 8372" />
                <has_text text="Sample classes: KO, WT" />
            </assert_stdout>
            <output name="variableMetadata" file="faahKO.xset.group.variableMetadata.tsv" />
            <output name="dataMatrix" file="faahKO.xset.group.dataMatrix.tsv" />
        </test>
        -->
        <!-- DISABLE FOR TRAVIS
        Zip from retcor
        <test>
            <param name="image" value="faahKO.xset.group.retcor.RData"/>
            <conditional name="methods">
                <param name="method" value="PeakDensity"/>
                <param name="bw" value="5"/>
                <param name="minFraction" value="0.3"/>
                <param name="binSize" value="0.01"/>
                <conditional name="density_options">
                    <param name="option" value="show"/>
                    <param name="maxFeatures" value="50"/>
                </conditional>
            </conditional>
            <expand macro="test_file_load_zip"/>
            <assert_stdout>
                <has_text text="object with 4 samples" />
                <has_text text="Time range: 2509.2-4480.3 seconds (41.8-74.7 minutes)" />
                <has_text text="Mass range: 200.1-600 m/z" />
                <has_text text="Peaks: 9251 (about 2313 per sample)" />
                <has_text text="Peak Groups: 8209" />
                <has_text text="Sample classes: KO, WT" />
            </assert_stdout>
        </test>
        -->
        <!-- from retcor -->
        <test>
            <param name="image" value="faahKO-single.xset.merged.group.retcor.RData"/>
            <conditional name="methods">
                <param name="method" value="PeakDensity"/>
                <param name="bw" value="5"/>
                <param name="minFraction" value="0.3"/>
                <param name="binSize" value="0.01"/>
                <section name="PeakDensityAdv">
                    <param name="maxFeatures" value="50"/>
                </section>
            </conditional>
            <expand macro="test_file_load_single"/>
            <assert_stdout>
                <has_text text="bw: 5" />
                <has_text text="minFraction: 0.3" />
                <has_text text="binSize: 0.01"/>
                <has_text text="maxFeatures: 50" />
                <has_text text="object with 4 samples" />
                <has_text text="Time range: 2509.2-4480.3 seconds (41.8-74.7 minutes)" />
                <has_text text="Mass range: 200.1-600 m/z" />
                <has_text text="Peaks: 9251 (about 2313 per sample)" />
                <has_text text="Peak Groups: 8209" />
                <has_text text="Sample classes: KO, WT" />
            </assert_stdout>
        </test>
        <!-- DISABLE FOR TRAVIS
        Test to test the different methods parameters
        <test expect_failure="True">
            <param name="image" value="faahKO-single.xset.merged.group.retcor.RData"/>
            <conditional name="methods">
                <param name="method" value="MzClust"/>
                <param name="ppm" value="21"/>
                <param name="absMz" value="0.1"/>
            </conditional>
            <expand macro="test_file_load_single"/>
            <assert_stdout>
                <has_text text="ppm: 21" />
                <has_text text="absMz: 0.1" />
            </assert_stdout>
        </test>
        <test>
            <param name="image" value="faahKO-single.xset.merged.group.retcor.RData"/>
            <conditional name="methods">
                <param name="method" value="NearestPeaks"/>
                <param name="mzVsRtBalance" value="11"/>
                <param name="absMz" value="0.1"/>
            </conditional>
            <expand macro="test_file_load_single"/>
            <assert_stdout>
                <has_text text="mzVsRtBalance: 11" />
                <has_text text="absMz: 0.1" />
            </assert_stdout>
        </test>
        -->
    </tests>

    <!-- Help is reStructuredText; it is kept at column 0 so that it is not rendered as a block quote. -->
    <help><![CDATA[

@HELP_AUTHORS@

====================
xcms groupChromPeaks
====================

-----------
Description
-----------

After peak identification with xcmsSet, this tool groups the peaks which represent
the same analyte across samples using overlapping m/z bins and calculation
of smoothed peak distributions in chromatographic time. Allows rejection of features,
which are only partially detected within the replicates of a sample class.


-----------------
Workflow position
-----------------

**Upstream tools**

========================= ================= =================== ==========
Name                      output file       format              parameter
========================= ================= =================== ==========
xcms.xcmsSet              xset.RData        rdata.xcms.raw      RData file
------------------------- ----------------- ------------------- ----------
xcms.xcmsSet Merger       xset.RData        rdata.xcms.raw      RData file
------------------------- ----------------- ------------------- ----------
xcms.retcor               xset.RData        rdata.xcms.retcor   RData file
========================= ================= =================== ==========


**Downstream tools**

+---------------------------+-----------------+--------------------+
| Name                      | Output file     | Format             |
+===========================+=================+====================+
|xcms.retcor                | xset.RData      | rdata.xcms.group   |
+---------------------------+-----------------+--------------------+
|xcms.fillPeaks             | xset.RData      | rdata.xcms.group   |
+---------------------------+-----------------+--------------------+

The output file is an xcmsSet.RData file. You can continue your analysis using it in **xcms.retcor** tool as a next step and then **xcms.fillPeaks**.


**General schema of the metabolomic workflow**

.. image:: xcms_group_workflow.png


-----------
Input files
-----------

+---------------------------+-----------------------+
| Parameter : num + label   | Format                |
+===========================+=======================+
| Or : RData file           | rdata.xcms.raw        |
+---------------------------+-----------------------+
| Or : RData file           | rdata.xcms.retcor     |
+---------------------------+-----------------------+


----------
Parameters
----------

Method to use for grouping
--------------------------

**MzClust**

    | This method performs high resolution correspondence for single spectra samples.
    | See the MzClust_manual_

**PeakDensity**

    | This method performs correspondence (chromatographic peak grouping) based on the density (distribution) of identified peaks along the retention time axis within slices of overlapping mz ranges. All peaks (from the same or from different samples) being close on the retention time axis are grouped into a feature (peak group).
    | See the PeakDensity_manual_

**NearestPeaks**

    | This method is inspired by the grouping algorithm of mzMine [Katajamaa 2006] and performs correspondence based on proximity of peaks in the space spanned by retention time and mz values. The method creates first a master peak list consisting of all chromatographic peaks from the sample in which most peaks were identified, and starting from that, calculates distances to peaks from the sample with the next most number of peaks. If peaks are closer than the defined threshold they are grouped together.
    | See the NearestPeaks_manual_

.. _MzClust_manual: https://rdrr.io/bioc/xcms/man/groupChromPeaks-mzClust.html#heading-2
.. _PeakDensity_manual: https://rdrr.io/bioc/xcms/man/groupChromPeaks-density.html#heading-2
.. _NearestPeaks_manual: https://rdrr.io/bioc/xcms/man/groupChromPeaks-nearest.html#heading-2

@HELP_XCMS_MANUAL@

@HELP_PEAKLIST@

------------
Output files
------------

xset.group.Rplots.pdf

xset.group.RData: rdata.xcms.group format

    | Rdata file that will be necessary in the third and fourth step of the workflow (xcms.retcor and xcms.fillpeaks).

------

.. class:: infomark

The output file is an xset.group.RData file. You can continue your analysis using it in **xcms.retcor** tool.

---------------------------------------------------

---------------
Working example
---------------

Input files
-----------

    | RData file -> **xset.RData**

Parameters
----------

    | Method -> **density**
    | bw -> **5**
    | minFraction -> **0.3**
    | binSize -> **0.01**
    | Advanced options: **show**
    | maxFeatures -> **50**

Output files
------------

    | **1) xset.RData: RData file**

    | **2) Example of an xset.group.Rplots pdf file**

.. image:: xcms_group.png
        :width: 700

---------------------------------------------------

Changelog/News
--------------

**Version 3.0.0.0 - 14/02/2018**

- UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlying codes and methods


**Version 2.1.1 - 29/11/2017**

- BUGFIX: To avoid issues with accented letter in the parentFile tag of the mzXML files, we changed a hidden mechanism to LC_ALL=C


**Version 2.1.0 - 07/02/2017**

- IMPROVEMENT: Add an option to export the peak list at this step without having to wait for CAMERA.annotate

- IMPROVEMENT: xcms.group can deal with merged individual data from "xcms.xcmsSet Merger"

- BUGFIX: the default value of "density" -> "Maximum number of groups to identify in a single m/z slice", which was 5, has been changed to 50 to match the XCMS default value


**Version 2.0.6 - 06/07/2016**

- UPGRADE: upgrade the xcms version from 1.44.0 to 1.46.0


**Version 2.0.5 - 04/04/2016**

- TEST: refactoring to pass planemo test using conda dependencies


**Version 2.0.4 - 10/02/2016**

- BUGFIX: better management of errors. Datasets remained green although the process failed

- UPDATE: refactoring of internal management of inputs/outputs

- UPDATE: refactoring to feed the new report tool


**Version 2.0.2 - 02/06/2015**

- IMPROVEMENT: new datatype/dataset formats (rdata.xcms.raw, rdata.xcms.group, rdata.xcms.retcor ...) will facilitate the sequence of tools and so avoid incompatibility errors.

- IMPROVEMENT: parameter labels have changed to facilitate their reading.

    ]]></help>

    <expand macro="citation" />
</tool>