comparison abims_xcms_xcmsSet.xml @ 32:2bf1cb023c94 draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit e384d6dd5f410799ec211f73bca0b5d5d7bc651e
author lecorguille
date Thu, 01 Mar 2018 04:14:39 -0500
parents e93153c07be0
children c363b9f1caef
comparison
equal deleted inserted replaced
31:e93153c07be0 32:2bf1cb023c94
1 <tool id="abims_xcms_xcmsSet" name="xcms.xcmsSet" version="2.1.1"> 1 <tool id="abims_xcms_xcmsSet" name="xcms findChromPeaks (xcmsSet)" version="@WRAPPER_VERSION@.0">
2 <description>Filtration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis </description> 2 <description>Chromatographic peak detection</description>
3 3
4 <macros> 4 <macros>
5 <import>macros.xml</import> 5 <import>macros.xml</import>
6 </macros> 6 </macros>
7 7
8 <expand macro="requirements"/> 8 <expand macro="requirements"/>
9 <expand macro="stdio"/> 9 <expand macro="stdio"/>
10 10
11 <command><![CDATA[ 11 <command><![CDATA[
12 @COMMAND_XCMS_SCRIPT@ 12 @COMMAND_XCMS_SCRIPT@/xcms_xcmsSet.r
13 13
14 #if $input.is_of_type("mzxml") or $input.is_of_type("mzml") or $input.is_of_type("mzdata") or $input.is_of_type("netcdf"): 14 #if $input.is_of_type("mzxml") or $input.is_of_type("mzml") or $input.is_of_type("mzdata") or $input.is_of_type("netcdf"):
15 singlefile_galaxyPath '$input' singlefile_sampleName '$input.name' 15 singlefile_galaxyPath '$input' singlefile_sampleName '$input.name'
16 #else 16 #else
17 zipfile '$input' 17 zipfile '$input'
18 #end if 18 #end if
19 19
20 xfunction xcmsSet 20 BPPARAM \${GALAXY_SLOTS:-1}
21 21 method $methods.method
22 xsetRdataOutput '$xsetRData' 22
23 sampleMetadataOutput '$sampleMetadata' 23 #if $methods.method == "CentWave":
24 ticspdf '$ticsRawPdf'
25 bicspdf '$bpcsRawPdf'
26
27
28 #if $options_scanrange.option == "show":
29 scanrange "c($options_scanrange.scanrange)"
30 #end if
31
32 ## profmethod $profmethod
33 nSlaves \${GALAXY_SLOTS:-1} method $methods.method
34 #if $methods.method == "centWave":
35 ppm $methods.ppm 24 ppm $methods.ppm
36 peakwidth "c($methods.peakwidth)" 25 peakwidth "c($methods.peakwidth)"
37 #if $methods.options_c.option == "show": 26 ## Advanced
38 mzdiff $methods.options_c.mzdiff 27 snthresh $methods.CentWaveAdv.snthresh
39 snthresh $methods.options_c.snthresh 28 prefilter "c($methods.CentWaveAdv.prefilter)"
40 integrate $methods.options_c.integrate 29 mzCenterFun $methods.CentWaveAdv.mzCenterFun
41 noise $methods.options_c.noise 30 integrate $methods.CentWaveAdv.integrate
42 prefilter "c($methods.options_c.prefilter)" 31 mzdiff $methods.CentWaveAdv.mzdiff
32 fitgauss $methods.CentWaveAdv.fitgauss
33 noise $methods.CentWaveAdv.noise
34 verboseColumns $methods.CentWaveAdv.verboseColumns
35 #elif $methods.method == "MatchedFilter":
36 fwhm $methods.fwhm
37 binSize $methods.binSize
38 impute $methods.impute_cond.impute
39 #if $methods.impute_cond.impute == "linbase":
40 #if $methods.impute_cond.baseValue != "":
41 baseValue $methods.impute_cond.baseValue
42 #end if
43 distance $methods.impute_cond.distance
43 #end if 44 #end if
44 #elif $methods.method == "matchedFilter": 45 ## Advanced
45 step $methods.step 46 #if $methods.MatchedFilterAdv.sigma != "":
46 fwhm $methods.fwhm 47 sigma $methods.MatchedFilterAdv.sigma
47 #if $methods.options_m.option == "show":
48 ## sigma "$methods.options_m.sigma"
49 max $methods.options_m.max
50 snthresh $methods.options_m.snthresh
51 steps $methods.options_m.steps
52 mzdiff $methods.options_m.mzdiff
53 #end if 48 #end if
49 max $methods.MatchedFilterAdv.max
50 snthresh $methods.MatchedFilterAdv.snthresh
51 steps $methods.MatchedFilterAdv.steps
52 mzdiff $methods.MatchedFilterAdv.mzdiff
54 #elif $methods.method == "MSW": 53 #elif $methods.method == "MSW":
55 snthr $methods.snthr 54 snthresh $methods.snthresh
55 verboseColumns $methods.verboseColumns
56 scales "c($methods.scales)"
56 nearbyPeak $methods.nearbyPeak 57 nearbyPeak $methods.nearbyPeak
57 winSize.noise $methods.winSize_noise 58 ampTh $methods.ampTh
58 amp.Th $methods.amp_Th
59 scales "c($methods.scales)"
60 SNR.method "$methods.SNR_method"
61 #end if 59 #end if
62 @COMMAND_LOG_EXIT@ 60 @COMMAND_LOG_EXIT@
63 ]]></command> 61 ]]></command>
64 62
65 <inputs> 63 <inputs>
66 64
67 <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" label="File(s) from your history containing your chromatograms" help="Single file mode for the format: mzxml, mzml, mzdata and netcdf. Zip file mode for the format: no_unzip.zip, zip. See the help section below." /> 65 <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" label="File(s) from your history containing your chromatograms" help="Single file mode for the format: mzxml, mzml, mzdata and netcdf. Zip file mode for the format: no_unzip.zip, zip. See the help section below." />
68 66
69 <conditional name="options_scanrange"> 67 <!--@TODO <param argument="scanrange" type="text" value="" label="scanrange" help="scan range to process, for example (16,365)" >
70 <param name="option" type="select" label="Scan range option " > 68 Should be replaced by MSnBase::filterAcquisition
71 <option value="show">show</option> 69 -->
72 <option value="hide" selected="true">hide</option> 70
73 </param>
74 <when value="show">
75 <param name="scanrange" type="text" value="" label="scanrange" help="scan range to process, for example (16,365)" >
76 <validator type="empty_field"/>
77 </param>
78 </when>
79 <when value="hide">
80 </when>
81 </conditional>
82
83
84 <!--
85 <param name="profmethod" type="select" label="Method to use for profile generation (profmethod)" >
86 <option value="bin" selected="true">bin</option>
87 <option value="binlin">binlin</option>
88 <option value="binlinbase">binlinbase</option>
89 <option value="intlin">intlin</option>
90 </param>
91 -->
92 <conditional name="methods"> 71 <conditional name="methods">
93 <param name="method" type="select" label="Extraction method for peaks detection" help="[method] See the help section below"> 72 <param name="method" type="select" label="Extraction method for peaks detection" help="[method] See the help section below">
94 <option value="centWave" >centWave</option> 73 <option value="MatchedFilter" selected="true">MatchedFilter - peak detection in chromatographic space</option>
95 <option value="matchedFilter" selected="true">matchedFilter</option> 74 <option value="CentWave">CentWave - chromatographic peak detection using the centWave method</option>
96 <option value="MSW">MSW</option> 75 <option value="MSW">MSW - single-spectrum non-chromatography MS data peak detection</option>
97 </param> 76 </param>
98 77
99 <!-- centWave Filter options --> 78 <!-- centWave Filter options -->
100 <when value="centWave"> 79 <when value="CentWave">
101 <param name="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="[ppm]" /> 80 <param argument="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="for the initial ROI definition." />
102 <param name="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="[peakwidth]" /> 81 <param argument="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="with the expected approximate peak width in chromatographic space." />
103 82
104 <conditional name="options_c"> 83 <section name="CentWaveAdv" title="Advanced Options" expanded="False">
105 <param name="option" type="select" label="Advanced options" > 84 <param argument="snthresh" type="integer" value="10" label="Signal to Noise ratio cutoff" />
106 <option value="show">show</option> 85 <param argument="prefilter" type="text" value="3,100" label="Prefilter step for the first analysis step (ROI detection)" help="Separate by comma k, I. Mass traces are only retained if they contain at least ‘k‘ peaks with intensity ‘>= I‘." />
107 <option value="hide" selected="true">hide</option> 86 <param argument="mzCenterFun" type="select" label="Name of the function to calculate the m/z center of the chromatographic peak" >
87 <option value="wMean">intensity weighted mean of the peak's m/z values</option>
88 <option value="mean">mean of the peak's m/z values</option>
89 <option value="apex">use the m/z value at the peak apex</option>
90 <option value="wMeanApex3">intensity weighted mean of the m/z value at the peak apex and the m/z values left and right of it</option>
91 <option value="meanApex3">mean of the m/z value of the peak apex and the m/z values left and right of it</option>
108 </param> 92 </param>
109 <when value="show"> 93 <param argument="integrate" type="select" label="Integration method" >
110 <param name="snthresh" type="integer" value="10" label="Signal/Noise threshold" help="[snthresh] Signal to noise ratio cutoff" /> 94 <option value="1">peak limits are found through descent on the mexican hat filtered data (more robust, but less exact)</option>
111 <param name="mzdiff" type="float" value="-0.001" label="Minimum difference in m/z for peaks with overlapping retention times" help="[mzdiff] Can be negative to allow overlap" /> 95 <option value="2">peak limits based on real data (more accurate but prone to noise)</option>
112 <param name="integrate" type="select" label="peak limits method" help="[integrate]" > 96 </param>
113 <option value="1">peak limits based on smoothed 2nd derivative (less precise)</option> 97 <param argument="mzdiff" type="float" value="-0.001" label="Minimum difference in m/z for peaks with overlapping retention times" help="can be negative to allow overlap" />
114 <option value="2">peak limits based on real data (more sensitive to noise)</option> 98 <param argument="fitgauss" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="fitgauss" help="whether or not a Gaussian should be fitted to each peak" />
115 </param> 99 <param argument="noise" type="integer" value="0" label="Noise filter" help="allowing to set a minimum intensity required for centroids to be considered in the first analysis step (centroids with intensity lower than ‘noise’ are omitted from ROI detection)." />
116 <param name="prefilter" type="text" value="3,100" label="Prefilter step for the first phase" help="[prefilter] Separate by coma k,I. Mass traces are only retained if they contain at least ‘k’ peaks with intensity >= ‘I’"/> 100 <param argument="verboseColumns" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="verbose Columns" help="whether additional peak meta data columns should be returned" />
117 <param name="noise" type="integer" value="0" label="Noise filter" help="[noise] optional argument which is useful for data that was centroided without any intensity threshold, centroids with intensity smaller than ‘noise’ are omitted from ROI detection"/> 101 <!-- roiList -->
102 <!-- firstBaselineCheck -->
103 <!-- roiScales -->
104 </section>
105 </when>
106
107 <!-- matched Filter options -->
108 <when value="MatchedFilter">
109 <param argument="fwhm" type="integer" value="30" label="Full width at half maximum of matched filtration gaussian model peak" help="Only used to calculate the actual sigma" />
110 <param argument="binSize" type="float" value="0.1" label="Step size to use for profile generation" help="The peak detection algorithm creates extracted ion base peak chromatograms (EIBPC) on a fixed step size. (Previously step)" />
111 <conditional name="impute_cond">
112 <param argument="impute" type="select" label="Method to be used for missing value imputation" help="(previously profmethod)">
113 <option value="none">none - no linear interpolation</option>
114 <option value="lin">lin - linear interpolation</option>
115 <option value="linbase">linbase - linear interpolation within a certain bin-neighborhood</option>
116 <option value="intlin">intlin - integral of the linearly interpolated data from plus to minus half the step size</option>
117 </param>
118 <when value="none" />
119 <when value="lin" />
120 <when value="linbase">
121 <param argument="baseValue" type="float" value="" optional="true" label="The base value to which empty elements should be set" help="The default for the ‘baseValue’ is half of the smallest value in ‘x’ (‘NA’s being removed)." />
122 <param argument="distance" type="integer" value="0" label="Number of non-empty neighboring element of an empty element that should be considered for linear interpolation." />
118 </when> 123 </when>
119 <when value="hide"> 124 <when value="intlin" />
120 </when>
121 </conditional> 125 </conditional>
122 </when> 126 <section name="MatchedFilterAdv" title="Advanced Options" expanded="False">
123 127 <param argument="sigma" type="float" value="" optional="true" label="Standard deviation (width) of matched filtration model peak" help="Leave it to empty to calculate it using fwhm by default at fwhm/2.3548" />
124 <!-- matched Filter options --> 128 <param argument="max" type="integer" value="5" label="Maximum number of peaks that are expected/will be identified per slice" />
125 <when value="matchedFilter"> 129 <param argument="snthresh" type="integer" value="10" label="Signal to Noise ratio cutoff" help="defining the signal to noise cutoff to be used in the chromatographic peak detection step" />
126 <param name="step" type="float" value="0.1" label="Step size to use for profile generation" help="[step] The peak detection algorithm creates extracted ion base peak chromatograms (EIBPC) on a fixed step size" /> 130 <param argument="steps" type="integer" value="2" label="Number of bins to be merged before filtration" help="(i.e. the number of neighboring bins that will be joined to the slice in which filtration and peak detection will be performed)" />
127 <param name="fwhm" type="integer" value="30" label="Full width at half maximum of matched filtration gaussian model peak" help="[fwhm] Only used to calculate the actual sigma" /> 131 <param argument="mzdiff" type="float" value="0.6" label="Minimum difference in m/z for peaks with overlapping Retention Times" help="By default: 0.8-step*steps " />
128 <conditional name="options_m"> 132 <!-- index -->
129 <param name="option" type="select" label="Advanced options" > 133 </section>
130 <option value="show">show</option>
131 <option value="hide" selected="true">hide</option>
132 </param>
133 <when value="show">
134 <!--<param name="sigma" type="float" value="12.739935451" label="Standard deviation (width) of matched filtration model peak" help="[sigma] By default: fwhm/2.3548" />-->
135 <param name="max" type="integer" value="5" label="Maximum number of peaks per extracted ion chromatogram" help="[max]" />
136 <param name="snthresh" type="integer" value="10" label="Signal to noise ratio cutoff" help="[snthresh]" />
137 <param name="steps" type="integer" value="2" label="Number of steps to merge prior to filtration" help="[steps] The peak identification algorithm combines a given number of EIBPCs prior to filtration and peak detection, as defined by the steps argument" />
138 <param name="mzdiff" type="float" value="0.6" label="Minimum difference in m/z for peaks with overlapping Retention Times" help="[mzdiff] By default: 0.8-step*steps " />
139 </when>
140 <when value="hide">
141 </when>
142 </conditional>
143 </when> 134 </when>
144 135
145 <!-- MSW Filter options --> 136 <!-- MSW Filter options -->
146 <when value="MSW"> 137 <when value="MSW">
147 <param name="nearbyPeak" type="select" label="Determine whether to include the nearby small peaks of major peaks" help="[nearbyPeak]" > 138 <!---@TODO <param argument="winSize_noise" type="integer" value="500" label="The local window size to estimate the noise level" help="[winSize.noise]" />-->
148 <option value="TRUE">TRUE</option> 139 <param argument="snthresh" type="integer" value="3" label="Signal to Noise ratio cutoff" help="" />
149 <option value="FALSE">FALSE</option> 140 <param argument="verboseColumns" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="verbose Columns" help="whether additional peak meta data columns should be returned" />
150 </param> 141 <param argument="scales" type="text" value="1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64" label="Scales of the Continuous Wavelet Transform (CWT)" help="Scales are linked to the width of the peaks that are to be detected." />
151 <param name="winSize_noise" type="integer" value="500" label="The local window size to estimate the noise level" help="[winSize.noise]" /> 142 <param argument="nearbyPeak" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Determine whether to include the nearby small peaks of major peaks" />
152 <param name="snthr" type="integer" value="3" label="SNR (Signal to Noise Ratio) threshold" help="[snthr]" /> 143 <!-- peakScaleRange -->
153 <param name="amp_Th" type="float" value="0.002" label="Minimum required relative amplitude of the peak" help="[amp.Th] Ratio to the maximum of CWT coefficients" /> 144 <param argument="ampTh" type="float" value="0.01" label="Minimum required relative amplitude of the peak" help="Ratio to the maximum of CWT coefficients" />
154 <param name="scales" type="text" value="seq(1,22,3)" label="Scales for the Continuous Wavelet Transform (CWT)" help="[scales] Scales are linked to the width of the peaks that are to be detected. Tape as indicaded seq('n,n,n') or c(n,n) : seq(from, to, by steps), c - linear vector " /> 145 <!-- minNoiseLevel -->
155 <param name="SNR_method" type="text" value="data.mean" label="SNR (Signal to Noise Ratio) method" help="[SNR.method] Method to estimate noise level. Currently, only 95 percentage quantile is supported." /> 146 <!-- ridgeLength -->
147 <!-- peakThr -->
148 <!-- tuneIn -->
149 <!---@TODO <param argument="SNR_method" type="text" value="data.mean" label="SNR (Signal to Noise Ratio) method" help="[SNR.method] Method to estimate noise level. Currently, only 95 percentage quantile is supported." />-->
156 </when> 150 </when>
157 </conditional> 151 </conditional>
158 </inputs> 152 </inputs>
159 153
160 <outputs> 154 <outputs>
161 <data name="xsetRData" format="rdata.xcms.raw" label="${input.name.rsplit('.',1)[0]}.xset.RData" /> 155 <data name="xsetRData" format="rdata.xcms.raw" label="${input.name.rsplit('.',1)[0]}.xset.RData" from_work_dir="xcmsSet.RData" />
162 <data name="sampleMetadata" format="tabular" label="${input.name.rsplit('.',1)[0]}.sampleMetadata.tsv"> 156 <data name="sampleMetadata" format="tabular" label="${input.name.rsplit('.',1)[0]}.sampleMetadata.tsv" from_work_dir="sampleMetadata.tsv" >
163 <filter>input.extension not in ["mzxml","mzml","mzdata","netcdf"]</filter> 157 <filter>input.extension not in ["mzxml","mzml","mzdata","netcdf"]</filter>
164 </data> 158 </data>
165 <data name="ticsRawPdf" format="pdf" label="${input.name.rsplit('.',1)[0]}.xset.TICs_raw.pdf" /> 159 <data name="ticsRawPdf" format="pdf" label="${input.name.rsplit('.',1)[0]}.xset.TICs_raw.pdf" from_work_dir="TICs.pdf" />
166 <data name="bpcsRawPdf" format="pdf" label="${input.name.rsplit('.',1)[0]}.xset.BPCs_raw.pdf" /> 160 <data name="bpcsRawPdf" format="pdf" label="${input.name.rsplit('.',1)[0]}.xset.BPCs_raw.pdf" from_work_dir="BICs.pdf" />
167 <data name="log" format="txt" label="${input.name.rsplit('.',1)[0]}.xset.log.txt" /> 161 <data name="log" format="txt" label="${input.name.rsplit('.',1)[0]}.xset.log.txt" from_work_dir="log.txt" />
168 </outputs> 162 </outputs>
169 163
170 <tests> 164 <tests>
171 <!--<test> 165
166 <test>
167 <param name="input" value="faahKO_reduce.zip" ftype="zip" />
168 <conditional name="methods">
169 <param name="method" value="CentWave" />
170 <param name="ppm" value="25" />
171 <param name="peakwidth" value="20,50" />
172 </conditional>
173 <assert_stdout>
174 <has_text text="ppm: 25" />
175 <has_text text="peakwidth: 20, 50" />
176 <has_text text="object with 4 samples" />
177 <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
178 <has_text text="Mass range: 200.1-600 m/z" />
179 <has_text text="Peaks: 9251 (about 2313 per sample)" />
180 <has_text text="Peak Groups: 0" />
181 <has_text text="Sample classes: KO, WT" />
182 </assert_stdout>
183 </test>
184 <test>
185 <param name="input" value="MM14.mzML" ftype="mzxml" />
186 <conditional name="methods">
187 <param name="method" value="CentWave" />
188 <param name="ppm" value="56" />
189 <param name="peakwidth" value="5.275,13.5" />
190 </conditional>
191 <assert_stdout>
192 <has_text text="ppm: 56" />
193 <has_text text="peakwidth: 5.275, 13.5" />
194 <has_text text="object with 1 samples" />
195 <has_text text="Time range: 271-307.3 seconds (4.5-5.1 minutes)" />
196 <has_text text="Mass range: 117.0357-936.7059 m/z" />
197 <has_text text="Peaks: 222 (about 222 per sample)" />
198 <has_text text="Peak Groups: 0" />
199 <has_text text="Sample classes: ." />
200 </assert_stdout>
201 </test>
202 <!-- DISABLE FOR TRAVIS
203 Useful to generate test-data for the further steps
204 <test>
205 <param name="input" value="ko15.CDF" ftype="netcdf" />
206 <conditional name="methods">
207 <param name="method" value="CentWave" />
208 <param name="ppm" value="25" />
209 <param name="peakwidth" value="20,50" />
210 </conditional>
211 <assert_stdout>
212 <has_text text="object with 1 samples" />
213 <has_text text="Time range: 2506.1-4471.7 seconds (41.8-74.5 minutes)" />
214 <has_text text="Mass range: 200.2-600 m/z" />
215 <has_text text="Peaks: 2262 (about 2262 per sample)" />
216 <has_text text="Peak Groups: 0" />
217 <has_text text="Sample classes: ." />
218 </assert_stdout>
219 </test>
220 <test>
221 <param name="input" value="ko16.CDF" ftype="netcdf" />
222 <conditional name="methods">
223 <param name="method" value="CentWave" />
224 <param name="ppm" value="25" />
225 <param name="peakwidth" value="20,50" />
226 </conditional>
227 <assert_stdout>
228 <has_text text="object with 1 samples" />
229 <has_text text="Time range: 2521.7-4477.9 seconds (42-74.6 minutes)" />
230 <has_text text="Mass range: 200.1-600 m/z" />
231 <has_text text="Peaks: 2408 (about 2408 per sample)" />
232 <has_text text="Peak Groups: 0" />
233 <has_text text="Sample classes: ." />
234 </assert_stdout>
235 </test>
236 <test>
237 <param name="input" value="wt15.CDF" ftype="netcdf" />
238 <conditional name="methods">
239 <param name="method" value="CentWave" />
240 <param name="ppm" value="25" />
241 <param name="peakwidth" value="20,50" />
242 </conditional>
243 <assert_stdout>
244 <has_text text="object with 1 samples" />
245 <has_text text="Time range: 2517-4473.2 seconds (42-74.6 minutes)" />
246 <has_text text="Mass range: 200.2-599.8 m/z" />
247 <has_text text="Peaks: 2278 (about 2278 per sample)" />
248 <has_text text="Peak Groups: 0" />
249 <has_text text="Sample classes: ." />
250 </assert_stdout>
251 </test>
252 <test>
253 <param name="input" value="wt16.CDF" ftype="netcdf" />
254 <conditional name="methods">
255 <param name="method" value="CentWave" />
256 <param name="ppm" value="25" />
257 <param name="peakwidth" value="20,50" />
258 </conditional>
259 <assert_stdout>
260 <has_text text="object with 1 samples" />
261 <has_text text="Time range: 2521.7-4468.5 seconds (42-74.5 minutes)" />
262 <has_text text="Mass range: 200.3-600 m/z" />
263 <has_text text="Peaks: 2303 (about 2303 per sample)" />
264 <has_text text="Peak Groups: 0" />
265 <has_text text="Sample classes: ." />
266 </assert_stdout>
267 </test>
268 -->
269 <!-- DISABLE FOR TRAVIS
270 Test to test the different methods parameters
271 <test>
272 <param name="input" value="MM8.mzML" ftype="mzxml" />
273 <conditional name="methods">
274 <param name="method" value="MatchedFilter" />
275 <param name="fwhm" value="35" />
276 <conditional name="impute_cond">
277 <param name="impute" value="linbase" />
278 <param name="baseValue" value="0.1" />
279 <param name="distance" value="1" />
280 </conditional>
281 </conditional>
282 <assert_stdout>
283 <has_text text="fwhm: 35" />
284 <has_text text="impute: linbase" />
285 <has_text text="baseValue: 0.1" />
286 <has_text text="distance: 1" />
287 </assert_stdout>
288 </test>
289 <test>
290 <param name="input" value="MM8.mzML" ftype="mzxml" />
291 <conditional name="methods">
292 <param name="method" value="MSW" />
293 <param name="snthresh" value="4" />
294 <param name="verboseColumns" value="true" />
295 </conditional>
296 <assert_stdout>
297 <has_text text="snthresh: 4" />
298 <has_text text="verboseColumns: TRUE" />
299 </assert_stdout>
300 </test>
301 -->
302 <!-- DISABLE FOR TRAVIS
303 No more test-data/sacuri_dir_root.zip
304 <test>
172 <param name="input" value="sacuri_dir_root.zip" ftype="zip" /> 305 <param name="input" value="sacuri_dir_root.zip" ftype="zip" />
173 <param name="methods|method" value="matchedFilter" /> 306 <conditional name="methods">
174 <param name="methods|step" value="0.01" /> 307 <param name="method" value="MatchedFilter" />
175 <param name="methods|fwhm" value="4" /> 308 <param name="step" value="0.01" />
176 <param name="methods|options_m|option" value="show" /> 309 <param name="fwhm" value="4" />
177 <param name="methods|options_m|max" value="50" /> 310 <conditional name="options_m">
178 <param name="methods|options_m|snthresh" value="1" /> 311 <param name="option" value="show" />
179 <param name="methods|options_m|steps" value="2" /> 312 <param name="max" value="50" />
180 <output name="log"> 313 <param name="snthresh" value="1" />
181 <assert_contents> 314 <param name="steps" value="2" />
182 <has_text text="object with 4 samples" /> 315 </conditional>
183 <has_text text="Time range: 0.7-1139.7 seconds (0-19 minutes)" /> 316 </conditional>
184 <has_text text="Mass range: 50.0021-999.9863 m/z" /> 317 <assert_stdout>
185 <has_text text="Peaks: 59359 (about 14840 per sample)" /> 318 <has_text text="object with 4 samples" />
186 <has_text text="Peak Groups: 0" /> 319 <has_text text="Time range: 0.7-1139.7 seconds (0-19 minutes)" />
187 <has_text text="Sample classes: bio, blank" /> 320 <has_text text="Mass range: 50.0021-999.9863 m/z" />
188 </assert_contents> 321 <has_text text="Peaks: 59359 (about 14840 per sample)" />
189 </output> 322 <has_text text="Peak Groups: 0" />
190 </test> 323 <has_text text="Sample classes: bio, blank" />
324 </assert_stdout>
325 </test>
326 -->
327 <!-- DISABLE FOR TRAVIS
328 No more test-data/sacuri_current_root.zip
191 <test> 329 <test>
192 <param name="input" value="sacuri_current_root.zip" ftype="zip" /> 330 <param name="input" value="sacuri_current_root.zip" ftype="zip" />
193 <param name="methods|method" value="centWave" /> 331 <conditional name="methods">
194 <param name="methods|ppm" value="25" /> 332 <param name="method" value="CentWave" />
195 <param name="methods|peakwidth" value="20,50" /> 333 <param name="ppm" value="25" />
196 <output name="log"> 334 <param name="peakwidth" value="20,50" />
197 <assert_contents> 335 </conditional>
198 <has_text text="object with 4 samples" /> 336 <assert_stdout>
199 <has_text text="Time range: 3.5-1139.2 seconds (0.1-19 minutes)" /> 337 <has_text text="object with 4 samples" />
200 <has_text text="Mass range: 57.9756-593.4086 m/z" /> 338 <has_text text="Time range: 3.5-1139.2 seconds (0.1-19 minutes)" />
201 <has_text text="Peaks: 1535 (about 384 per sample)" /> 339 <has_text text="Mass range: 57.9756-593.4086 m/z" />
202 <has_text text="Peak Groups: 0" /> 340 <has_text text="Peaks: 1535 (about 384 per sample)" />
203 <has_text text="Sample classes: bio, blank" /> 341 <has_text text="Peak Groups: 0" />
204 </assert_contents> 342 <has_text text="Sample classes: bio, blank" />
205 </output> 343 </assert_stdout>
206 </test>--> 344 </test>
207 <test> 345 -->
208 <param name="input" value="faahKO_reduce.zip" ftype="zip" />
209 <param name="methods|method" value="centWave" />
210 <param name="methods|ppm" value="25" />
211 <param name="methods|peakwidth" value="20,50" />
212 <output name="log">
213 <assert_contents>
214 <has_text text="object with 4 samples" />
215 <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
216 <has_text text="Mass range: 200.1-600 m/z" />
217 <has_text text="Peaks: 9251 (about 2313 per sample)" />
218 <has_text text="Peak Groups: 0" />
219 <has_text text="Sample classes: KO, WT" />
220 </assert_contents>
221 </output>
222 </test>
223 <!-- Passed but disable to save time for Travis" -->
224 <!--<test>
225 <param name="input" value="ko15.CDF" ftype="netcdf" />
226 <param name="methods|method" value="centWave" />
227 <param name="methods|ppm" value="25" />
228 <param name="methods|peakwidth" value="20,50" />
229 <output name="log">
230 <assert_contents>
231 <has_text text="object with 1 samples" />
232 <has_text text="Time range: 2506.1-4471.7 seconds (41.8-74.5 minutes)" />
233 <has_text text="Mass range: 200.2-600 m/z" />
234 <has_text text="Peaks: 2262 (about 2262 per sample)" />
235 <has_text text="Peak Groups: 0" />
236 <has_text text="Sample classes: ." />
237 </assert_contents>
238 </output>
239 </test>
240 <test>
241 <param name="input" value="ko16.CDF" ftype="netcdf" />
242 <param name="methods|method" value="centWave" />
243 <param name="methods|ppm" value="25" />
244 <param name="methods|peakwidth" value="20,50" />
245 <output name="log">
246 <assert_contents>
247 <has_text text="object with 1 samples" />
248 <has_text text="Time range: 2521.7-4477.9 seconds (42-74.6 minutes)" />
249 <has_text text="Mass range: 200.1-600 m/z" />
250 <has_text text="Peaks: 2408 (about 2408 per sample)" />
251 <has_text text="Peak Groups: 0" />
252 <has_text text="Sample classes: ." />
253 </assert_contents>
254 </output>
255 </test>
256 <test>
257 <param name="input" value="wt15.CDF" ftype="netcdf" />
258 <param name="methods|method" value="centWave" />
259 <param name="methods|ppm" value="25" />
260 <param name="methods|peakwidth" value="20,50" />
261 <output name="log">
262 <assert_contents>
263 <has_text text="object with 1 samples" />
264 <has_text text="Time range: 2517-4473.2 seconds (42-74.6 minutes)" />
265 <has_text text="Mass range: 200.2-599.8 m/z" />
266 <has_text text="Peaks: 2278 (about 2278 per sample)" />
267 <has_text text="Peak Groups: 0" />
268 <has_text text="Sample classes: ." />
269 </assert_contents>
270 </output>
271 </test>
272 <test>
273 <param name="inputs|input" value="single_file" />
274 <param name="inputs|single_file" value="wt16.CDF" ftype="netcdf" />
275 <param name="methods|method" value="centWave" />
276 <param name="methods|ppm" value="25" />
277 <param name="methods|peakwidth" value="20,50" />
278 <output name="log">
279 <assert_contents>
280 <has_text text="object with 1 samples" />
281 <has_text text="Time range: 2521.7-4468.5 seconds (42-74.5 minutes)" />
282 <has_text text="Mass range: 200.3-600 m/z" />
283 <has_text text="Peaks: 2303 (about 2303 per sample)" />
284 <has_text text="Peak Groups: 0" />
285 <has_text text="Sample classes: ." />
286 </assert_contents>
287 </output>
288 </test>-->
289 <test>
290 <param name="input" value="HU_neg_017.mzXML" ftype="mzxml" />
291 <param name="methods|method" value="centWave" />
292 <param name="methods|ppm" value="25" />
293 <param name="methods|peakwidth" value="20,50" />
294 <output name="log">
295 <assert_contents>
296 <has_text text="object with 1 samples" />
297 <has_text text="Time range: 3.5-1139.1 seconds (0.1-19 minutes)" />
298 <has_text text="Mass range: 57.9756-556.8128 m/z" />
299 <has_text text="Peaks: 380 (about 380 per sample)" />
300 <has_text text="Peak Groups: 0" />
301 <has_text text="Sample classes: ." />
302 </assert_contents>
303 </output>
304 </test>
305 <test>
306 <param name="input" value="MM14.mzML" ftype="mzxml" />
307 <param name="methods|method" value="centWave" />
308 <param name="methods|ppm" value="56" />
309 <param name="methods|peakwidth" value="5.275,13.5" />
310 <output name="log">
311 <assert_contents>
312 <has_text text="object with 1 samples" />
313 <has_text text="Time range: 271-307.3 seconds (4.5-5.1 minutes)" />
314 <has_text text="Mass range: 117.0357-936.7059 m/z" />
315 <has_text text="Peaks: 222 (about 222 per sample)" />
316 <has_text text="Peak Groups: 0" />
317 <has_text text="Sample classes: ." />
318 </assert_contents>
319 </output>
320 </test>
321 </tests> 346 </tests>
322 347
323 <help><![CDATA[ 348 <help><![CDATA[
324 349
325 @HELP_AUTHORS@ 350 @HELP_AUTHORS@
326 351
327 ============ 352 ===================
328 Xcms.xcmsSet 353 xcms findChromPeaks
329 ============ 354 ===================
330 355
331 ----------- 356 -----------
332 Description 357 Description
333 ----------- 358 -----------
334 359
335 This tool is used for preprocessing analyte data from multiple LC/MS files (formats NetCDF, mzXML and mzData). It extracts ions from each sample independently and, using a statistical model, filters and integrates the peaks. 360 This tool is used for preprocessing data from multiple LC/MS files (formats NetCDF, mzXML and mzData) using the xcms_ R package. It extracts ions from each sample independently and, using a statistical model, filters and integrates the peaks.
336 You can read a tutorial on how to perform xcms preprocessing which is available here_. 361 You can read a tutorial on how to perform xcms preprocessing which is available here_.
337 362
363 .. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html
338 .. _here: http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToPerformXcmsPreprocessing_v02.pdf 364 .. _here: http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToPerformXcmsPreprocessing_v02.pdf
339 365
340 366
341 ----------------- 367 -----------------
342 Workflow position 368 Workflow position
402 ----------------------------------------- 428 -----------------------------------------
403 429
404 **Step1: Creating your directory and hierarchize the subdirectories** 430 **Step1: Creating your directory and hierarchize the subdirectories**
405 431
406 432
407 VERY IMPORTANT: If you zip your files under Windows, you must use the 7Zip software (http://www.7-zip.org/), otherwise your zip will not be unzipped correctly on the W4M platform (zip corrupted bug). 433 VERY IMPORTANT: If you zip your files under Windows, you must use the 7Zip_ software, otherwise your zip will not be unzipped correctly on the W4M platform (zip corrupted bug).
434
435 .. _7Zip: http://www.7-zip.org/
408 436
409 Your zip should contain all your conditions as sub-directories. For example, two conditions (mutant and wild): 437 Your zip should contain all your conditions as sub-directories. For example, two conditions (mutant and wild):
410 arabidopsis/wild/01.raw 438 arabidopsis/wild/01.raw
411 arabidopsis/mutant/01.raw 439 arabidopsis/mutant/01.raw
412 440
416 444
417 **Step 3 : Uploading it to our Galaxy server** 445 **Step 3 : Uploading it to our Galaxy server**
418 446
419 If your zip file is less than 2Gb, you can use the Get Data tool to upload it. 447 If your zip file is less than 2Gb, you can use the Get Data tool to upload it.
420 448
421 Otherwise if your zip file is larger than 2Gb, please refer to the HOWTO on workflow4metabolomics.org (http://application.sb-roscoff.fr/download/w4m/howto/galaxy_upload_up_2Go.pdf). 449 Otherwise if your zip file is larger than 2Gb, please refer to the HOWTO_ on workflow4metabolomics.org.
450
451 .. _HOWTO: http://application.sb-roscoff.fr/download/w4m/howto/galaxy_upload_up_2Go.pdf
422 452
423 For more information, don't hesitate to send us an email at supportATworkflow4metabolomics.org. 453 For more information, don't hesitate to send us an email at supportATworkflow4metabolomics.org.
424 454
425 Advices for converting your files for the XCMS input 455 Advices for converting your files for the XCMS input
426 ---------------------------------------------------- 456 ----------------------------------------------------
449 ---------- 479 ----------
450 480
451 Extraction method for peaks detection 481 Extraction method for peaks detection
452 ------------------------------------- 482 -------------------------------------
453 483
454 **Matched Filter** 484 **CentWave**
455 485
456 | One parameter to consider is the Gaussian model peak width used for matched filtration, an integral part of the peak detection algorithm. 486 | The centWave algorithm performs peak density and wavelet based chromatographic peak detection for high resolution LC/MS data in centroid mode [Tautenhahn 2008].
457 | For a discussion of how model peak width affects the signal to noise ratio, see Danielsson et al. (2002).
458
459
460 **cent Wave**
461
462 | This algorithm is most suitable for high resolution LC/{TOF,OrbiTrap,FTICR}-MS data in centroid mode.
463 | Due to the fact that peak centroids are used, a binning step is not necessary. 487 | Due to the fact that peak centroids are used, a binning step is not necessary.
464 | The method is capable of detecting close-by-peaks and also overlapping peaks. Some efforts are made to detect the exact peak boundaries to get precise peak integrals. 488 | The method is capable of detecting close-by-peaks and also overlapping peaks. Some efforts are made to detect the exact peak boundaries to get precise peak integrals.
489 | See the CentWave_manual_
490
491 **MatchedFilter**
492
493 | The *matchedFilter* algorithm identifies peaks in the chromatographic time domain as described in [Smith 2006]. The intensity values are binned by cutting the LC/MS data into slices (bins) of a mass unit (‘binSize’ m/z) wide. Within each bin the maximal intensity is selected. The chromatographic peak detection is then performed in each bin by extending it based on the ‘steps’ parameter to generate slices comprising bins ‘current_bin - steps +1’ to ‘current_bin + steps - 1’. Each of these slices is then filtered with matched filtration using a second-derivative Gaussian as the model peak shape. After filtration peaks are detected using a signal-to-noise ratio cut-off. For more details and illustrations see [Smith 2006].
494 | See the MatchedFilter_manual_
465 495
466 **MSW** 496 **MSW**
467 497
468 | Wavelet based, used for direct infusion data. Continuous wavelet transform (CWT) can be used to locate chromatographic peaks on different scales. 498 | Wavelet based, used for direct infusion data. Continuous wavelet transform (CWT) can be used to locate chromatographic peaks on different scales.
469 | If you wish to have more details about the other parameters, you can read the following documents: 499 | See the MSW_manual_
470 | -Example of preprocessing data with XCMS : http://www.bioconductor.org/packages/2.12/bioc/vignettes/xcms/inst/doc/xcmsPreprocess.pdf 500
471 | -Details and explanations for all the parameters of XCMS package: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf 501 .. _CentWave_manual: https://rdrr.io/bioc/xcms/man/findChromPeaks-centWave.html#heading-2
472 502 .. _MatchedFilter_manual: https://rdrr.io/bioc/xcms/man/findChromPeaks-matchedFilter.html#heading-2
503 .. _MSW_manual: https://rdrr.io/bioc/xcms/man/findPeaks-MSW.html#heading-2
504
505 @HELP_XCMS_MANUAL@
473 506
474 ------------ 507 ------------
475 Output files 508 Output files
476 ------------ 509 ------------
477 510
555 --------------------------------------------------- 588 ---------------------------------------------------
556 589
557 Changelog/News 590 Changelog/News
558 -------------- 591 --------------
559 592
593 **Version 3.0.0.0 - 14/02/2018**
594
595 - UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0, which required refactoring a lot of the underlying code and methods
596
597 - NEW: a bunch of new options: CentWave.mzCenterFun, CentWave.fitgauss, CentWave.verboseColumns, MatchedFilter.sigma
598
599 - UPDATE: since xcms 3.0.0, some options are no longer available: scanrange, profmethod, MatchedFilter.step, MatchedFilter.sigma, MSW.winSize.noise, MSW.SNR.method
600
560 **Version 2.1.1 - 29/11/2017** 601 **Version 2.1.1 - 29/11/2017**
561 602
562 - BUGFIX: To avoid issues with accented letters in the parentFile tag of the mzXML files, we changed a hidden mechanism to LC_ALL=C 603 - BUGFIX: To avoid issues with accented letters in the parentFile tag of the mzXML files, we changed a hidden mechanism to LC_ALL=C
563 604
564 **Version 2.1.0 - 22/02/2017** 605 **Version 2.1.0 - 22/02/2017**
581 622
582 **Version 2.0.8 - 06/04/2016** 623 **Version 2.0.8 - 06/04/2016**
583 624
584 - TEST: refactoring to pass planemo test using conda dependencies 625 - TEST: refactoring to pass planemo test using conda dependencies
585 626
586
587 **Version 2.0.7 - 10/02/2016** 627 **Version 2.0.7 - 10/02/2016**
588 628
589 - BUGFIX: better management of errors. Datasets remained green although the process failed 629 - BUGFIX: better management of errors. Datasets remained green although the process failed
590 630
591 - BUGFIX/IMPROVEMENT: New checking steps around the imported data in order to raise explicit error messages before or after launching XCMS: checking of bad characters in the filenames, checking of the XML integrity and checking of duplicates which can appear in the sample names during the XCMS process because of bad characters 631 - BUGFIX/IMPROVEMENT: New checking steps around the imported data in order to raise explicit error messages before or after launching XCMS: checking of bad characters in the filenames, checking of the XML integrity and checking of duplicates which can appear in the sample names during the XCMS process because of bad characters
594 634
595 - UPDATE: refactoring of internal management of inputs/outputs 635 - UPDATE: refactoring of internal management of inputs/outputs
596 636
597 - TEST: refactoring to feed the new report tool 637 - TEST: refactoring to feed the new report tool
598 638
599
600 **Version 2.0.2 - 18/01/2016** 639 **Version 2.0.2 - 18/01/2016**
601 640
602 - BUGFIX: Some zip files were tagged as "corrupt" by R. We have changed the extraction mode to deal with those cases. 641 - BUGFIX: Some zip files were tagged as "corrupt" by R. We have changed the extraction mode to deal with those cases.
603 642
604
605 **Version 2.0.2 - 09/10/2015** 643 **Version 2.0.2 - 09/10/2015**
606 644
607 - BUGFIX: Some users reported a bug in xcms.xcmsSet. The preprocessing stops itself and doesn't import the whole dataset contained in the zip file without warning. But meanwhile, please check your samplemetadata dataset and the number of rows. 645 - BUGFIX: Some users reported a bug in xcms.xcmsSet. The preprocessing stops itself and doesn't import the whole dataset contained in the zip file without warning. But meanwhile, please check your samplemetadata dataset and the number of rows.
608
609 646
610 **Version 2.0.2 - 02/06/2015** 647 **Version 2.0.2 - 02/06/2015**
611 648
612 - NEW: The W4M workflows will now take as input a zip file to ease the transfer and to improve dataset exchange between tools and users. (See How_to_upload). The previous "Library directory name" is still available but we invite users to switch to the new zip system as soon as possible. 649 - NEW: The W4M workflows will now take as input a zip file to ease the transfer and to improve dataset exchange between tools and users. (See How_to_upload). The previous "Library directory name" is still available but we invite users to switch to the new zip system as soon as possible.
613 650