comparison process_scans.xml @ 0:d129e75a31d9 draft

planemo upload for repository https://github.com/computational-metabolomics/dimspy-galaxy commit d30de6d202e1b97aaca189acc612ae87e95d033f
author rjmw
date Tue, 27 Feb 2018 14:04:01 -0500
parents
children 769165c75514
comparison
equal deleted inserted replaced
-1:000000000000 0:d129e75a31d9
1 <tool id="dimspy_process_scans" name="Process Scans (and SIM-Stitch)" version="1.0.0">
2 <description> - Read, filter and average MS scans</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <expand macro="stdio" />
8 <command><![CDATA[
9 #if $input.format == "data_collection"
10 #for $fn in $input.source
11 #if str( $fn ).endswith(".dat")
12 ln -s "$fn" "$fn.name"
13 &&
14 #end if
15 #end for
16 #elif $input.format == "single_file"
17 #if str( $input.source ).endswith(".dat")
18 ln -s "$input.source" "$input.source.name"
19 &&
20 #end if
21 #end if
22 dimspy process-scans
23 #if $input.format == "data_collection"
24 #for $fn in $input.source
25 --input "$fn.name"
26 #end for
27 #elif $input.format == "single_file"
28 --input "$input.source.name"
29 #elif $input.format == "library"
30 --input "$__app__.config.user_library_import_dir/$__user_email__/$input.source"
31 #else
32 --input "$input.source"
33 #end if
34 --output "$hdf5_file_out"
35 #if $filelist
36 --filelist "$filelist"
37 #end if
38 --function-noise $function_noise
39 --snr-threshold $snr_threshold
40 --ppm $mults.ppm
41 --min_scans $mults.min_scans
42 #if float($mults.min_fraction) > 0.0
43 --min-fraction $mults.min_fraction
44 #else
45 --min-fraction 0.0
46 #end if
47 #if float($mults.rsd_threshold) > 0.0
48 --rsd-threshold $mults.rsd_threshold
49 #end if
50 #if $adv.skip_stitching
51 --skip-stitching
52 #end if
53 #if float($adv.ringing_threshold) > 0.0
54 --ringing-threshold $adv.ringing_threshold
55 #end if
56 #for $mzr in $adv.remove_mz_range
57 --remove-mz-range $mzr.start $mzr.end
58 #end for
59 #if $scan_events.filter == "true"
60 #for $se in $scan_events.descriptions
61 #if $scan_events.incl_excl == "include"
62 --include-scan-events $se.start $se.end $se.scan_type
63 #elif $scan_events.incl_excl == "exclude"
64 --exclude-scan-events $se.start $se.end $se.scan_type
65 #end if
66 #end for
67 #end if
68 --report "$report"
69 &&
70 dimspy hdf5-pls-to-txt
71 --input "$hdf5_file_out"
72 --output .
73 --delimiter $delimiter
74 ]]></command> <!-- Command template: symlinks history datasets whose path ends in ".dat" to their display name, runs "dimspy process-scans" with the selected filters, then runs "dimspy hdf5-pls-to-txt" to write the text peaklists into the working directory. The library path is quoted so directory names containing spaces stay a single argument. -->
75 <inputs> <!-- Tool form: data source selector, optional filelist, noise and SNR settings, optional scan-event filtering, multi-scan options, advanced options, and a hidden output delimiter. -->
76 <conditional name="input">
77 <param name="format" type="select" label="Choose the source for the dataset" >
78 <option value="zip_file" selected="true">Zip file from your History containing *.mzML files</option>
79 <option value="library">Library directory name</option>
80 <option value="data_collection">Data collection (*.mzML or *.raw files)</option>
81 <option value="single_file">Single *.mzML or *.raw file</option>
82 </param>
83 <when value="zip_file">
84 <param name="source" type="data" format="zip" label="Zip file containing *.mzml or *.raw files" argument="--source">
85 <validator type="empty_field" />
86 </param>
87 </when>
88 <when value="library">
89 <param name="source" type="text" size="40" label="Library directory containing *.mzml or *.raw files" argument="--source">
90 <validator type="empty_field" />
91 </param>
92 </when>
93 <when value="data_collection">
94 <param name="source" type="data_collection" format="mzml,thermo.raw,raw" label="Data collection of *.mzml or *.raw files" argument="--source" >
95 <validator type="empty_field" />
96 </param>
97 </when>
98 <when value="single_file">
99 <param name="source" type="data" format="mzml,thermo.raw,raw" label="Single *.mzml or *.raw" argument="--source" >
100 <validator type="empty_field" />
101 </param>
102 </when>
103 </conditional>
104 <param name="filelist" type="data" optional="true" format="tsv,tabular" label="Filelist / Samplelist" argument="--filelist" />
105 <param name="function_noise" type="select" label="Function to calculate the noise from each scan" help="" argument="--function-noise">
106 <option value="median" selected="true">median intensity</option>
107 <option value="mean">mean intensity</option>
108 <option value="mad">mad (mean absolute deviation) intensity</option>
109 <option value="noise_packets">As shown in Xcalibur Qual Browser (Available for *.RAW files only)</option>
110 </param>
111 <param name="snr_threshold" type="float" value="3.0" label="Signal-to-noise ratio threshold" help="" argument="--snr-threshold" />
112 <conditional name="scan_events">
113 <param name="filter" type="boolean" label="Filter specific windows or scan events?" help="(--include-scan-events / --exclude-scan-events)"/>
114 <when value="true">
115 <param name="incl_excl" type="select" label="Include / Exclude scan event(s)" >
116 <option value="exclude" selected="true">Exclude</option>
117 <option value="include">Include</option>
118 </param>
119 <repeat name="descriptions" title="Description">
120 <param name="start" type="float" value="0" label="Start m/z for scan event"/>
121 <param name="end" type="float" value="0" label="End m/z for scan event">
122 <validator type="expression" message="M/z value must be larger than 0.0">float(value) > 0.0</validator>
123 </param>
124 <param name="scan_type" type="select" label="Scan type">
125 <option value="full" selected="true">Full scan</option>
126 <option value="sim">SIM scan</option>
127 </param>
128 </repeat>
129 </when>
130 <when value="false">
131 </when>
132 </conditional>
133 <section name="mults" title="Show options for multiple scans" expanded="True">
134 <param name="min_scans" type="integer" value="1" min="1" label="Minimum number of scans required for each m/z window or event" help="" argument="--min_scans" />
135 <param name="ppm" type="float" value="2.0" label="Ppm error tolerance" help="Maximum tolerated m/z deviation in consecutive scans in parts per million." argument="--ppm" />
136 <param name="min_fraction" min="0.0" max="1.0" type="float" value="0.0" label="Minimum fraction (i.e. percentage) of scans a peak has to be present in." help="Select '0' to skip this step." argument="--min-fraction" />
137 <param name="rsd_threshold" type="float" value="0.0" min="0.0" label="Relative standard deviation threshold" help="Select '0' to skip this step. Maximum tolerated relative standard deviation (RSD) of the peak intensities across scans." argument="--rsd-threshold" />
138 </section>
139 <section name="adv" title="Show advanced options" expanded="True">
140 <param name="skip_stitching" type="boolean" checked="false" label="Skip SIM-Stitching?" help="When set to 'yes' it will skip the processing step where (SIM) windows are 'stitched' or 'joined' together. Set this option to 'yes' if you like to process individual scan/SIM windows (events/ranges) without 'stitching' them."/>
141 <repeat name="remove_mz_range" title="Remove m/z range(s)?">
142 <param name="start" type="float" value="0.0" label="Start m/z of removal range"/>
143 <param name="end" type="float" value="0.0" label="End m/z of removal range">
144 <validator type="expression" message="M/z value must be larger than 0.0">float(value) > 0.0</validator>
145 </param>
146 </repeat>
147 <param name="ringing_threshold" type="float" value="0.0" min="0.0" max="1.0" label="Relative intensity threshold used to remove ringing artifacts" help="Select '0' to skip this filter." argument="--ringing-threshold" />
148 </section>
149 <param name="delimiter" type="hidden" value="tab" argument="--delimiter" />
150 </inputs>
151 <outputs> <!-- Declares an HDF5 peaklists dataset, a text report, and a hidden list collection built from the *.txt files discovered in the job working directory. -->
152 <data name="hdf5_file_out" format="h5" label="${tool.name} on ${on_string}: Peaklists (HDF5 file)" />
153 <data name="report" format="txt" label="${tool.name} on ${on_string}: Report" />
154 <collection name="peaklists_txt" type="list" label="${tool.name} on ${on_string}: Peaklists">
155 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt" format="tsv" directory="." visible="false" />
156 </collection>
157
158 </outputs>
159 <tests> <!-- Two zip-file runs (with and without a filelist). The noise-function select is addressed by its input name, function_noise. -->
160 <test>
161 <param name="input|format" value="zip_file" />
162 <param name="input|source" value="MTBLS79_mzml_triplicates.zip" ftype="zip" />
163 <param name="filelist" value="filelist_mzml_triplicates.txt" ftype="tsv" />
164 <param name="function_noise" value="median" />
165 <param name="snr_threshold" value="10.0" />
166 <param name="mults|ppm" value="2.0" />
167 <param name="mults|min_scans" value="1" />
168 <param name="mults|min_fraction" value="0.5" />
169 <param name="mults|rsd_threshold" value="0" />
170 <param name="delimiter" value="tab" />
171 <output name="hdf5_file_out" value="pls.h5" ftype="h5" compare="sim_size" />
172 <output name="report" value="report_pls_01.xt" ftype="txt"/>
173 <output_collection name="peaklists_txt" type="list">
174 <element name="batch04_QC17_rep01_262" file="batch04_QC17_rep01_262.txt" ftype="tsv"/>
175 <element name="batch04_QC17_rep02_263" file="batch04_QC17_rep02_263.txt" ftype="tsv"/>
176 <element name="batch04_QC17_rep03_264" file="batch04_QC17_rep03_264.txt" ftype="tsv"/>
177 </output_collection>
178 </test>
179 <test>
180 <param name="input|format" value="zip_file" />
181 <param name="input|source" value="batch_04_QC18_mzml_triplicate.zip" ftype="zip" />
182 <param name="function_noise" value="median" />
183 <param name="snr_threshold" value="10.0" />
184 <param name="mults|ppm" value="2.0" />
185 <param name="mults|min_scans" value="1" />
186 <param name="mults|min_fraction" value="0.8" />
187 <param name="mults|rsd_threshold" value="20.0" />
188 <param name="delimiter" value="tab" />
189 <output name="hdf5_file_out" value="pls_QC18.h5" ftype="h5" compare="sim_size"/>
190 <output name="report" value="report_pls_02.xt" ftype="txt"/>
191 <output_collection name="peaklists_txt" type="list">
192 <element name="batch04_QC18_rep01_280" file="batch04_QC18_rep01_280.txt" ftype="tsv"/>
193 <element name="batch04_QC18_rep02_281" file="batch04_QC18_rep02_281.txt" ftype="tsv"/>
194 <element name="batch04_QC18_rep03_282" file="batch04_QC18_rep03_282.txt" ftype="tsv"/>
195 </output_collection>
196 </test>
197 </tests>
198 <help>
199 -------------
200 Process Scans
201 -------------
202
203 Description
204 -----------
205
206 | Use this tool to generate a Peaklist for each of the files specified in the filelist. This tool is known to work with Thermo scientific .raw files and with .mzML files.
207 |
208 | Peaks are removed from final Peaklist if:
209
210 1) they occur in fewer than the user-defined 'Number of technical replicates a peak has to be present in' and/or
211 2) the relative standard deviation (measured in % and also termed the coefficient of variation) among intensity values for a peak is greater than the user-defined value.
212
213 Parameters
214 ----------
215
216 **\1. Source for the dataset**
217
218 - **Zip file from history** - use this option if you have uploaded a .zip directory containing ONLY your .mzML or .raw data files
219
220 - **Library directory name** - use this option to select data files stored on a data server (e.g. the Research Data Storage at the University of Birmingham).
221 | In the 'Library directory' box that appears on selecting this option, insert a string specifying the path to your data.
222 | Ensure to exclude the drive letter and user name, and to separate sub-directories using FORWARD SLASHES e.g. '[Y:\\users\\USERNAME]\\my_study_folder\\my_data_folder' becomes 'my_study_folder/my_data_folder'.
223
224 - **Data collection** - this option will provide a drop-down box in which you may select one of the data collections from your current history.
225
226 - **Single .mzML or .raw file** - use this option if you have uploaded a single .mzML or .raw file to the current Galaxy history
227
228 **\2. Filelist / Samplelist** (HIGHLY RECOMMENDED)
229
230 | A tabular-formatted .txt file with columns: filename, replicate, batch, classLabel, injectionOrder.
231 | Additional columns are allowed but are not used during processing.
232 | This file must be uploaded in to (or available from) the current history in order to allow for it to be selected from the drop-down menu.
233 |
234
235 @example_filelist@
236
237 **\3. Function to calculate the noise from each scan** (REQUIRED)
238
239 Select one option from the drop-down menu to indicate your preferred noise calculation algorithm - median, mean, mean absolute deviation and 'Xcalibur' (i.e. noise values as displayed in Thermo Xcalibur software will be used) are the valid options.
240
241 **\4. Signal-to-noise ratio threshold** (REQUIRED)
242
243 A numerical value from 0 upwards. Peaks with signal-to-noise ratios less-than or equal to this value will be removed from output Peaklist.
244
245 **\5. Filter specific scan windows or scan events?** (OPTIONAL)
246
247 | A boolean toggle ('No' = do not perform scan event filtering; 'Yes' = filter specific scan events)
248 | When 'Yes' is selected, users must specify whether to 'Exclude' or 'Include' specific scan events.
249
250 Users must then:
251
252 - Click the '+ Description' button and insert the 'Start m/z for scan event' and 'End m/z for scan event' for the scan event to be excluded or included.
253 - Select the 'scan type' to be filtered from 'Full scan' or 'SIM scan'
254 - Click '+ Description' to 'Exclude/Include' an additional scan event
255
256 **\6. Show options for multiple scans**
257
258 - **Minimum number of scans required for each m/z window or scan event** - A numerical value from 1 upwards to specify the number of instances of a given scan event required in order for peaks in this scan event to be included in the output Peaklist.
259 - **ppm error tolerance** - A numerical value from 0 upwards. This option serves to define the maximum allowable difference in m/z values (measured in parts-per-million) for mass spectral peaks to be clustered across replicates of the same scan event.
260 - **Minimum fraction (i.e. percentage) of scans a peak has to be present in** - A numerical value between 0 and 1 specifying the minimum fraction of scans a given mass spectral peak must be present in for it to be kept in the output Peaklist (select 0 to skip this step). The ppm error specified by the user will significantly impact which peaks fulfil this criterion.
261
262 **\7. Show advanced options**
263
264 - **Skip SIM-stitching** (REQUIRED; default = 'NO')
265
266 | A boolean toggle ('No' = perform SIM stitching; 'Yes' = skip stitching procedure)
267 | When set to 'yes' it will skip the processing step where (SIM) windows are 'stitched' or 'joined' together.
268 | Set this option to 'yes' if you like to process individual scan/SIM windows (events/ranges) without 'stitching' them.
269
270 - **Remove m/z range(s)** (OPTIONAL)
271
272 This option allows specific regions of the output peak matrices to be deleted by the user - this option may be useful for removing sections of a spectrum known to correspond to system noise peaks.
273
274 - **Start m/z of removal range** - a numerical value corresponding to the lowest m/z value in the spectral region to be removed
275 - **End m/z of removal range** - a numerical value corresponding to the highest m/z value in the spectral region to be removed
276
277 - **Relative intensity threshold used to remove ringing artefacts** (OPTIONAL)
278
279 - Fourier transform-based mass spectra often contain many peaks (ringing artefacts) around spectral features corresponding to genuine bio-molecules.
280 - A numerical value indicating the required relative intensity a peak must exceed (with reference to the largest peak in a cluster of peaks) in order to be retained.
281
282 Output file(s)
283 --------------
284
285 | A HDF5 file containing the processed Peaklists
286 |
287 | A processed Peaklist, in .tsv format, for each file specified in the filelist (Data Collection)
288
289 - Tab-delimited text file containing a numeric data matrix, with . as decimal, and NA for missing values.
290 - Includes additional information, such as the signal-to-noise ratio, relative-standard deviation (rsd) and 'purity' for each peak.
291
292 @github_developers_contributors@
293
294 </help>
295
296 <expand macro="citations" />
297 </tool>