comparison dia_umpire_quant.xml @ 0:e8f7be6a6e59 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/dia_umpire commit 5bcb19e47887db334a55235b65eca91c89905fb8
author galaxyp
date Tue, 23 Jan 2018 14:45:50 -0500
parents
children 6caa9011f245
comparison
equal deleted inserted replaced
-1:000000000000 0:e8f7be6a6e59
1 <tool id="dia_umpire_quant" name="DIA_Umpire_Quant" version="@VERSION@.0">
2 <description>DIA quantitation and targeted re-extraction</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <expand macro="stdio" />
8 <command>
9 <![CDATA[
10 #import shutil
11 ### $shutil.copytree($se_input.extra_files_path.__str__,$work_path.__str__)
12 ## TODO: save all outputs in a directory (output.extra_files_path) so that downstream tools can use them
13 ## Is file naming going to be a problem? May need to have a name param
14 cat $quant_params > $dia_umpire_quant && echo "Thread = \${GALAXY_SLOTS:-1}" >> $dia_umpire_quant
15 && cp -rp $se_input.extra_files_path.__str__ $work_path.__str__
16 && ln -s $protxml_input ${work_path}/$interact_prot_xml
17 && ln -s $searchdb_input ${work_path}/$searchdb_fa
18 #for $input in $mzxml_inputs:
19 && ln -s $input ${work_path}/${input.name}
20 #end for
21 #for $input in $pepxml_inputs:
22 && ln -s $input ${work_path}/${input.name}
23 #end for
24 ## Make sure pep.xml and prot.xml start with "interact-"
25 ## Run on the params file generated above (not the raw template) so the appended Thread = GALAXY_SLOTS line is actually used
26 && dia_umpire_quant $dia_umpire_quant
27 && cp $work_path/ProtSummary*.xls "$ProtSummary"
28 && cp $work_path/PeptideSummary*.xls "$PeptideSummary"
29 && cp $work_path/FragSummary*.xls "$FragSummary"
30 && cp $work_path/IDNoSummary*.xls "$IDNoSummary"
31 && cat $work_path/*.log > "$logfile"
32 ]]>
33 </command>
34
35 <configfiles>
36 <configfile name="user_mods"><![CDATA[
37 <?xml version="1.0"?>
38 <MSModSpecSet
39 xmlns="http://www.ncbi.nlm.nih.gov"
40 xmlns:xs="http://www.w3.org/2001/XMLSchema-instance"
41 xs:schemaLocation="http://www.ncbi.nlm.nih.gov OMSSA.xsd"
42 >
43 <MSModSpec>
44 <MSModSpec_mod>
45 <MSMod value="modificationwithneutrallosses">1</MSMod>
46 </MSModSpec_mod>
47 <MSModSpec_type>
48 <MSModType value="modaa">0</MSModType>
49 </MSModSpec_type>
50 <MSModSpec_name>test modification with neutral losses</MSModSpec_name>
51 <MSModSpec_monomass>123.456789</MSModSpec_monomass>
52 <MSModSpec_averagemass>0</MSModSpec_averagemass>
53 <MSModSpec_n15mass>0</MSModSpec_n15mass>
54 <MSModSpec_residues>
55 <MSModSpec_residues_E>B</MSModSpec_residues_E>
56 <MSModSpec_residues_E>O</MSModSpec_residues_E>
57 </MSModSpec_residues>
58 <MSModSpec_neutralloss>
59 <MSMassSet>
60 <MSMassSet_monomass>456.789123</MSMassSet_monomass>
61 <MSMassSet_averagemass>0</MSMassSet_averagemass>
62 <MSMassSet_n15mass>0</MSMassSet_n15mass>
63 </MSMassSet>
64 <MSMassSet>
65 <MSMassSet_monomass>789.123456</MSMassSet_monomass>
66 <MSMassSet_averagemass>0</MSMassSet_averagemass>
67 <MSMassSet_n15mass>0</MSMassSet_n15mass>
68 </MSMassSet>
69 </MSModSpec_neutralloss>
70 <MSModSpec_unimod>00</MSModSpec_unimod>
71 <MSModSpec_psi-ms>testMod</MSModSpec_psi-ms>
72 </MSModSpec>
73 </MSModSpecSet>
74 ]]>
75 </configfile>
76 <configfile name="quant_params"><![CDATA[
77 #DIA-Umpire (version @VERSION@)
78 #Data Independent Acquisition data processing and analysis package (Quantitation and targeted re-extraction module)
79
80 #Working folder path: the program will process all mzXML files in the working folder (please make sure the corresponding pepXML files are in the same folder with mzXML file)
81 #Internal spectral library file, output csv files will be stored in the working folder
82 Path = ${work_path}/
83
84 #Or you can specify all DIA mzXML files you want to analyze here (the working folder is still required for storing output files)
85 # ==File list begin
86 # ==File list end
87
88 #No of threads
89 Thread = 6
90
91 InternalLibID = #if $InternalLibID then $InternalLibID else 'LibID'#
92
93 #InternalLibSearch / TargetedExtraction both will work
94 InternalLibSearch = $TargetedExtraction
95 ExternalLibSearch = $external_settings.ExternalLibSearch
96 ## Peptide-centric targeted re-extraction defaults. User-supplied values are emitted further down so they override these (assumes a repeated key keeps its last value, which the Thread line appended in <command> also relies on)
97 #file format for external library: traML
98 ExternalLibPath =
99 ExternalLibDecoyTag= DECOY
100 ReSearchProb=0.5
101
102 #Fasta file path
103 # Fasta = $searchdb_input
104 Fasta = ${work_path}/$searchdb_fa
105
106 #Combined prot.xml file
107 Combined_Prot = ${work_path}/$interact_prot_xml
108
109 #Decoy tag
110 DecoyPrefix = $DecoyPrefix
111
112 #FDR threshold
113 #if $fdr_settings.advanced == 'yes':
114 PeptideFDR = #if $fdr_settings.PeptideFDR then $fdr_settings.PeptideFDR else 0.01#
115 ProteinFDR = #if $fdr_settings.ProteinFDR then $fdr_settings.ProteinFDR else 0.01#
116 DataSetLevelPepFDR = false
117 ProbThreshold = #if $fdr_settings.ProbThreshold then $fdr_settings.ProbThreshold else 0.99#
118 #else
119 PeptideFDR = 0.01
120 ProteinFDR = 0.01
121 ProbThreshold = 0.99
122 #end if
123
124 #UserMod path
125 #if $usermod.mod_src == 'history':
126 UserMod= $usermod.UserMods
127 ## #else if $usermod.mod_src == 'config':
128 ## UserMod= $user_mods
129 #else:
130 UserMod=
131 #end if
132
133 #if $external_settings.ExternalLibSearch == 'true':
134 ExternalLibPath = $external_settings.ExternalLibPath
135 ExternalLibDecoyTag = $external_settings.ExternalLibDecoyTag
136 ReSearchProb = $external_settings.ReSearchProb
137 #end if
138
139 ####Peptide/Fragment selection for MS2-based quantitation####
140 #if $quant_settings.advanced == 'yes':
141 ####Peptide filtering####
142 #Use either peptide group weight (GW) or peptide weight (PepW) to filter non-unique peptide (computed by ProteinProphet),
143 #Peptides with weight lower than threshold will be removed
144 FilterWeight = #if $quant_settings.FilterWeight then $quant_settings.FilterWeight else GW#
145 MinWeight = #if $quant_settings.MinWeight then $quant_settings.MinWeight else 0.9#
146 ####Peptide/Fragment selection for MS2-based quantitation####
147 TopNFrag = #if $quant_settings.TopNFrag then $quant_settings.TopNFrag else 6#
148 TopNPep = #if $quant_settings.TopNPep then $quant_settings.TopNPep else 6#
149 Freq = #if $quant_settings.Freq then $quant_settings.Freq else 0.5#
150 #else:
151 ####Peptide filtering####
152 #Use either peptide group weight (GW) or peptide weight (PepW) to filter non-unique peptide (computed by ProteinProphet),
153 #Peptides with weight lower than threshold will be removed
154 FilterWeight = GW
155 MinWeight = 0.9
156 ####Peptide/Fragment selection for MS2-based quantitation####
157 TopNFrag = 6
158 TopNPep = 6
159 Freq = 0.5
160 #end if
161 ]]>
162 </configfile>
163 </configfiles>
164
165 <inputs>
166 <param name="work_path" type="hidden" value="dia_output_dir"/>
167 <param name="interact_prot_xml" type="hidden" value="interact.prot.xml"/>
168 <param name="searchdb_fa" type="hidden" value="searchdb.fasta"/>
169 <param name="mzxml_inputs" type="data" format="mzxml" multiple="true" label="Proteomics Spectrum files in mzXML format"/>
170 <param name="pepxml_inputs" type="data" format="pepxml" multiple="true" label="PepXML"/>
171 <param name="protxml_input" type="data" format="protxml" label="ProtXML"/>
172 <param name="searchdb_input" type="data" format="fasta" label="Fasta Search Database"/>
173 <param name="se_input" type="data" format="dia_umpire.ser" label="DIA-Umpire SE Signal Extraction data"/>
174 <param name="InternalLibID" type="text" value="" label="InternalLibID " >
175 <help>
176 InternalLibID: Identifier for the internal spectral library.
177 If you are processing the dataset for the first time, it will be used as the name for the new library, if you are reprocessing data (e.g. using different thresholds/FDR levels, etc.) first a library with that name will be looked up and used if found.
178 Recommended value: you can use the same name for all analysis; however it is beneficial to provide unique meaningful names, to make the library more easily reusable.
179 </help>
180 </param>
181
182 <param name="TargetedExtraction" type="boolean" truevalue="true" falsevalue="false" checked="true" label="TargetedExtraction" >
183 <help>
184 Whether to process targeted re-extraction across samples and replicates.
185 </help>
186 </param>
187
188 <param name="DecoyPrefix" type="text" value="REVERSED_" label="Decoy Prefix in Protein Search FASTA Database" >
189 <help>
190 Typical values: if you are unsure what that prefix was, check protein names in the FASTA file. "rev_" and "DECOY_" are common choices.
191 </help>
192 </param>
193
194 <conditional name="usermod">
195 <param name="mod_src" type="select" label="User Modifications">
196 <help>
197 </help>
198 <option value="none">none</option>
199 <option value="history">From history dataset</option>
200 <!--
201 <option value="config">Build </option>
202 -->
203 </param>
204 <when value="none"/>
205 <when value="history">
206 <param name="UserMods" type="data" format="xml" label="User Modifications OMSSA XML"/>
207 </when>
208 </conditional>
209
210 <conditional name="external_settings">
211 <param name="ExternalLibSearch" type="select" label="Use ExternalLibSearch">
212 <help>
213 ExternalLibSearch: Whether to process targeted extraction across samples and replicates to re-search unidentified peptide ions from the specified external spectral library. A peptide ion in the external library will be re-searched if it satisfies the two conditions: (1) unidentified from the initial database search, and (2) unidentified, or identified but with a probability lower than the specified threshold described below.
214 </help>
215 <option value="false">no</option>
216 <option value="true">yes</option>
217 </param>
218 <when value="false"/>
219 <when value="true">
220 <param name="ExternalLibPath" type="data" format="dia_umpire.ser" label="DIA-Umpire ExternalLibPath">
221 <help>
222 ExternalLibPath (new parameter in v1.4): File path of external spectral library file. Currently only traML and custom binary (.serFS) formats are supported, and a decoy spectrum for each forward peptide ion sequence is required in the library file. (Effective only when ExternalLibSearch is set as true)
223 </help>
224 </param>
225 <param name="ExternalLibDecoyTag" type="text" value="DECOY" label="Decoy tag of decoy spectra" >
226 <help>
227 ExternalLibDecoyTag: Decoy tag of decoy spectra. (default: DECOY)
228 </help>
229 </param>
230 <param name="ReSearchProb" type="float" value="0.5" optional="true" min=".00" max="1." label="Probability threshold for re-search">
231 <help>
232 ReSearchProb: Probability threshold to determine which peptide ion will be re-searched using external spectral library. (default: 0.5)
233 </help>
234 </param>
235 </when>
236 </conditional>
237 <!--
238 -->
239
240 <conditional name="fdr_settings">
241 <param name="advanced" type="select" label="Advanced FDR Estimation Settings" help="Usually do not need to be changed">
242 <option value="no">no</option>
243 <option value="yes">yes</option>
244 </param>
245 <when value="no"/>
246 <when value="yes">
247 <param name="PeptideFDR" type="float" value=".01" optional="true" min=".01" max=".1" label="PeptideFDR" >
248 <help>
249 PeptideFDR: Target peptide level FDR.
250 DIA-Umpire estimates peptide level FDR by target-decoy approach according to peptide ion's maximum PeptideProphet probability. (default: 0.01)
251 Recommended value: 0.01 or 0.05 are the standard thresholds used in proteomics studies, corresponding to 1% and 5% FDR.
252 </help>
253 </param>
254 <param name="ProteinFDR" type="float" value=".01" optional="true" min=".01" max=".1" label="ProteinFDR" >
255 <help>
256 ProteinFDR: Target protein level FDR.
257 DIA-Umpire first removes protein identifications with low protein group probability (&lt;0.5) and estimates protein level FDR of the remaining list by target-decoy approach according to the maximum peptide ion probability. (default: 0.01)
258 Recommended value: 0.01 or 0.05.
259 </help>
260 </param>
261 <param name="ProbThreshold" type="float" value="0.99" optional="true" min=".00" max="1." label="ProbThreshold" >
262 <help>
263 ProbThreshold: (0.0~0.99) Probability threshold for peptide-centric targeted extraction. This probability is calculated by DIA-Umpire based on LDA analysis of true and decoy targeted identifications. (default: 0.99)
264 Recommended value: 0.99 corresponds to 99% confidence in an ID. Which means FDR should be less than 1% in that case.
265 </help>
266 </param>
267 </when>
268 </conditional>
269 <!--
270 -->
271 <conditional name="quant_settings">
272 <param name="advanced" type="select" label="Advanced Quantitation Settings" help="Usually do not need to be changed">
273 <option value="no">no</option>
274 <option value="yes">yes</option>
275 </param>
276 <when value="no"/>
277 <when value="yes">
278 <param name="FilterWeight" type="select" label="FilterWeight to remove shared peptides for protein quantitation">
279 <option value="GW" selected="true">peptide group weight</option>
280 <option value="PepW">peptide weight</option>
281 </param>
282 <param name="MinWeight" type="float" value=".9" optional="true" min="0.0" max="1.0" label="MinWeight" >
283 <help>
284 Minimum weight (peptide group weight or peptide weight chosen from the previous option) threshold of peptides to be considered for protein quantitation. Higher weight (closer to 1) of a peptide for a protein is more likely to be a unique peptide for the protein. (default: 0.9)
285 Recommended value: 0.9
286 </help>
287 </param>
288 <param name="TopNFrag" type="integer" value="6" optional="true" min="1" max="10" label="TopNFrag">
289 <help>
290 Top N fragments in terms of fragment score (Pearson correlation x fragment intensity) used for determining peptide ion intensity (default:6).
291 Recommended value: 3 - 6
292 </help>
293 </param>
294 <param name="TopNPep" type="integer" value="6" optional="true" min="1" max="10" label="TopNPep">
295 <help>
296 Top N peptide ions in terms of peptide ion intensity (determined by top fragments) used for determining protein intensity (default:6)
297 Recommended value: 3~6
298 </help>
299 </param>
300 <param name="Freq" type="float" value=".5" optional="true" min=".1" max="1." label="Freq" >
301 <help>
302 Minimum frequency of a peptide ion or fragment across all samples/replicates to
303 be considered for Top N ranking. (default:0.5) Recommended value: 0.5 or more
304 </help>
305 </param>
306 </when>
307 </conditional>
308 </inputs>
309
310 <outputs> <!-- run log, the generated parameter file, and one dataset per summary table copied out of the working directory -->
311 <data format="txt" name="logfile" label="${tool.name} log"/>
312 <data format="dia_umpire.quant" name="dia_umpire_quant" label="${tool.name}"/>
313 <data format="tabular" name="IDNoSummary" label="${tool.name} IDNoSummary.xls"/>
314 <data format="tabular" name="FragSummary" label="${tool.name} FragSummary.xls"/>
315 <data format="tabular" name="PeptideSummary" label="${tool.name} PeptideSummary.xls"/>
316 <data format="tabular" name="ProtSummary" label="${tool.name} ProtSummary.xls"/>
317 </outputs>
318 <tests>
319 <test>
320 </test>
321 </tests>
322 <help>
323 <![CDATA[
324 =============================================================
325 **DIA-Umpire quantitation and targeted re-extraction module**
326 =============================================================
327
328 DIA_Umpire_Quant.jar provides quantitation and targeted re-extraction analysis by taking results from Step A signal extraction and Step B untargeted MS/MS database search.
329
330 Manual: http://sourceforge.net/projects/diaumpire/files/Manual/DIA_Umpire_Manual_v1.4_pre.pdf
331
332 **Input** (DIA-Umpire quantitation and targeted re-extraction module)
333 =====================================================================
334
335 1. Identification results: a .pep.xml result file for each .mgf file and a prot.xml for the entire dataset.
336 2. Protein sequence database in FASTA format which was used in Step B (untargeted MS/MS database search).
337 3. All files, including the binary files (.serFS) and .mgf files generated from the signal extraction module, as well as the mzXML files converted from mgf files.
338 4. Quantitation parameter file (An example "diaumpire.quant_params" can be downloaded at http://goo.gl/wThAVI)
339
340 **Parameters** (for parameter file diaumpire.quant_params)
341 ==========================================================
342
343 **Basic parameters** that the users usually need to modify accordingly.
344
345 *TargetedExtraction*: Whether to process targeted re-extraction across samples and replicates. Set it as false if you don't want to reprocess data but wish to export quantitation report based on different fragment/peptide selection options
346
347 *Fasta*: Path to a protein sequence database in FASTA format which was used for untargeted MS/MS database search.
348
349 *Combined_Prot*: Path to the combined ProteinProphet .prot.xml file.
350
351 *DecoyPrefix*: Tag/prefix of decoy protein names that you used for protein database search. Typical values: if you are unsure what that prefix was, check protein names in the FASTA file. "rev\_" and "DECOY\_" are common choices.
352
353 *InternalLibID*: Identifier for the internal spectral library. If you are processing the dataset for the first time, it will be used as the name for the new library, if you are reprocessing data (e.g. using different thresholds/FDR levels, etc.) first a library with that name will be looked up and used if found. Recommended value: you can use the same name for all analysis; however it is beneficial to provide unique meaningful names, to make the library more easily reusable.
354
355 *ExternalLibSearch*: (new parameter in v1.4): Whether to process targeted extraction across samples and replicates to re-search unidentified peptide ions from the specified external spectral library. A peptide ion in the external library will be re-searched if it satisfies the two conditions: (1) unidentified from the initial database search, and (2) unidentified, or identified but with a probability lower than the specified threshold described below. (Please note that this feature is still being tested, and contact us if you have any questions)
356
357 *ExternalLibPath*: (new parameter in v1.4): File path of external spectral library file. Currently only traML and custom binary (.serFS) formats are supported, and a decoy spectrum for each forward peptide ion sequence is required in the library file. (Effective only when ExternalLibSearch is set as true)
358
359 *ExternalLibDecoyTag*: (new parameter in v1.4): Decoy tag of decoy spectra. (default: DECOY)
360
361 *ReSearchProb*: (new parameter in v1.4): Probability threshold to determine which peptide ion will be re-searched using external spectral library. (default: 0.5)
362
363
364 **Advanced parameters** that usually do **not** need to be changed
365
366 **FDR estimation parameters**:
367
368 *PeptideFDR*: Target peptide level FDR.
369 DIA-Umpire estimates peptide level FDR by target-decoy approach according to peptide ion's maximum PeptideProphet probability. (default: 0.01)
370 Recommended value: 0.01 or 0.05 are the standard thresholds used in proteomics studies, corresponding to 1% and 5% FDR.
371
372 *ProteinFDR*: Target protein level FDR.
373 DIA-Umpire first removes protein identifications with low protein group probability (<0.5) and estimates protein level FDR of the remaining list by target-decoy approach according to the maximum peptide ion's probability. (default: 0.01)
374 Recommended value: 0.01 or 0.05.
375
376 *ProbThreshold*: (0.0~0.99) Probability threshold for peptide-centric targeted extraction. This probability is calculated by DIA-Umpire based on LDA analysis of true and decoy targeted identifications. (default: 0.99)
377 Recommended value: 0.99 corresponds to 99% confidence in an ID. Which means FDR should be less than 1% in that case.
378
379 **Quantitation parameters**
380
381 *FilterWeight*: (GW or PepW) Choice of using peptide group weight or peptide weight (computed by ProteinProphet) to remove shared peptides for protein quantitation. (default: GW)
382 *MinWeight*: (0.0~0.99) Minimum weight (peptide group weight or peptide weight chosen from the previous option) threshold of peptides to be considered for protein quantitation. Higher weight (closer to 1) of a peptide for a protein is more likely to be a unique peptide for the protein. (default: 0.9)
383 Recommended value: 0.9
384
385 *TopNFrag*: Top N fragments in terms of fragment score (Pearson correlation x fragment intensity) used for determining peptide ion intensity (default:6).
386 Recommended value: 3~6
387
388 *TopNPep*: Top N peptide ions in terms of peptide ion intensity (determined by top
389 fragments) used for determining protein intensity (default:6)
390 Recommended value: 3~6
391
392 *Freq*: Minimum frequency of a peptide ion or fragment across all samples/replicates to
393 be considered for Top N ranking. (default:0.5) Recommended value: 0.5 or more
394
395 **Output** (DIA-Umpire quantitation and targeted re-extraction module):
396 =======================================================================
397
398 Binary files which include identification and quantitation information, and possibly the internal spectral library.
399
400 Three summary tables for protein, peptide ion, and fragment level reports (<filename> denotes the name of the raw file in which a peptide was identified)
401
402 1. Columns printed in protein summary table (ProtSummary.xls)
403
404 1. Protein Key: Protein accession number
405 2. <filename>_Prob: Protein identification probability
406 3. <filename>_Peptides: Number of identified peptide ions assigned to a protein
407 4. <filename>_PSMs: Number of identified pseudo MS/MS spectra assigned to a protein
408 5. <filename>_MS1_iBAQ: Protein abundance estimated by MS1 peptide intensities (See manuscript for details) (iBAQ: sum of all identified peptide intensities divided by the number of theoretical tryptic peptides)
409 6. <filename>_TopNpep/TopNfra, Freq>freq: Protein abundance estimated by top scored peptide ions and fragments (See manuscript for details).
410
411 2. Columns printed in peptide ion summary table (PeptideSummary.xls)
412
413 1. Peptide Key: Peptide ion identifier
414 2. Sequence: Peptide sequence
415 3. ModSeq: Peptide sequence with modification information
416 4. Proteins: Parent proteins
417 5. mz: Precursor m/z of peptide ion
418 6. Charge: Charge state of peptide ion
419 7. MaxProb: Maximum identification probability of peptide ion across the whole dataset from untargeted MS/MS database search
420 8. <filename>_Spec_Centric_Prob: Identification probability of a peptide ion from untargeted MS/MS database search
421 9. <filename>_Pep_Centric_Prob: Identification probability of a peptide ion from targeted re-extraction matching
422 10. <filename>_PSMs: The number of identified pseudo MS/MS spectra assigned to a peptide ion
423 11. <filename>_RT: Retention time of a peptide ion
424 12. <filename>_MS1: Peptide abundance estimated by MS1 precursor intensity. 13. <filename>_TopNfra: Peptide abundance estimated by top N fragment ions
425
426 3. Columns printed in fragment summary table (FragSummary.xls)
427
428 1. Fragment Key: Fragment ion identifier
429 2. Protein: Parent protein accession number
430 3. Peptide: Parent peptide ion identifier
431 4. Fragment: Fragment ion type
432 5. FragMz: m/z of fragment ion
433 6. <filename>_RT: Retention time of parent peptide ion
434 7. <filename>_Spec_Centric_Prob: Identification probability of peptide ion from untargeted MS/MS database search
435 8. <filename>_Pep_Centric_Prob: Identification probability of peptide ion from targeted re-extraction matching
436 9. <filename>_Intensity: fragment intensity
437 10. <filename>_Corr: Elution profile Pearson correlation between fragment ion and precursor peptide ion
438 11. <filename>_PPM: Mass error of an observed fragment m/z to the theoretical one
439
440 ]]>
441 </help>
442 <expand macro="citations" />
443 </tool>