comparison dia_umpire_se.xml @ 3:6caa9011f245 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dia_umpire commit 2379480213ba2e084a93bf82052fac858ffd074f
author galaxyp
date Mon, 04 Mar 2019 11:49:18 -0500
parents b4f82d15cac0
children e8822850243a
comparison
equal deleted inserted replaced
2:b4f82d15cac0 3:6caa9011f245
1 <tool id="dia_umpire_se" name="DIA_Umpire_SE" version="@VERSION@.0"> 1 <tool id="dia_umpire_se" name="DIA_Umpire_SE" version="@VERSION@.0">
2 <description>DIA signal extraction</description> 2 <description>DIA signal extraction</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>dia_umpire_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <expand macro="stdio" /> 7 <expand macro="stdio" />
8 <command> 8 <command>
9 <![CDATA[ 9 <![CDATA[
10 #import re 10 #import re
11 ## want to save all outputs in a directory output.extra_files_path to be used by 11 ## want to save all outputs in a directory output.extra_files_path to be used by dia_umpire_quant
12 ## Is file naming going to be a problem? May need to have a name param 12 ## Is file naming going to be a problem? May need to have a name param
13 #set $ser_dir = $se_params.extra_files_path 13 #if $se_extraction_data:
14 #set se_params = $se_ser
15 #set $ser_dir = $se_ser.extra_files_path
14 mkdir $ser_dir 16 mkdir $ser_dir
15 && ln -s $ser_dir $output_dir 17 && ln -s '$ser_dir' '$output_dir'
18 && cat $se_config > $se_ser
19 #else:
20 #set se_params = $params
21 mkdir '$output_dir'
16 && cat $se_config > $se_params 22 && cat $se_config > $se_params
23 #end if
17 ## 24 ##
18 && echo " " >> $se_params 25 && echo " " >> $se_params
19 && echo "Thread = \$GALAXY_SLOTS" >> $se_params 26 && echo "Thread = \$GALAXY_SLOTS" >> $se_params
20 #for $i, $input_file in enumerate( $inputs ):
21 #if $input_prefix and len($input_prefix.strip()) > 0: 27 #if $input_prefix and len($input_prefix.strip()) > 0:
22 #set $input_path = $ser_dir + "/" + $input_prefix.__str__ + "_rep" + str($i + 1) + ".mzXML" 28 #set $input_path = str($output_dir) + '/' + $input_prefix.__str__ + '_rep' + str($i + 1) + '.mzXML'
23 #else: 29 #else:
24 #set $input_path = $ser_dir + "/" + $re.sub('\.[mM]\w+$','',$re.sub('[^-a-zA-Z0-9_.]','_',$input_file.name)) + ".mzXML" 30 #set $input_path = str($output_dir) + '/' + $re.sub('\.[mM]\w+$','',$re.sub('[^-a-zA-Z0-9_.]','_',$input.name)) + '.mzXML'
25 #end if 31 #end if
26 && ln -s "${input_file}" "$input_path" 32 && ln -s '${input}' '$input_path'
27 && java -jar \$DIA_UMPIRE_SE_JAR $input_path $se_params 33 && dia_umpire_se '$input_path' '$se_params'
28 && cat $output_dir/*.log >> "$logfile" 34 && cat $output_dir/*.log >> "$logfile"
29 #end for 35 #if not $mgfs_as_collection:
36 && cp "$output_dir/"*_Q1.mgf '$q1_mgf'
37 && cp "$output_dir/"*_Q2.mgf '$q2_mgf'
38 && cp "$output_dir/"*_Q3.mgf '$q3_mgf'
39 #end if
40 #if $ExportPrecursorPeak:
41 && cp "$output_dir/"*PeakCluster.csv '$PrecursorPeak'
42 #end if
30 ]]> 43 ]]>
31 </command> 44 </command>
32 <configfiles> 45 <configfiles>
33 <configfile name="se_config"><![CDATA[#slurp 46 <configfile name="se_config"><![CDATA[#slurp
34 #DIA-Umpire (version @VERSION@) 47 #DIA-Umpire (version @VERSION@)
35 #Data Independent Acquisition data processing and analysis package (Signal extraction module) 48 #Data Independent Acquisition data processing and analysis package (Signal extraction module)
36 49
37 #import re 50 #import re
38 #for $i, $input_file in enumerate( $inputs ):
39 #if $input_prefix: 51 #if $input_prefix:
40 #set $input_path = $input_prefix.__str__ + "_rep" + str($i + 1) + ".mzXML" 52 #set $input_path = $input_prefix.__str__ + "_rep" + str($i + 1) + ".mzXML"
41 #else: 53 #else:
42 #set $input_path = $re.sub('\.[mM]\w+$','',$re.sub('[^-a-zA-Z0-9_.]','_',$input_file.name)) + ".mzXML" 54 #set $input_path = $re.sub('\.[mM]\w+$','',$re.sub('[^-a-zA-Z0-9_.]','_',$input.name)) + ".mzXML"
43 #end if 55 #end if
44 # $input_file.name $input_path $input_file 56 # $input.name $input_path $input
45 #end for
46 57
47 #No of threads 58 #No of threads
48 Thread = 6 59 Thread = 6
49 60
50 #Report peak 61 #Report peak
148 SE.EstimateBG = true 159 SE.EstimateBG = true
149 SE.MinNoPeakCluster = 2 160 SE.MinNoPeakCluster = 2
150 SE.MaxNoPeakCluster = 3#slurp 161 SE.MaxNoPeakCluster = 3#slurp
151 #end if 162 #end if
152 163
164 ## SE.RemoveGroupedPeaksRTOverlap
165 ## SE.RemoveGroupedPeaksCorr
166 ## SE.IsoCorrThreshold
167
153 #if $instrument.model != 'Thermo_Orbitrap': 168 #if $instrument.model != 'Thermo_Orbitrap':
154 WindowType = $instrument.window.WindowType 169 WindowType = $instrument.window.WindowType
155 #if $instrument.window.WindowType == 'SWATH': 170 #if $instrument.window.WindowType == 'SWATH':
156 WindowSize = #if $instrument.window.WindowSize then $instrument.window.WindowSize else 25# 171 WindowSize = #if $instrument.window.WindowSize then $instrument.window.WindowSize else 25#
157 #else if $instrument.window.WindowType == 'V_SWATH': 172 #else if $instrument.window.WindowType == 'V_SWATH':
180 #slurp]]> 195 #slurp]]>
181 </configfile> 196 </configfile>
182 </configfiles> 197 </configfiles>
183 198
184 <inputs> 199 <inputs>
185 <param name="inputs" type="data" format="mzxml" multiple="true" label="Proteomics Spectrum files in mzXML format"/> 200 <param name="input" type="data" format="mzxml" label="Proteomics Spectrum files in mzXML format"/>
186 <param name="input_prefix" type="text" value="" optional="true" label="File name prefix" help="Names inputs: prefix_rep#.mzXML Leave blank to use History names of inputs"> 201 <param name="input_prefix" type="text" value="" optional="true" label="File name prefix" help="Names inputs: prefix_rep#.mzXML Leave blank to use History names of input">
187 <validator type="regex" message="">[a-zA-Z][a-zA-Z0-9_-]*</validator> 202 <validator type="regex" message="">[a-zA-Z][a-zA-Z0-9_-]*</validator>
188 </param> 203 </param>
189 <param name="output_dir" type="hidden" value="gx_path"/> 204 <param name="output_dir" type="hidden" value="gx_path"/>
190 205
191 <conditional name="instrument"> 206 <conditional name="instrument">
414 </help> 429 </help>
415 </param> 430 </param>
416 </when> 431 </when>
417 </conditional> 432 </conditional>
418 433
419 <param name="ExportPrecursorPeak" type="boolean" truevalue="true" falsevalue="false" checked="false" label="ExportPrecursorPeak" 434 <param name="ExportPrecursorPeak" type="boolean" truevalue="true" falsevalue="false" checked="false"
435 label="ExportPrecursorPeak"
420 help="Output detailed information about detected MS1 precursor and MS2 unfragmented precursor signals"/> 436 help="Output detailed information about detected MS1 precursor and MS2 unfragmented precursor signals"/>
421 <param name="ExportFragmentPeak" type="boolean" truevalue="true" falsevalue="false" checked="false" label="ExportFragmentPeak" 437 <param name="ExportFragmentPeak" type="boolean" truevalue="true" falsevalue="false" checked="false"
438 label="ExportFragmentPeak"
422 help="Output detailed information about detected MS2 signals"/> 439 help="Output detailed information about detected MS2 signals"/>
440 <param name="se_extraction_data" type="boolean" truevalue="Signal Extraction data" falsevalue="diaumpire_se.params" checked="false"
441 label="Output Signal Extraction data for DIA_Umpire_Quant" />
442 <param name="mgfs_as_collection" type="boolean" truevalue="true" falsevalue="false" checked="false"
443 label="Output MGFs as a collection" />
423 444
424 </inputs> 445 </inputs>
425 446
426 <outputs> 447 <outputs>
427 <data format="txt" name="logfile" label="${tool.name} log"/> 448 <data format="txt" name="logfile" label="${tool.name} ${on_string} log"/>
428 <data format="dia_umpire.ser" name="se_params" label="${tool.name} Signal Extraction data"> 449 <data format="dia_umpire.ser" name="se_ser" label="${tool.name} ${input.name} ${se_extraction_data}">
429 </data> 450 <filter>se_extraction_data</filter>
430 <data format="csv" name="PrecursorPeak" label="" from_work_dir="gx_path/swath_PeakCurve.csv"> 451 </data>
452 <data format="txt" name="params" label="${tool.name} ${input.name} ${se_extraction_data}">
453 <filter>not se_extraction_data</filter>
454 </data>
455 <data format="csv" name="PrecursorPeak" label="${tool.name} ${input.name} PeakCluster.csv" from_work_dir="gx_path/swath_PeakCurve.csv">
431 <filter>ExportPrecursorPeak</filter> 456 <filter>ExportPrecursorPeak</filter>
432 </data> 457 </data>
458 <!--
433 <data format="csv" name="FragmentPeak" label="" from_work_dir="gx_path/swath_PeakCurve.csv"> 459 <data format="csv" name="FragmentPeak" label="" from_work_dir="gx_path/swath_PeakCurve.csv">
434 <filter>ExportFragmentPeak</filter> 460 <filter>ExportFragmentPeak</filter>
435 </data> 461 </data>
462 -->
436 <collection name="dia_umpire_se_mgfs" type="list" label="${tool.name} MGFs"> 463 <collection name="dia_umpire_se_mgfs" type="list" label="${tool.name} MGFs">
464 <filter>mgfs_as_collection</filter>
437 <discover_datasets pattern="(?P&lt;name&gt;.*Q[1-3]\.mgf)" ext="mgf" visible="false" directory="gx_path" /> 465 <discover_datasets pattern="(?P&lt;name&gt;.*Q[1-3]\.mgf)" ext="mgf" visible="false" directory="gx_path" />
438 </collection> 466 </collection>
439 467 <data format="mgf" name="q1_mgf" label="${tool.name} ${($input.name[:-6] if $input.name.endswith('.mxXML') or $input.name.endswith('.mxxml') else $input.name).rsplit('.',1)[0]}_Q1.mgf" >
468 <filter>not mgfs_as_collection</filter>
469 </data>
470 <data format="mgf" name="q2_mgf" label="${tool.name} ${($input.name[:-6] if $input.name.endswith('.mxXML') or $input.name.endswith('.mxxml') else $input.name).rsplit('.',1)[0]}_Q2.mgf" >
471 <filter>not mgfs_as_collection</filter>
472 </data>
473 <data format="mgf" name="q3_mgf" label="${tool.name} ${($input.name[:-6] if $input.name.endswith('.mxXML') or $input.name.endswith('.mxxml') else $input.name).rsplit('.',1)[0]}_Q3.mgf" >
474 <filter>not mgfs_as_collection</filter>
475 </data>
440 </outputs> 476 </outputs>
441 <tests> 477 <tests>
442 <test> 478 <test>
479 <param name="input" value="LongSwath_UPS1_1ug_rep1_xs.mzXML" ftype="mzxml"/>
480 <conditional name="instrument">
481 <param name="model" value="AB_SCIEX_Triple_TOF_5600"/>
482 <param name="SE_MS1PPM" value="30"/>
483 <param name="SE_MS2PPM" value="40"/>
484 <conditional name="window">
485 <param name="WindowType" value="SWATH"/>
486 <param name="WindowSize" value="25"/>
487 </conditional>
488 </conditional>
489 <output name="q2_mgf">
490 <assert_contents>
491 <has_text text="BEGIN IONS" />
492 <has_text_matching expression="^PEPMASS=740.\d+$" />
493 </assert_contents>
494 </output>
443 </test> 495 </test>
444 </tests> 496 </tests>
445 <help> 497 <help>
446 <![CDATA[ 498 <![CDATA[
447 ================================ 499 ================================
455 **Input** (DIA-Umpire signal extraction module): 507 **Input** (DIA-Umpire signal extraction module):
456 ================================================ 508 ================================================
457 509
458 1. Spectral data in mzXML format 510 1. Spectral data in mzXML format
459 511
460 **Important**: for AB SCIEX data, use AB SCIEX MS Data Converter (http://goo.gl/wf7KRV): 512 **Important**: for AB SCIEX data, use AB SCIEX MS Data Converter (https://sciex.com/x32750):
461 Use it for .wiff -> .mzML conversion, then use MSConvert for .mzML -> .mzXML. Read "Raw spectral data files conversion to mzXML" section in the manual for more details. 513
514 Galaxy tool: https://toolshed.g2.bx.psu.edu/view/galaxyp/ms_data_converter/a36e9f847308
515
516 Use it for .wiff -> .mzML conversion, then use MSConvert for .mzML -> .mzXML. Read "Raw spectral data files conversion to mzXML" section in the manual for more details.
517
462 518
463 **Signal extraction parameters**: 519 **Signal extraction parameters**:
464 ================================= 520 =================================
465 521
466 *SE.MS1PPM*: (Unit: ppm) Maximum mass error for two MS1 peaks in consecutive spectra to be considered signal of the same ion. Used in MS1 signal detection and precursor alignment between samples/runs. Recommended value: Depends on the instrument. Typical values are 5-10 ppm for Thermo Orbitrap, 20-40 ppm for AB SCIEX Triple TOF 5600. 522 *SE.MS1PPM*: (Unit: ppm) Maximum mass error for two MS1 peaks in consecutive spectra to be considered signal of the same ion. Used in MS1 signal detection and precursor alignment between samples/runs. Recommended value: Depends on the instrument. Typical values are 5-10 ppm for Thermo Orbitrap, 20-40 ppm for AB SCIEX Triple TOF 5600.
495 551
496 552
497 **Output files of DIA-Umpire signal extraction module**: 553 **Output files of DIA-Umpire signal extraction module**:
498 ======================================================== 554 ========================================================
499 555
500 1. *DIA_Umpire_SE MGFs* - A Dataset Collection containing three .mgf files per input .mzXML file - pseudo MS/MS spectra sets for different quality categories of detected precursor signals (see the Online Methods of the publication for details). Example: 556 1. *DIA_Umpire_SE MGFs* - Three .mgf files per input .mzXML file - pseudo MS/MS spectra sets for different quality categories of detected precursor signals (see the Online Methods of the publication for details). These can be either individual history items or a dataset collection. Example:
501 557
502 1. <filename>_Q1.mgf 558 1. <filename>_Q1.mgf
503 2. <filename>_Q2.mgf 559 2. <filename>_Q2.mgf
504 3. <filename>_Q3.mgf 560 3. <filename>_Q3.mgf
505 561