view dia_umpire_se.xml @ 3:6caa9011f245 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dia_umpire commit 2379480213ba2e084a93bf82052fac858ffd074f
author galaxyp
date Mon, 04 Mar 2019 11:49:18 -0500
parents b4f82d15cac0
children e8822850243a
line wrap: on
line source

<tool id="dia_umpire_se" name="DIA_Umpire_SE" version="@VERSION@.0">
    <description>DIA signal extraction</description>
    <macros>
        <import>dia_umpire_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!--
        Command template (Cheetah, rendered to a shell command line):
          1. Create the working directory named by the hidden "output_dir" param.
             When Signal Extraction data is requested, that directory is a symlink
             to the extra_files_path of the "se_ser" output.
          2. Write the generated se_config into the parameter dataset and append
             a Thread setting taken from GALAXY_SLOTS.
          3. Symlink the input dataset to a sanitized *.mzXML name in that directory
             and run dia_umpire_se on it.
          4. Append the tool logs to the log output and copy the requested results.
        The tool takes exactly one input dataset, so the replicate suffix used with
        "input_prefix" is always _rep1 (the former expression referenced an
        undefined loop variable and failed whenever a prefix was supplied).
    -->
    <command>
<![CDATA[
#import re
## want to save all outputs in a directory output.extra_files_path to be used by dia_umpire_quant
## Is file naming going to be a problem? May need to have a name param
#if $se_extraction_data:
#set se_params = $se_ser
#set $ser_dir = $se_ser.extra_files_path
mkdir '$ser_dir'
&& ln -s '$ser_dir' '$output_dir'
&& cat '$se_config' > '$se_ser'
#else:
#set se_params = $params
mkdir '$output_dir'
&& cat '$se_config' > '$se_params'
#end if
## Append a trailing Thread setting so the job uses the slots allocated by Galaxy
&& echo " " >> '$se_params'
&& echo "Thread = \$GALAXY_SLOTS" >> '$se_params'
#if $input_prefix and len($input_prefix.strip()) > 0:
## Single input dataset: the replicate number is always 1
#set $input_path = str($output_dir) + '/' + str($input_prefix) + '_rep1.mzXML'
#else:
## Replace unsafe characters in the history name, then swap a trailing ".m*" extension for ".mzXML"
#set $input_path = str($output_dir) + '/' + $re.sub('\.[mM]\w+$','',$re.sub('[^-a-zA-Z0-9_.]','_',$input.name)) + '.mzXML'
#end if
&& ln -s '${input}' '$input_path'
&&  dia_umpire_se '$input_path' '$se_params'
&& cat '$output_dir'/*.log >> '$logfile'
#if not $mgfs_as_collection:
&& cp "$output_dir/"*_Q1.mgf '$q1_mgf'
&& cp "$output_dir/"*_Q2.mgf '$q2_mgf'
&& cp "$output_dir/"*_Q3.mgf '$q3_mgf'
#end if
#if $ExportPrecursorPeak:
&& cp "$output_dir/"*PeakCluster.csv '$PrecursorPeak'
#end if
]]>
    </command>
  <configfiles>
    <!--
        se_config: Cheetah template rendered into the DIA-Umpire signal extraction
        parameter file. Sections, in order: header and input naming comment, thread
        count, peak export flags, instrument-dependent SE.* settings, fragment
        grouping settings, advanced signal extraction settings, and (for models
        other than Thermo_Orbitrap) the isolation window settings.
        Every optional value falls back to the literal default after "else" when
        the corresponding form field is left empty.
    -->
    <configfile name="se_config"><![CDATA[#slurp
#DIA-Umpire (version @VERSION@)
#Data Independent Acquisition data processing and analysis package (Signal extraction module)

#import re
## The tool takes a single input dataset, so the replicate number is always 1
#if $input_prefix and len($input_prefix.strip()) > 0:
#set $input_path = str($input_prefix) + "_rep1.mzXML"
#else:
#set $input_path = $re.sub('\.[mM]\w+$','',$re.sub('[^-a-zA-Z0-9_.]','_',$input.name)) + ".mzXML"
#end if
# $input.name $input_path $input

#No of threads
Thread = 6

#Report peak
ExportPrecursorPeak = $ExportPrecursorPeak
ExportFragmentPeak = $ExportFragmentPeak

#Signal extraction parameters
#if $instrument.model == 'Thermo_Orbitrap':
SE.MS1PPM = #if $instrument.SE_MS1PPM then $instrument.SE_MS1PPM else 5#
SE.MS2PPM = #if $instrument.SE_MS2PPM then $instrument.SE_MS2PPM else 5#
SE.Resolution = #if $instrument.SE_Resolution then $instrument.SE_Resolution else 17000#
SE.StartCharge = #if $instrument.SE_StartCharge then $instrument.SE_StartCharge else 2#
SE.EndCharge = #if $instrument.SE_EndCharge then $instrument.SE_EndCharge else 4#
SE.MS2StartCharge = #if $instrument.SE_MS2StartCharge then $instrument.SE_MS2StartCharge else 2#
SE.MS2EndCharge = #if $instrument.SE_MS2EndCharge then $instrument.SE_MS2EndCharge else 4#
#else if $instrument.model == 'AB_SCIEX_Triple_TOF_5600':
SE.MS1PPM = #if $instrument.SE_MS1PPM then $instrument.SE_MS1PPM else 30#
SE.MS2PPM = #if $instrument.SE_MS2PPM then $instrument.SE_MS2PPM else 40#
SE.Resolution = #if $instrument.SE_Resolution then $instrument.SE_Resolution else 17000#
SE.StartCharge = #if $instrument.SE_StartCharge then $instrument.SE_StartCharge else 2#
SE.EndCharge = #if $instrument.SE_EndCharge then $instrument.SE_EndCharge else 4#
SE.MS2StartCharge = #if $instrument.SE_MS2StartCharge then $instrument.SE_MS2StartCharge else 2#
SE.MS2EndCharge = #if $instrument.SE_MS2EndCharge then $instrument.SE_MS2EndCharge else 4#
#else:
SE.MS1PPM = #if $instrument.SE_MS1PPM then $instrument.SE_MS1PPM else 30#
SE.MS2PPM = #if $instrument.SE_MS2PPM then $instrument.SE_MS2PPM else 40#
SE.Resolution = #if $instrument.SE_Resolution then $instrument.SE_Resolution else 17000#
SE.StartCharge = #if $instrument.SE_StartCharge then $instrument.SE_StartCharge else 2#
SE.EndCharge = #if $instrument.SE_EndCharge then $instrument.SE_EndCharge else 4#
SE.MS2StartCharge = #if $instrument.SE_MS2StartCharge then $instrument.SE_MS2StartCharge else 2#
SE.MS2EndCharge = #if $instrument.SE_MS2EndCharge then $instrument.SE_MS2EndCharge else 4#
#end if

#if $frag_settings.advanced == 'yes':
#Fragment grouping parameters
RPmax = #if $frag_settings.RPmax then $frag_settings.RPmax else 25#
RFmax = #if $frag_settings.RFmax then $frag_settings.RFmax else 300#
CorrThreshold = #if $frag_settings.CorrThreshold then $frag_settings.CorrThreshold else 0.2#
DeltaApex = #if $frag_settings.DeltaApex then $frag_settings.DeltaApex else 0.6#
RTOverlap = #if $frag_settings.RTOverlap then $frag_settings.RTOverlap else 0.3#
AdjustFragIntensity = #if $frag_settings.AdjustFragIntensity then $frag_settings.AdjustFragIntensity else true#
BoostComplementaryIon = #if $frag_settings.BoostComplementaryIon then $frag_settings.BoostComplementaryIon else true#
#else:
#Fragment grouping parameters
RPmax = 25
RFmax = 300
CorrThreshold = 0.2
DeltaApex = 0.6
RTOverlap = 0.3
AdjustFragIntensity = true
BoostComplementaryIon = true
#end if

#if $se_settings.advanced == 'yes':
#Signal extraction parameters
SE.SN = #if $se_settings.SE_SN then $se_settings.SE_SN else 2#
SE.MS2SN = #if $se_settings.SE_MS2SN then $se_settings.SE_MS2SN else 2#
SE.MinMSIntensity = #if $se_settings.SE_MinMSIntensity then $se_settings.SE_MinMSIntensity else 10#
SE.MinMSMSIntensity = #if $se_settings.SE_MinMSMSIntensity then $se_settings.SE_MinMSMSIntensity else 10#
SE.MaxCurveRTRange = #if $se_settings.SE_MaxCurveRTRange then $se_settings.SE_MaxCurveRTRange else 1#
SE.NoMissedScan = #if $se_settings.SE_NoMissedScan then $se_settings.SE_NoMissedScan else 1#
SE.MinFrag = #if $se_settings.SE_MinFrag then $se_settings.SE_MinFrag else 10#
SE.EstimateBG = #if $se_settings.SE_EstimateBG then $se_settings.SE_EstimateBG else true#
SE.MinNoPeakCluster = #if $se_settings.SE_MinNoPeakCluster then $se_settings.SE_MinNoPeakCluster else 2#
SE.MaxNoPeakCluster = #if $se_settings.SE_MaxNoPeakCluster then $se_settings.SE_MaxNoPeakCluster else 4#
## StartRT must emit its own value (it previously emitted SE_MaxNoPeakCluster)
SE.StartRT = #if $se_settings.SE_StartRT then $se_settings.SE_StartRT else 0#
SE.EndRT = #if $se_settings.SE_EndRT then $se_settings.SE_EndRT else 9999#
SE.MinMZ = #if $se_settings.SE_MinMZ then $se_settings.SE_MinMZ else 200#
SE.MinPrecursorMass = #if $se_settings.SE_MinPrecursorMass then $se_settings.SE_MinPrecursorMass else 700#
SE.MaxPrecursorMass = #if $se_settings.SE_MaxPrecursorMass then $se_settings.SE_MaxPrecursorMass else 5000#
SE.IsoPattern = #if $se_settings.SE_IsoPattern then $se_settings.SE_IsoPattern else 0.3#
SE.MassDefectFilter = #if $se_settings.SE_MassDefectFilter then $se_settings.SE_MassDefectFilter else true#
SE.MassDefectOffset = #if $se_settings.SE_MassDefectOffset then $se_settings.SE_MassDefectOffset else 0.1#
## The remaining settings have no fallback: they are written only when a value is supplied
#if $se_settings.SE_MinMS2NoPeakCluster:
SE.MinMS2NoPeakCluster = $se_settings.SE_MinMS2NoPeakCluster
#end if
#if $se_settings.SE_MinRTRange:
SE.MinRTRange = $se_settings.SE_MinRTRange
#end if
#if $se_settings.SE_RTtol:
SE.RTtol = $se_settings.SE_RTtol
#end if
#if $se_settings.SE_Denoise:
SE.Denoise = $se_settings.SE_Denoise
#end if
#if $se_settings.SE_NoPeakPerMin:
SE.NoPeakPerMin = $se_settings.SE_NoPeakPerMin
#end if
#if $se_settings.SE_RemoveGroupedPeaks:
SE.RemoveGroupedPeaks = $se_settings.SE_RemoveGroupedPeaks
#end if
#else:
#Signal extraction parameters
SE.SN = 2
SE.MS2SN = 2
SE.MinMSIntensity = 5
SE.MinMSMSIntensity = 1
SE.MaxCurveRTRange = 1
SE.NoMissedScan = 1
SE.MinFrag=10
SE.EstimateBG = true
SE.MinNoPeakCluster = 2
SE.MaxNoPeakCluster = 3#slurp
#end if

## SE.RemoveGroupedPeaksRTOverlap
## SE.RemoveGroupedPeaksCorr
## SE.IsoCorrThreshold

#if $instrument.model != 'Thermo_Orbitrap':
WindowType = $instrument.window.WindowType
#if $instrument.window.WindowType == 'SWATH':
WindowSize = #if $instrument.window.WindowSize then $instrument.window.WindowSize else 25#
#else if $instrument.window.WindowType == 'V_SWATH':
==window setting begin
#if $instrument.window.window_list.window_list_src == 'history':
## Copy the first two whitespace-separated fields of each row as a tab-separated window
#set $fh = open(str($instrument.window.window_list.WindowListFile),'r')
#for $i,$line in enumerate($fh):
#set $fields = $line.split()
#if len($fields) >= 2:
#set $win = '\t'.join($fields[:2])
$win
#end if
#end for
## silent: close the handle without emitting a line into the window list
#silent $fh.close()
#else:
#set $win_list = $instrument.window.window_list.WindowList.split('\n')
#for $win in $win_list:
#set $row = '\t'.join($win.split())
$row
#end for
#end if
==window setting end
#echo '#'
#end if
#end if
#slurp]]>
    </configfile>
  </configfiles>

    <inputs>
        <param name="input" type="data" format="mzxml" label="Proteomics Spectrum  files in mzXML format"/>
        <param name="input_prefix" type="text" value="" optional="true" label="File name prefix" help="Names inputs: prefix_rep#.mzXML Leave blank to use History names of input">
          <validator type="regex" message="">[a-zA-Z][a-zA-Z0-9_-]*</validator>
        </param>
        <param name="output_dir" type="hidden" value="gx_path"/>

        <conditional name="instrument">
          <param name="model" type="select" label="instrument used" help="Sets default parameters">
            <option value="Thermo_Orbitrap">Thermo Orbitrap</option>
            <option value="AB_SCIEX_Triple_TOF_5600">AB SCIEX Triple TOF 5600</option>
            <option value="other">other</option>
          </param>
          <when value="Thermo_Orbitrap">
       
            <param name="SE_MS1PPM" type="float" value="5" min="1" max="20" optional="true" label="Maximum mass error for two MS1 peaks">
                <help>
SE.MS1PPM: (Unit: ppm) Maximum mass error for two MS1 peaks in consecutive spectra to be considered signal of the same ion. Used in MS1 signal detection and precursor alignment between samples/runs.
Recommended value: Depends on the instrument. Typical values are 5-10 ppm for Thermo Orbitrap.
                </help>
            </param>
            <param name="SE_MS2PPM" type="integer" value="5" min="1" max="20" optional="true" label="Maximum mass error for two MS2 peaks">
                <help>
SE.MS2PPM: (Unit: ppm) Maximum mass error for two MS2 peaks in consecutive spectra to be considered signal of the same ion.
Recommended value: Depends on the instrument. If fragmentation spectra are measured with the same detector as MS1 spectra, set the same as Para.MS1PPM or a little higher, e.g. if you've set Para.MS1PPM=30 ppm for AB SCIEX Triple TOF 5600, consider setting to 40ppm.
                </help>
            </param>
            <expand macro="common_se_params" />

          </when>
          <when value="AB_SCIEX_Triple_TOF_5600">
            <param name="SE_MS1PPM" type="float" value="30" min="1" max="50" optional="true" label="Maximum mass error for two MS1 peaks">
                <help>
            SE.MS1PPM: (Unit: ppm) Maximum mass error for two MS1 peaks in consecutive spectra to be considered signal of the same ion. Used in MS1 signal detection and precursor alignment between samples/runs.
Recommended value: Depends on the instrument. Typical values are 20-40 ppm for AB SCIEX Triple TOF 5600.
                </help>
            </param>
            <param name="SE_MS2PPM" type="integer" value="40" min="1" max="50" optional="true" label="Maximum mass error for two MS2 peaks">
                <help>
SE.MS2PPM: (Unit: ppm) Maximum mass error for two MS2 peaks in consecutive spectra to be considered signal of the same ion.
Recommended value: Depends on the instrument. If fragmentation spectra are measured with the same detector as MS1 spectra, set the same as Para.MS1PPM or a little higher, e.g. if you've set Para.MS1PPM=30 ppm for AB SCIEX Triple TOF 5600, consider setting to 40ppm.
                </help>
            </param>
            <expand macro="common_se_params" />
            <expand macro="se_window_params" />
          </when>
          <when value="other">
            <param name="SE_MS1PPM" type="float" value="30" min="1" max="100" optional="true" label="Maximum mass error for two MS1 peaks">
                <help>
            SE.MS1PPM: (Unit: ppm) Maximum mass error for two MS1 peaks in consecutive spectra to be considered signal of the same ion. Used in MS1 signal detection and precursor alignment between samples/runs.
Recommended value: Depends on the instrument. Typical values are 5-10 ppm for Thermo Orbitrap, 20-40 ppm for AB SCIEX Triple TOF 5600.
                </help>
            </param>
            <param name="SE_MS2PPM" type="integer" value="40" min="1" max="100" optional="true" label="Maximum mass error for two MS2 peaks">
                <help>
SE.MS2PPM: (Unit: ppm) Maximum mass error for two MS2 peaks in consecutive spectra to be considered signal of the same ion.
Recommended value: Depends on the instrument. If fragmentation spectra are measured with the same detector as MS1 spectra, set the same as Para.MS1PPM or a little higher, e.g. if you've set Para.MS1PPM=30 ppm for AB SCIEX Triple TOF 5600, consider setting to 40ppm.
                </help>
            </param>
            <expand macro="common_se_params" />
            <expand macro="se_window_params" />
          </when>
        </conditional>

        <conditional name="frag_settings">
          <param name="advanced" type="select" label="Advanced Precursor-fragment Grouping Settings" help="Usually do not need to be changed">
            <option value="no">no</option>
            <option value="yes">yes</option>
          </param>
          <when value="no"/>
          <when value="yes">
            <param name="RPmax" type="integer" value="25" optional="true" min="1" label="RPmax">
              <help>
RPmax: Determines how many precursors a single fragment is allowed to be grouped to. Precursors are first sorted by Pearson correlation of elution profiles; this option specifies the rank of a precursor in this sorted list. Lowering the value for this parameter increases the stringency of precursor-fragments grouping. (Default: 25)
              </help>
            </param>
            <param name="RFmax" type="integer" value="300" optional="true" min="1" label="RFmax">
              <help>
RFmax: Determines how many fragments a single precursor is allowed to have. Fragments are first sorted by Pearson correlation of elution profiles; this option specifies the rank of a fragment in this sorted list. The lower - the more stringent. (Default: 300)
              </help>
            </param>
            <param name="CorrThreshold" type="float" value=".2" optional="true" min="0." max="1." label="CorrThreshold">
              <help>
CorrThreshold: (0.0~1.0) Minimum Pearson correlation between a precursor and a fragment to be considered, the higher, the more stringent. (Default: 0.2)
              </help>
            </param>
            <param name="RTOverlap" type="float" value=".3" min="0" optional="true" label="RTOverlap" >
              <help>
RTOverlap: Retention time overlap. (Default: 0.3)
              </help>
            </param>

            <param name="DeltaApex" type="float" value=".6" min="0" optional="true" label="DeltaApex" >
              <help>
DeltaApex: (Unit: minute) Maximum retention time difference of LC profile apexes between precursor and fragment (the lower, the more stringent). (Default: 0.6)
              </help>
            </param>
            <param name="BoostComplementaryIon" type="boolean" truevalue="true" falsevalue="false" checked="true" label="BoostComplementaryIon" >
              <help>
BoostComplementaryIon:  set to true if you want to boost complementary ions' intensity. The process of complementary ion boosting will also deisotope fragment peaks into singly charged m/z position. (Default: true)
              </help>
            </param>
            <param name="AdjustFragIntensity" type="boolean" truevalue="true" falsevalue="false" checked="true" label="AdjustFragIntensity" >
              <help>
AdjustFragIntensity:  set to true if you want to adjust fragment intensity by the Pearson correlation between a precursor and a fragment. (Default: true)
              </help>
            </param>
         </when>
        </conditional>

        <conditional name="se_settings">
          <param name="advanced" type="select" label="Advanced Signal Extraction Settings" help="Usually do not need to be changed">
            <option value="no">no</option>
            <option value="yes">yes</option>
          </param>
          <when value="no"/>
          <when value="yes">

            <param name="SE_MinMSIntensity" type="float" value="" optional="true" label="MinMSIntensity" >
              <help>
SE.MinMSIntensity: Minimum signal intensity for a peak in an MS1 spectrum to be considered as a valid signal. Any MS1 peak having intensity lower than this threshold will be ignored. It is the main parameter controlling how many peaks and isotopic envelopes will be detected.
Recommended value: Depends on the data. Check raw data for average noise- levels. E.g. TOF data often have thousands of random small intensity peaks. 
Warning: Setting this parameter too low (or zero) in such a case will significantly increase processing time and memory requirements.
              </help>
            </param>
            <param name="SE_MinMSMSIntensity" type="float" value="" optional="true" label="MinMSMSIntensity" >
              <help>
SE.MinMSMSIntensity: Same as MinMSIntensity, but for MS2 signals.
              </help>
            </param>
            <param name="SE_MaxCurveRTRange" type="float" value="" optional="true" label="MaxCurveRTRange" >
              <help>
SE.MaxCurveRTRange: (Unit: minute) The maximum allowed retention time (RT) range for elution profile of a single ion. If a detected elution profile exceeds that time span, it will be trimmed around the apex to fit into this range. Used to avoid having lots of ions which elute during the whole LC/MS run or over a very long period of time, as this greatly complicates grouping of precursors to fragments. Such long-eluting ions are likely to be contaminants, lock-mass ions, calibrants, etc.
Recommended value: The expected maximum peak chromatographic time. E.g. set to several percent of the whole run time, if the run was 100 min long, set to 5 min.
              </help>
            </param>
            <param name="SE_SN" type="float" value="" optional="true" label="SN Threshold" >
              <help>
SE.SN: Minimum signal-to-noise threshold for MS1 precursor signal detection. It is not the real S/N value, but rather a multiplier for MinMSIntensity: if a detected elution profile is less intense at the apex than (SN x MinMSIntensity), it will be discarded.
Recommended value: Typical values depend on the MinMSIntensity setting. If you've set MinMSIntensity to a very low value, consider setting this one to some small number in range 1.0 - 5.0.
              </help>
            </param>
            <param name="SE_MS2SN" type="float" value="" optional="true" label="MS2SN Threshold" >
              <help>
Same as para.SN, but for possible unfragmented precursors in MS2 data (i.e. for selecting precursors to generate Q3 tier pseudo spectra).
              </help>
            </param>
            <param name="SE_StartRT" type="float" value="0" optional="true" label="StartRT" >
              <help>
              </help>
            </param>
            <param name="SE_EndRT" type="float" value="9999" optional="true" label="EndRT" >
              <help>
              </help>
            </param>
            <param name="SE_MinMZ" type="float" value="200" optional="true" label="MinMZ" >
              <help>
              </help>
            </param>
            <param name="SE_MinPrecursorMass" type="float" value="700" optional="true" label="MinPrecursorMass" >
              <help>
              </help>
            </param>
            <param name="SE_MaxPrecursorMass" type="float" value="5000" optional="true" label="MaxPrecursorMass" >
              <help>
              </help>
            </param>
            <param name="SE_IsoPattern" type="float" value="0.3" optional="true" label="IsoPattern" >
              <help>
              </help>
            </param>
            <param name="SE_MassDefectFilter" type="boolean" truevalue="true" falsevalue="false" checked="true" label="MassDefectFilter" >
              <help>
              </help>
            </param>
            <param name="SE_MassDefectOffset" type="float" value="0.1" optional="true" label="MassDefectOffset" >
              <help>
              </help>
            </param>

            <param name="SE_MinRTRange" type="float" value="" optional="true" label="MinRTRange" >
              <help>

              </help>
            </param>
            <param name="SE_MaxNoPeakCluster" type="integer" value="" optional="true" label="MaxNoPeakCluster" >
              <help>
SE.MaxNoPeakCluster (new parameter in v1.4): Maximum number of isotope peaks for a precursor feature.
              </help>
            </param>
            <param name="SE_MinNoPeakCluster" type="integer" value="" optional="true" label="MinNoPeakCluster" >
              <help>
SE.MinNoPeakCluster (new parameter in v1.4): Minimum number of isotope peaks for a precursor feature. When it is set as 1, the algorithm will group fragments even for peaks without any isotope signal being found. For these cases, the assumed charged states will be from the parameter SE.StartCharge to SE.EndCharge.
              </help>
            </param>
            <param name="SE_MinMS2NoPeakCluster" type="integer" value="" optional="true" label="MinMS2NoPeakCluster" >
              <help>
SE.MinMS2NoPeakCluster (new parameter in v1.4): Minimum number of isotope peaks for a MS2 feature. When it is set as 1, the algorithm will group fragments even for peaks without any isotope signal being found. For these cases, the assumed charged states will be from the parameter SE.StartCharge to SE.EndCharge.

              </help>
            </param>
            <param name="SE_RTtol" type="float" value="" optional="true" label="RTtol" >
              <help>
              </help>
            </param>
            <param name="SE_NoPeakPerMin" type="integer" value="" optional="true" label="NoPeakPerMin" >
              <help>
              </help>
            </param>
            <param name="SE_NoMissedScan" type="integer" value="" optional="true" label="NoMissedScan" >
              <help>
SE.NoMissedScan: Maximum number of consecutive "gaps" allowed during extraction of elution profile (scans, in which the precursor mass being traced was not detected). E.g. if set to 1 and a particular mass can be found at every second scan, the algorithm will trace such a peak unless it can't find the peak in 2 scans in a row.
              </help>
            </param>
            <param name="SE_Denoise" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Denoise" >
              <help>
              </help>
            </param>
            <param name="SE_EstimateBG" type="boolean" truevalue="true" falsevalue="false" checked="true" label="EstimateBG" >
              <help>
SE.EstimateBG: Whether to estimate the background signal level during signal extraction (see the DIA-Umpire manual for details).
              </help>
            </param>
            <param name="SE_RemoveGroupedPeaks" type="boolean" truevalue="true" falsevalue="false" checked="true" label="RemoveGroupedPeaks" >
              <help>
              </help>
            </param>
            <param name="SE_MinFrag" type="integer" value="" optional="true" label="MinFrag" >
              <help>
SE.MinFrag: Minimum number of fragments for a precursor. Precursors which have less than the set number of fragments will be removed from pseudo MS/MS spectra.
              </help>
            </param>
          </when>
        </conditional>

        <param name="ExportPrecursorPeak" type="boolean" truevalue="true" falsevalue="false" checked="false" 
               label="ExportPrecursorPeak"
               help="Output detailed information about detected MS1 precursor and MS2 unfragmented precursor signals"/>
        <param name="ExportFragmentPeak" type="boolean" truevalue="true" falsevalue="false" checked="false" 
               label="ExportFragmentPeak"
               help="Output detailed information about detected MS2 signals"/>
        <param name="se_extraction_data" type="boolean" truevalue="Signal Extraction data" falsevalue="diaumpire_se.params" checked="false" 
               label="Output Signal Extraction data for DIA_Umpire_Quant" />
        <param name="mgfs_as_collection" type="boolean" truevalue="true" falsevalue="false" checked="false" 
               label="Output MGFs as a collection" />

    </inputs>

    <!--
        Outputs:
          logfile            always produced; concatenated DIA-Umpire logs.
          se_ser / params    mutually exclusive (selected by se_extraction_data);
                             whichever is produced receives the parameter file, and
                             se_ser additionally owns the working directory through
                             its extra_files_path.
          PrecursorPeak      only when ExportPrecursorPeak is set.
          dia_umpire_se_mgfs the Q1-Q3 MGF files as a list collection, or
          q1_mgf..q3_mgf     the same files as individual datasets.
        MGF labels use the input name with its last extension removed. The former
        label expression also tested for a misspelled ".mxXML" suffix that could
        never match; that dead branch was dropped without changing any label.
    -->
    <outputs>
        <data format="txt" name="logfile" label="${tool.name} ${on_string} log"/>
        <data format="dia_umpire.ser" name="se_ser" label="${tool.name} ${input.name} ${se_extraction_data}">
            <filter>se_extraction_data</filter>
        </data>
        <data format="txt" name="params" label="${tool.name} ${input.name} ${se_extraction_data}">
            <filter>not se_extraction_data</filter>
        </data>
        <data format="csv" name="PrecursorPeak" label="${tool.name} ${input.name} PeakCluster.csv" from_work_dir="gx_path/swath_PeakCurve.csv">
            <filter>ExportPrecursorPeak</filter>
        </data>
        <!--
        <data format="csv" name="FragmentPeak" label="" from_work_dir="gx_path/swath_PeakCurve.csv">
            <filter>ExportFragmentPeak</filter>
        </data>
        -->
        <collection name="dia_umpire_se_mgfs" type="list" label="${tool.name} MGFs">
            <filter>mgfs_as_collection</filter>
            <discover_datasets pattern="(?P&lt;name&gt;.*Q[1-3]\.mgf)" ext="mgf" visible="false" directory="gx_path" />
        </collection>
        <data format="mgf" name="q1_mgf" label="${tool.name} ${$input.name.rsplit('.',1)[0]}_Q1.mgf" >
            <filter>not mgfs_as_collection</filter>
        </data>
        <data format="mgf" name="q2_mgf" label="${tool.name} ${$input.name.rsplit('.',1)[0]}_Q2.mgf" >
            <filter>not mgfs_as_collection</filter>
        </data>
        <data format="mgf" name="q3_mgf" label="${tool.name} ${$input.name.rsplit('.',1)[0]}_Q3.mgf" >
            <filter>not mgfs_as_collection</filter>
        </data>
    </outputs>
    <tests>
        <!--
            Runs signal extraction on a small mzXML file with the AB SCIEX
            Triple TOF 5600 model (MS1 30 ppm, MS2 40 ppm) and a fixed SWATH
            window of size 25, then checks that the Q2 MGF output contains
            "BEGIN IONS" and text matching the PEPMASS expression below.
        -->
        <test>
            <param name="input" value="LongSwath_UPS1_1ug_rep1_xs.mzXML" ftype="mzxml"/>
            <conditional name="instrument">
                <param name="model" value="AB_SCIEX_Triple_TOF_5600"/>
                <param name="SE_MS1PPM" value="30"/>
                <param name="SE_MS2PPM" value="40"/>
                <conditional name="window">
                    <param name="WindowType" value="SWATH"/>
                    <param name="WindowSize" value="25"/>
                </conditional>
            </conditional>
            <output name="q2_mgf">
                <assert_contents>
                    <has_text text="BEGIN IONS" />
                    <has_text_matching expression="^PEPMASS=740.\d+$" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
<![CDATA[
================================
**DIA-Umpire signal extraction**
================================

DIA_Umpire_SE.jar provides the signal extraction module for DIA data (regular SWATH with fixed isolation window size, variable window SWATH, MSX) which generates pseudo MS/MS spectra to be searched against a protein database using conventional proteomics search engines such as X!Tandem, SEQUEST, MSGF+, OMSSA, etc.

Manual:  http://sourceforge.net/projects/diaumpire/files/Manual/DIA_Umpire_Manual_v1.4_pre.pdf

**Input** (DIA-Umpire signal extraction module):
================================================

  1. Spectral data in mzXML format

    **Important**: for AB SCIEX data, use AB SCIEX MS Data Converter (https://sciex.com/x32750):

      Galaxy tool:  https://toolshed.g2.bx.psu.edu/view/galaxyp/ms_data_converter/a36e9f847308

      Use it for .wiff -> .mzML conversion, then use MSConvert for .mzML -> .mzXML. Read "Raw spectral data files conversion to mzXML" section in the manual for more details.


**Signal extraction parameters**:
=================================

  *SE.MS1PPM*: (Unit: ppm) Maximum mass error for two MS1 peaks in consecutive spectra to be considered signal of the same ion. Used in MS1 signal detection and precursor alignment between samples/runs.  Recommended value: Depends on the instrument. Typical values are 5-10 ppm for Thermo Orbitrap, 20-40 ppm for AB SCIEX Triple TOF 5600.

  *SE.MS2PPM*: (Unit: ppm) Maximum mass error for two MS2 peaks in consecutive spectra to be considered signal of the same ion.  Recommended value: Depends on the instrument. If fragmentation spectra are measured with the same detector as MS1 spectra, set the same as Para.MS1PPM or a little higher, e.g. if you've set Para.MS1PPM=30 ppm for AB SCIEX Triple TOF 5600, consider setting to 40ppm.  

  *SE.Resolution*: Used only if the input spectra are stored in profile mode (i.e. not centroided, e.g. by using "Peak Picking" option in MSConvert when converting raw spectral data to mzXML format).  Profile spectra will be centroided using a sliding window. The window is moved across the entire mass range of a spectrum. Only the most intense peak in the window centered at the peak m/z is kept, others are discarded. The window width is calculated based on this parameter as: width = mz / para.Resolution.  Recommended value: Depends on the instrument and acquisition settings. Either check raw data to see the real average resolution of peaks in spectra or consult vendor specifications for the instrument. For AB SCIEX TripleTOF 5600 we use 15000-20000.

  *SE.StartCharge*: The minimum charge state for MS1 precursor ion to be detected during isotopic peak grouping.

  *SE.EndCharge*: The maximum charge state for MS1 precursor ion to be detected during isotopic peak grouping.  Recommended value: it is not recommended to set this parameter higher than 5 for typical proteomic experiments, as it is unlikely to observe peptides of higher charge states.

  *SE.MS2StartCharge*: The minimum charge state for MS2 unfragmented precursor ion to be detected during isotopic peak grouping.

  *SE.MS2EndCharge*: The maximum charge state for MS2 unfragmented precursor ion to be detected during isotopic peak grouping.  Recommended value: it is not recommended to set this parameter higher than 5 for typical proteomic experiments, as it is unlikely to observe peptides of higher charge states.

**DIA isolation window settings**:

  *WindowType*: DIA experiment type. DIA is implemented differently by different vendors and current support for data-formats is lacking, so the program needs additional info to properly interpret input spectral data.  Supported values in this version:

    * SWATH - fixed window size SWATH, as described in the original SWATH paper. If you're using this option, it's mandatory to specify WindowSize option as well.

    * V_SWATH - variable window size SWATH. If you're using this option, it's mandatory to specify Variable SWATH window setting (see section below).

    * MSX - 2Da isolation window, its position is shuffled randomly until the whole MS1 range is covered, the process is then repeated but coverage of MS1 range by isolation windows will be different because of randomization.

    * MSE - as originally implemented in Waters instruments. The full MS1 range is being fragmented at once.

  *WindowSize*: Isolation window size setting for fixed window SWATH. (Please skip this part if the data is from Thermo instrument) Note: The window size is to be specified including overlapping regions. I.e. if your windows are: 399.5-425.5, 424.5 - 450.5, etc., then the window size should be set to 26.  Note: Was tested only on AB SCIEX TripleTOF 5600 and Thermo Q-Exactive and Fusion data.  

  Variable SWATH window setting: Isolation settings for variable window size SWATH. (Please skip this part if the data is from Thermo instrument). The format should be a tab-delimited list of m/z low and high values, one window per row. 


**Output files of DIA-Umpire signal extraction module**:
========================================================

  1. *DIA_Umpire_SE MGFs* - Three .mgf files per input .mzXML file - pseudo MS/MS spectra sets for different quality categories of detected precursor signals (see the Online Methods of the publication for details). These can be either individual history items or a dataset collection.  Example:

    1. <filename>_Q1.mgf 
    2. <filename>_Q2.mgf
    3. <filename>_Q3.mgf

    Note: Each file corresponds to a different "quality level" of precursor ions (Q1 = more than two isotopic peaks detected in MS1, Q2 = only two isotopic peaks detected, Q3 = detected unfragmented precursor in MS2). These spectra are written to separate files, because they must be searched separately against a protein database as a consequence of differences in FDR estimates for these varying quality data.

  2. *DIA_Umpire_SE Signal Extraction data* - includes the binary files (.ser) containing all necessary information for quantitation procedures (parameter settings, all detected precursor and fragment peaks, precursor-fragment grouping information).  

  3. If ExportPrecursorPeak and/or ExportFragmentPeak options were set to true, text files with detailed information about detected MS1 and/or MS2 features will be generated.


]]>
    </help>
    <expand macro="citations" />
</tool>