diff msconvert_macros.xml @ 6:6b6bba73eadb draft

planemo upload commit d56659dd48f8c554a832787e71aca6ae65c90848
author galaxyp
date Tue, 14 Mar 2017 16:52:39 -0400
parents 637e309295cf
children e638f7fad66a
line wrap: on
line diff
--- a/msconvert_macros.xml	Mon Dec 12 17:07:11 2016 -0500
+++ b/msconvert_macros.xml	Tue Mar 14 16:52:39 2017 -0400
@@ -1,26 +1,50 @@
 <macros>
+  <xml name="generic_requirements"> <!-- shared requirements section: pulls in the ProteoWizard package at the version given by the VERSION token -->
+    <requirements>
+      <requirement type="package" version="@VERSION@">proteowizard</requirement>
+    </requirements>
+  </xml>
+  <token name="@VERSION@">3.0.9992</token> <!-- single place to bump the ProteoWizard version; also used by the requirement above -->
+  
   <xml name="msconvertCommand">
-    <command>
+    <command interpreter="python">
 <![CDATA[
-    #set $output_type_str = str($output_type)
-    #set $input_name = $input.display_name
-    #set $output_name = $input_name.split(".")[0] + "." + $output_type_str
-    ln -s '$input' '${input_name}' &&
+    #import re
+    #set $ext = $input.ext
 
-    #if $data_processing.precursor_refinement.use_mzrefinement
-      #set $input_ident_name = $data_processing.precursor_refinement.input_ident.display_name
-      ln -s '${data_processing.precursor_refinement.input_ident}' '${input_ident_name}' &&
+    ## sanitize display name for use as temp filename
+    #set basename = 'pwiz_in'
+    
+    #if hasattr($input, 'display_name')
+      ## explicit inclusion or exclusion ??
+      #set basename = $re.sub(r'[^\w\.\-\+]','_',$input.display_name)
+      ##set basename = $re.sub(r'[\/\\\;\|\&\>\<]','_',$input.display_name)
     #end if
 
-    mkdir outdir &&
-    msconvert ${input_name} --$output_type_str -o outdir --outfile '$output_name'
+    msconvert_wrapper.py
+
+    #if $ext == 'wiff':
+      --input='${input.extra_files_path}/wiff'
+      --input_name='${basename}.wiff'
+      --implicit='${input.extra_files_path}/wiff_scan'
+      --input='${input.extra_files_path}/wiff_scan'
+      --input_name='${basename}.wiff.scan'
+    #else
+      --input='${input}'
+      --input_name='$basename'
+    #end if
+    --output='${output}'
+    ## BEGIN_VERSION_DEFAULT
+    --fromextension=$ext
+    ## END_VERSION_DEFAULT
+    --toextension=${output_type}
 
     ## DATA PROCESSING FILTERS (NOTE: FOR VENDOR METHOD TO WORK, PEAK PICKING MUST BE THE FIRST FILTER!)
-      #if $data_processing.peak_picking.pick_peaks
-      --filter "peakPicking $data_processing.peak_picking.pick_peaks_algorithm msLevel=$data_processing.peak_picking.pick_peaks_ms_levels"
-      #end if
-
       #if $data_processing.precursor_refinement.use_mzrefinement
+        #set input_ident_name = ".".join(($basename, $data_processing.precursor_refinement.input_ident.ext))
+      --ident='$data_processing.precursor_refinement.input_ident'
+      --ident_name='$input_ident_name'
+      --refinement='$output_refinement'
       --filter "mzRefiner $input_ident_name
         msLevels=$data_processing.precursor_refinement.precursor_refinement_ms_levels
         thresholdScore=$data_processing.precursor_refinement.thresholdScore
@@ -29,6 +53,10 @@
         maxSteps=$data_processing.precursor_refinement.thresholdMaxSteps"
       #end if
 
+      #if $data_processing.peak_picking.pick_peaks
+        --filter "peakPicking $data_processing.peak_picking.pick_peaks_algorithm msLevel=$data_processing.peak_picking.pick_peaks_ms_levels"
+      #end if
+
       #if str($data_processing.charge_state_calculation.charge_state_calculation_method) == "predictor"
       --filter "chargeStatePredictor
         overrideExistingCharge=$data_processing.charge_state_calculation.predictor_overrideExistingCharge
@@ -48,27 +76,27 @@
       #end if
 
       #for threshold_entry in $data_processing.thresholds
-      --filter "threshold $threshold_entry.threshold_type $threshold_entry.value $threshold_entry.orientation"
+        --filter "threshold $threshold_entry.threshold_type $threshold_entry.value $threshold_entry.orientation"
       #end for
 
       #if $data_processing.filter_mz_windows.do_mzwindow_filter
-      --filter "mzWindow [$data_processing.filter_mz_windows.mz_window_from,$data_processing.filter_mz_windows.mz_window_to]"
+        --filter "mzWindow [$data_processing.filter_mz_windows.mz_window_from,$data_processing.filter_mz_windows.mz_window_to]"
       #end if
 
       #if $data_processing.etd_filtering.do_etd_filtering
-      --filter "ETDFilter $data_processing.etd_filtering.remove_precursor
-        $data_processing.etd_filtering.remove_charge_reduced
-        $data_processing.etd_filtering.remove_neutral_loss
-        $data_processing.etd_filtering.blanket_removal
-        $data_processing.etd_filtering.matching_tolerance $data_processing.etd_filtering.matching_tolerance_units"
+        --filter "ETDFilter $data_processing.etd_filtering.remove_precursor
+          $data_processing.etd_filtering.remove_charge_reduced
+          $data_processing.etd_filtering.remove_neutral_loss
+          $data_processing.etd_filtering.blanket_removal
+          $data_processing.etd_filtering.matching_tolerance $data_processing.etd_filtering.matching_tolerance_units"
       #end if
 
       #if $data_processing.ms2denoise.denoise
-      --filter "MS2Denoise $data_processing.ms2denoise.num_peaks $data_processing.ms2denoise.window_width $data_processing.ms2denoise.relax"
+        --filter "MS2Denoise $data_processing.ms2denoise.num_peaks $data_processing.ms2denoise.window_width $data_processing.ms2denoise.relax"
       #end if
 
       #if str($data_processing.ms2deisotope) == "true"
-      --filter "MS2Deisotope"
+        --filter "MS2Deisotope"
       #end if
 
 
@@ -78,38 +106,39 @@
       #end if
 
       #if len($filtering.indices) > 0
-      --filter "index
-      #for $index in $filtering.indices
-      [${index.from},${index.to}]
-      #end for
-      "
+        --filter "index
+        #for $index in $filtering.indices
+          [${index.from},${index.to}]
+        #end for
+        "
       #end if
 
       #if len($filtering.scan_numbers) > 0
-      --filter "scanNumber
-      #for $scan_number in $filtering.scan_numbers
-      [${scan_number.from},${scan_number.to}]
-      #end for
-      "
+        --filter "scanNumber
+        #for $scan_number in $filtering.scan_numbers
+          [${scan_number.from},${scan_number.to}]
+        #end for
+        "
       #end if
 
       #if $filtering.strip_it.value
-      --filter "stripIT"
+         --filter "stripIT"
       #end if
 
       #if $filtering.filter_ms_levels.do_ms_level_filter
-      --filter "msLevel [$filtering.filter_ms_levels.ms_level_from, $filtering.filter_ms_levels.ms_level_to]"
+        --filter "msLevel [$filtering.filter_ms_levels.ms_level_from, $filtering.filter_ms_levels.ms_level_to]"
       #end if
 
       #if str($filtering.polarity) != "false"
-      --filter "polarity $filtering.polarity"
+        --filter "polarity $filtering.polarity"
       #end if
 
       #if str($filtering.analyzer) != "false"
-      --filter "analyzer $filtering.analyzer"
+        --filter "analyzer $filtering.analyzer"
       #end if
       
     ## OUTPUT ENCODING
+      -- 
       #set $mz_encoding = str($settings.mz_encoding)
       #set $intensity_encoding = str($settings.intensity_encoding)
       #if $mz_encoding == $intensity_encoding
@@ -125,31 +154,23 @@
 
       #set binary_compression = str($settings.binary_compression)
       #if $binary_compression == "zlib"
-      --zlib
+        --zlib
       #else if $binary_compression == "numpressLinearPic"
-      --numpressLinear --numpressPic
+        --numpressLinear --numpressPic
       #else if $binary_compression == "numpressLinearSlof"
-      --numpressLinear --numpressSlof
+        --numpressLinear --numpressSlof
       #else if $binary_compression == "numpressLinear"
-      --numpressLinear
+        --numpressLinear
       #else if $binary_compression == "numpressPic"
-      --numpressPic
+        --numpressPic
       #else if $binary_compression == "numpressSlof"
-      --numpressSlof
+        --numpressSlof
       #end if
 
       #if $settings.gzip_compression
-      --gzip
+        --gzip
       #end if
 
-      &&
-
-    #if $data_processing.precursor_refinement.use_mzrefinement
-      mv '${input.name.rsplit('.',1)[0]}.mzRefinement.tsv' output.refinement &&
-    #end if
-
-    mv 'outdir/$output_name' output &&
-    rmdir outdir
 ]]>
     </command>
   </xml>
@@ -187,8 +208,18 @@
         <when value="false"></when>
         <when value="true">
           <param name="input_ident" type="data" format="pepxml,mzid" label="MZRefinery - Input identification data" />
-          <param name="thresholdScore" type="text" value="mvh" label="MZRefinery - Threshold Score Name" help="E.g. 'mvh' for MyriMatch, 'xcorr' for Sequest, 'specevalue' for MS-GF+" />
-          <param name="thresholdValue" type="text" value="50-" label="MZRefinery - Threshold Score Value" help="MZRefinery uses peptide-spectrum-matches with scores from this range to build its model. '100-' means score equal to or greater than 100. '-1e-10' means less than or equal to 1e-10." />
+          <param name="thresholdScore" type="text" value="mvh" label="MZRefinery - Threshold Score Name" help="E.g. 'mvh' for MyriMatch, 'xcorr' for Sequest, 'specevalue' for MS-GF+">
+            <sanitizer>
+              <valid initial="string.letters" />
+            </sanitizer>
+          </param>
+          <param name="thresholdValue" type="text" value="50-" label="MZRefinery - Threshold Score Value" help="MZRefinery uses peptide-spectrum-matches with scores from this range to build its model. '100-' means score equal to or greater than 100. '-1e-10' means less than or equal to 1e-10.">
+            <sanitizer> <!-- whitelist for numeric ranges: digits, exponent letters, sign characters and the decimal point, so values such as '0.05-' or '1e+5-' are not mangled -->
+              <valid initial="string.letters,string.digits">
+                <add value="-" /><add value="." /><add value="+" />
+              </valid>
+            </sanitizer>
+          </param>
           <param name="thresholdStep" type="float" value="0" label="MZRefinery - Threshold Score Step" help="If there are not enough quality hits at the given score threshold value, the threshold can be increased by this step (until maxSteps is reached)." />
           <param name="thresholdMaxSteps" type="integer" value="0" label="MZRefinery - At most, how many steps to widen the threshold?" />
           <param name="precursor_refinement_ms_levels" type="select" label="MZRefinery - Apply to MS Levels">
@@ -301,14 +332,14 @@
         <option value="CID">CID</option>
         <option value="SA">SA</option>
         <option value="HCD">HCD</option>
-        <option>BIRD</option>
-        <option>ECD</option>
-        <option>IRMPD</option>
-        <option>PD</option>
-        <option>PSD</option>
-        <option>PQD</option>
-        <option>SID</option>
-        <option>SORI</option>
+        <option value="BIRD">BIRD</option>
+        <option value="ECD">ECD</option>
+        <option value="IRMPD">IRMPD</option>
+        <option value="PD">PD</option>
+        <option value="PSD">PSD</option>
+        <option value="PQD">PQD</option>
+        <option value="SID">SID</option>
+        <option value="SORI">SORI</option>
       </param>
 
       <repeat name="indices" title="Filter Scan Indices">
@@ -373,7 +404,7 @@
   
   <xml name="msconvertOutput">
     <outputs>
-      <data format="mzml" name="output" from_work_dir="output" label="${input.name.rsplit('.',1)[0]}.${output_type}" >
+      <data format="mzml" name="output" label="${input.name.rsplit('.',1)[0]}.${output_type}" >
         <change_format>
           <when input="output_type" value="mz5" format="mz5" />
           <when input="output_type" value="mzXML" format="mzxml" />
@@ -381,9 +412,274 @@
           <when input="output_type" value="mgf" format="mgf" />
         </change_format>
       </data>
-      <data format="csv" name="output.refinement" from_work_dir="output.refinement" label="${input.name.rsplit('.',1)[0]}.mzRefinement.tsv">
+      <data format="tabular" name="output_refinement" label="${input.name.rsplit('.',1)[0]}.mzRefinement.tsv"> <!-- refinement report is a .tsv file, so it is declared tabular; only created when mzRefinement is enabled -->
         <filter>data_processing['precursor_refinement']['use_mzrefinement'] == True</filter>
       </data>
     </outputs>
   </xml>
+
+
+  <xml name="msconvert_tests"> <!-- functional test cases: each test sets tool parameters and compares the named outputs against reference files -->
+    <test>
+      <param name="input" value="small.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="pick_peaks" value="true" />
+      <param name="pick_peaks_algorithm" value="cwt" />
+      <param name="pick_peaks_ms_levels" value="1-" />
+      <output name="output" file="small-peakpicking-cwt-allMS.mzML" />
+    </test>
+
+    <!-- this data file only has profile MS1, so the result is the same -->
+    <test>
+      <param name="input" value="small.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="pick_peaks" value="true" />
+      <param name="pick_peaks_algorithm" value="cwt" />
+      <param name="pick_peaks_ms_levels" value="1" />
+      <output name="output" file="small-peakpicking-cwt-allMS.mzML" /> 
+    </test>
+    <test> <!-- mz5 output with 64-bit encodings; compared by size only (sim_size) -->
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mz5" />
+      <param name="mz_encoding" value="64" />
+      <param name="intensity_encoding" value="64" />
+      <output name="output" file="small-zlib-64.mz5" compare="sim_size" delta="150000" />
+    </test>
+    <test> <!-- mzXML output with 32-bit encodings -->
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzXML" />
+      <param name="mz_encoding" value="32" />
+      <param name="intensity_encoding" value="32" />
+      <output name="output" file="small-zlib-32.mzXML" />
+    </test>
+    <!-- TODO: how to test gzipped output?
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzXML" />
+      <param name="mz_encoding" value="32" />
+      <param name="intensity_encoding" value="32" />
+      <param name="binary_compression" value="false" />
+      <param name="gzip_compression" value="true" />
+      <output name="output" file="small-off-32.mzXML.gz" compare="sim_size" delta="100" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="mz_encoding" value="32" />
+      <param name="intensity_encoding" value="32" />
+      <param name="binary_compression" value="false" />
+      <param name="gzip_compression" value="true" />
+      <output name="output" file="small-off-32.mzML.gz" compare="sim_size" delta="100" />
+    </test>-->
+
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-numpressLP.mzML" />
+    </test>
+
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="binary_compression" value="numpressLinearSlof" />
+      <output name="output" file="small-numpressLS.mzML" />
+    </test>
+
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="binary_compression" value="numpressLinear" />
+      <output name="output" file="small-numpressL.mzML" />
+    </test>
+
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="binary_compression" value="numpressPic" />
+      <output name="output" file="small-numpressP.mzML" />
+    </test>
+
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="binary_compression" value="numpressSlof" />
+      <output name="output" file="small-numpressS.mzML" />
+    </test>
+
+    <test> <!-- mzRefinement run: mz5 input plus pepXML identifications; also checks the output_refinement dataset -->
+      <param name="input" value="Rpal_01.mz5" />
+      <param name="output_type" value="mzML" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <param name="use_mzrefinement" value="true" />
+      <param name="input_ident" value="Rpal_01.pepXML" />
+      <param name="thresholdScore" value="mvh" />
+      <param name="thresholdValue" value="40-" />
+      <output name="output" file="Rpal_01-mzRefinement.mzML" compare="sim_size" delta="0" />
+      <output name="output_refinement" file="Rpal_01.pepXML.mzRefinement.tsv" />
+    </test>
+    
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <param name="charge_state_calculation_method" value="predictor" />
+      <param name="predictor_overrideExistingCharge" value="true" />
+      <param name="minMultipleCharge" value="2" />
+      <param name="maxMultipleCharge" value="5" />
+      <param name="singleChargeFractionTIC" value="0.95" />
+      <param name="maxKnownCharge" value="8" />
+      <output name="output" file="small-chargeStatePredictor.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <param name="charge_state_calculation_method" value="turbocharger" />
+      <param name="minCharge" value="1" />
+      <param name="maxCharge" value="5" />
+      <param name="precursorsBefore" value="1" />
+      <param name="precursorsAfter" value="1" />
+      <param name="halfIsoWidth" value="1.5" />
+      <param name="defaultMinCharge" value="1" />
+      <param name="defaultMaxCharge" value="5" />
+      <output name="output" file="small-turbocharger.mzML" />
+    </test>
+    <test>
+      <param name="input" value="D100930_yeast_SCX10S_rak_ft8E_pc_01.mz5" />
+      <param name="output_type" value="mzML" />
+      <param name="do_etd_filtering" value="true" />
+      <param name="remove_precursor" value="true" />
+      <param name="remove_charge_reduced" value="true" />
+      <param name="remove_neutral_loss" value="false" />
+      <param name="blanket_removal" value="false" />
+      <param name="matching_tolerance" value="50" />
+      <param name="matching_tolerance_units" value="ppm" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="D100930_yeast_SCX10S_rak_ft8E_pc_01-etdfilter.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="thresholds_0|threshold_type" value="count" />
+      <param name="thresholds_0|value" value="100" />
+      <param name="thresholds_0|orientation" value="most-intense" />
+      <param name="thresholds_1|threshold_type" value="absolute" />
+      <param name="thresholds_1|value" value="1" />
+      <param name="thresholds_1|orientation" value="most-intense" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-threshold.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="do_mzwindow_filter" value="true" />
+      <param name="mz_window_from" value="420" />
+      <param name="mz_window_to" value="840" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-mzWindow.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="denoise" value="true" />
+      <param name="num_peaks" value="10" />
+      <param name="window_width" value="40" />
+      <param name="relax" value="false" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-denoise.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="ms2deisotope" value="true" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-deisotope.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="activation" value="CID" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-activation.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="indices_0|from" value="2" />
+      <param name="indices_0|to" value="4" />
+      <param name="indices_1|from" value="10" />
+      <param name="indices_1|to" value="10" />
+      <param name="indices_2|from" value="13" />
+      <param name="indices_2|to" value="15" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-index-filter.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="strip_it" value="true" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-strip-it.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="do_ms_level_filter" value="true" />
+      <param name="ms_level_from" value="2" />
+      <param name="ms_level_to" value="2" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-ms-level-filter.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="polarity" value="positive" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-polarity-filter.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="analyzer" value="IT" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-analyzer-filter.mzML" />
+    </test>
+    <test>
+      <param name="input" value="small-peakpicking-cwt-allMS.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="scan_numbers_0|from" value="3" />
+      <param name="scan_numbers_0|to" value="5" />
+      <param name="scan_numbers_1|from" value="11" />
+      <param name="scan_numbers_1|to" value="11" />
+      <param name="scan_numbers_2|from" value="14" />
+      <param name="scan_numbers_2|to" value="16" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-index-filter.mzML" /> <!-- the scan numbers here produce the same output as the index test above -->
+    </test>
+    <!--<test>
+      <param name="input" value="small.mzML" />
+      <param name="output_type" value="mzML" />
+      <param name="binary_compression" value="numpressLinearPic" />
+      <output name="output" file="small-deisotope-poisson.mzML" />
+    </test>-->
+  </xml>
+  <xml name="msconvert_help">
+**What it does**
+
+Converts between various mass spectrometry peak list formats. Additional options, such as filtering and precursor recalculation, are available.
+
+You can view the original documentation here_.
+
+.. _here: http://proteowizard.sourceforge.net/tools/msconvert.html
+  </xml>
+
+  <xml name="citations"> <!-- citation block: the ProteoWizard paper by DOI plus a BibTeX entry for the Galaxy-P tool repository -->
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btn323</citation>
+        <citation type="bibtex">@misc{toolsGalaxyP, author = {Chilton, J and Chambers, MC and others}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub repository},
+                                      year = {2015}, url = {https://github.com/galaxyproteomics/tools-galaxyp}}</citation> <!-- TODO: fix substitution of commit ", commit = {$sha1$}" -->
+    </citations>
+  </xml>
+
 </macros>