diff spectrast_search.xml @ 1:390a3c4a7f6b draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/spectrast commit 301559f22171a8644b5be6cefc2a26cd477b6a7f-dirty
author jjohnson
date Mon, 18 Jun 2018 10:08:02 -0400
parents
children e67b0cc10377
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spectrast_search.xml	Mon Jun 18 10:08:02 2018 -0400
@@ -0,0 +1,426 @@
+<tool id="specrast_search" name="SpectraST Search" version="@VERSION@.0">
+    <description>with Spectral Library</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+        ## Need to symlink to data using name with extension that spectrast recognizes 
+        #import re
+        ## spectrum datasets
+        #set $exts = {'mgf':'MGF', 'mzml':'mzML', 'mzxml':'mzXML', 'msp': 'msp', 'mzdata':'mzData'}
+        #set $input_files = []
+        #def ln_scan($sf):
+            #set $ext = $exts[$sf.extension.lower()]
+            #set $input_name = $re.sub('[.]([^.]*)$','$ext',$sf.display_name.split('/')[-1]) 
+            #silent $input_files.append($input_name)
+            ln -s -f '${sf}' '${input_name}' &&
+            #set $encoded_id = $__app__.security.encode_id($sf.id)
+            echo "Spectrums:${sf.display_name}(API:${encoded_id}) ";
+        #end def
+        #if $collection.collection_selector == 'dataset':
+            ln_scan($collection.spectrum_file)
+        #else:
+            #for $spectrum_file in $spectrum_files:
+                ln_scan($collection.spectrum_file)
+            #end for
+        #end if
+        python $__tool_directory__/spectrast_params.py 
+           #if def $spectrastParams:
+               '$spectrastParams'
+           #end if
+           $spectrast_params' -o spectrast.params &&
+        mkdir out &&
+        spectrast -sFspectrast.params -sL'${libraryFile.files_path}/library.splib' -sE $output_format
+        #echo ' '.join($input_files)
+        #if $collection.collection_selector == 'dataset':
+            && cp out/* $output
+        #end if
+    ]]></command>
+    <configfiles>
+        <configfile name="spectrast_params"><![CDATA[#slurp
+libraryFile = $libraryFile
+#if $databaseFile is not None:
+databaseFile = $databaseFile
+#end if
+#if $databaseType is not None:
+databaseType = $databaseType
+#end if
+#if $indexCacheAll is not None:
+indexCacheAll = $indexCacheAll
+#end if
+#if $filterSelectedListFileName is not None:
+filterSelectedListFileName = $filterSelectedListFileName
+#end if
+#if $candidate_selection_and_scoring.precursorMzTolerance is not None:
+precursorMzTolerance = $candidate_selection_and_scoring.precursorMzTolerance
+#end if
+#if $candidate_selection_and_scoring.precursorMzUseAverage is not None:
+precursorMzUseAverage = $candidate_selection_and_scoring.precursorMzUseAverage
+#end if
+#if $candidate_selection_and_scoring.searchAllCharges is not None:
+searchAllCharges = $candidate_selection_and_scoring.searchAllCharges
+#end if
+#if $candidate_selection_and_scoring.detectHomologs is not None:
+detectHomologs = $candidate_selection_and_scoring.detectHomologs
+#end if
+#if $candidate_selection_and_scoring.fvalFractionDelta is not None:
+fvalFractionDelta = $candidate_selection_and_scoring.fvalFractionDelta
+#end if
+#if $candidate_selection_and_scoring.useSp4Scoring is not None:
+useSp4Scoring = $candidate_selection_and_scoring.useSp4Scoring
+#end if
+#if $candidate_selection_and_scoring.fvalUseDotBias is not None:
+fvalUseDotBias = $candidate_selection_and_scoring.fvalUseDotBias
+#end if
+#if $candidate_selection_and_scoring.usePValue is not None:
+usePValue = $candidate_selection_and_scoring.usePValue
+#end if
+#if $candidate_selection_and_scoring.useTierwiseOpenModSearch is not None:
+useTierwiseOpenModSearch = $candidate_selection_and_scoring.useTierwiseOpenModSearch
+#end if
+#if $outputExtension is not None:
+outputExtension = $outputExtension
+#end if
+#if $outputDirectory is not None:
+outputDirectory = $outputDirectory
+#end if
+#if $output_and_display.hitListTopHitFvalThreshold is not None:
+hitListTopHitFvalThreshold = $output_and_display.hitListTopHitFvalThreshold
+#end if
+#if $output_and_display.hitListLowerHitsFvalThreshold is not None:
+hitListLowerHitsFvalThreshold = $output_and_display.hitListLowerHitsFvalThreshold
+#end if
+#if $output_and_display.hitListShowHomologs is not None:
+hitListShowHomologs = $output_and_display.hitListShowHomologs
+#end if
+#if $output_and_display.hitListShowMaxRank is not None:
+hitListShowMaxRank = $output_and_display.hitListShowMaxRank
+#end if
+#if $output_and_display.hitListOnlyTopHit is not None:
+hitListOnlyTopHit = $output_and_display.hitListOnlyTopHit
+#end if
+#if $output_and_display.hitListExcludeNoMatch is not None:
+hitListExcludeNoMatch = $output_and_display.hitListExcludeNoMatch
+#end if
+#if $output_and_display.enzymeForPepXMLOutput is not None:
+enzymeForPepXMLOutput = $output_and_display.enzymeForPepXMLOutput
+#end if
+#if $printFingerprintingSummary is not None:
+printFingerprintingSummary = $printFingerprintingSummary
+#end if
+#if $spectrum_filtering.filterMinPeakCount is not None:
+filterMinPeakCount = $spectrum_filtering.filterMinPeakCount
+#end if
+#if $spectrum_filtering.filterAllPeaksBelowMz is not None:
+filterAllPeaksBelowMz = $spectrum_filtering.filterAllPeaksBelowMz
+#end if
+#if $spectrum_filtering.filterMaxIntensityBelow is not None:
+filterMaxIntensityBelow = $spectrum_filtering.filterMaxIntensityBelow
+#end if
+#if $spectrum_filtering.filterMinMzRange is not None:
+filterMinMzRange = $spectrum_filtering.filterMinMzRange
+#end if
+#if $spectrum_filtering.filterCountPeakIntensityThreshold is not None:
+filterCountPeakIntensityThreshold = $spectrum_filtering.filterCountPeakIntensityThreshold
+#end if
+#if $spectrum_processing.filterRemovePeakIntensityThreshold is not None:
+filterRemovePeakIntensityThreshold = $spectrum_processing.filterRemovePeakIntensityThreshold
+#end if
+#if $spectrum_processing.filterMaxPeaksUsed is not None:
+filterMaxPeaksUsed = $spectrum_processing.filterMaxPeaksUsed
+#end if
+#if $spectrum_processing.filterMaxDynamicRange is not None:
+filterMaxDynamicRange = $spectrum_processing.filterMaxDynamicRange
+#end if
+#if $spectrum_processing.peakScalingMzPower is not None:
+peakScalingMzPower = $spectrum_processing.peakScalingMzPower
+#end if
+#if $spectrum_processing.peakScalingIntensityPower is not None:
+peakScalingIntensityPower = $spectrum_processing.peakScalingIntensityPower
+#end if
+#if $spectrum_processing.peakScalingUnassignedPeaks is not None:
+peakScalingUnassignedPeaks = $spectrum_processing.peakScalingUnassignedPeaks
+#end if
+#if $spectrum_processing.peakNoBinning is not None:
+peakNoBinning = $spectrum_processing.peakNoBinning
+#end if
+#if $spectrum_processing.peakBinningNumBinsPerMzUnit is not None:
+peakBinningNumBinsPerMzUnit = $spectrum_processing.peakBinningNumBinsPerMzUnit
+#end if
+#if $spectrum_processing.peakBinningFractionToNeighbor is not None:
+peakBinningFractionToNeighbor = $spectrum_processing.peakBinningFractionToNeighbor
+#end if
+#if $spectrum_processing.filterLibMaxPeaksUsed is not None:
+filterLibMaxPeaksUsed = $spectrum_processing.filterLibMaxPeaksUsed
+#end if
+#if $spectrum_processing.filterLightIonsMzThreshold is not None:
+filterLightIonsMzThreshold = $spectrum_processing.filterLightIonsMzThreshold
+#end if
+#if $spectrum_processing.filterITRAQReporterPeaks is not None:
+filterITRAQReporterPeaks = $spectrum_processing.filterITRAQReporterPeaks
+#end if
+#if $spectrum_processing.filterTMTReporterPeaks is not None:
+filterTMTReporterPeaks = $spectrum_processing.filterTMTReporterPeaks
+#end if
+]]>        
+        </configfile>
+    </configfiles>
+    <inputs>
+        <conditional name="collection">
+            <param name="collection_selector" type="select" label="Process Spectrums Indivdiual or Collection ">
+                <option value="dataset" selected="true">Search Single Spectrum file and output results as a dataset</option>
+                <option value="collection">Search Multiple Spectrum files and output results as a collection</option>
+            </param>
+            <when value="dataset">
+                <param name="spectrum_file" type="data" format="mzxml,mzml,mgf,msp,mzdata" label="Spectrum file" help="mzxml,mzml,mgf,msp,mzdata"/>
+            </when>
+            <when value="collection">
+                <param name="spectrum_files" multiple="true" type="data" format="mzxml,mzml,mgf,msp,mzdata" label="Spectrum files" help="mzxml,mzml,mgf,msp,mzdata"/>
+            </when>
+        </conditional>
+
+        <param name="libraryFile" type="data" format="splib" label="Spectral library" help=""/>
+        <param name="spectrastParams" type="data" format="txt" optional="true" label="SpectraST param file" help=""/>
+        <section name="general" expanded="false" title="General Options">
+            <param name="databaseFile"  type="data" format="fasta" label="Fasta sequence database" optional="true" 
+                help="will not affect the search in any way, but this information will be included in the output for any downstream data processing."/>
+            <param name="databaseType"  type="select" label="Database type: Protein or DNA" >
+                <option value="AA" selected="true">protein AA</option>
+                <option value="DNA">genomic DNA</option>
+            </param>
+            <param name="indexCacheAll" type="boolean" label="indexCacheAll" help="Cache all entries in RAM. Requires a lot of memory (the library will usually be loaded almost in its entirety), but speeds up search for unsorted queries."/>
+        </section>
+        <section name="candidate_selection_and_scoring" expanded="false" title="Candidate Selection and Scoring Options">
+            <param name="precursorMzTolerance" type="float" value="" optional="true" 
+                label="precursorMzTolerance" 
+                help="Specify precursor m/z tolerance in Th. Monoisotopic mass is assumed. Default is 3.0"/>
+            <param name="precursorMzUseAverage"  type="select" optional="true" label="precursorMzUseAverage Default: false" >
+                <help> Report average mass instead of monoisotopic mass in search results. 
+                       Precursor m/z window is expanded to account for difference between average and monoisotopic mass.
+                </help>
+                <option value="false">false</option>
+                <option value="true">true</option>
+            </param>
+            <param name="searchAllCharges"  type="select" optional="true" label="searchAllCharges Default: false" >
+                <help> Report average mass instead of monoisotopic mass in search results. 
+                       Precursor m/z window is expanded to account for difference between average and monoisotopic mass.
+                </help>
+                <option value="false">false</option>
+                <option value="true">true</option>
+            </param>
+            <param name="detectHomologs" type="integer" value="" optional="true" 
+                label="detectHomologs" 
+                help="Detect homologous lower hits up to this rank. Looks for lower hits homologous to the top hit and adjust delta accordingly. Default is 4"/>
+            <param name="fvalFractionDelta" type="float" value="" optional="true" 
+                label="fvalFractionDelta" 
+                help="Specify the fraction of the normalized delta score (delta/dot) in the F-value formula.  Default is 0.4"/>
+            <param name="useSp4Scoring"  type="select" optional="true" label="useSp4Scoring (SP4) Default: false" >
+                <help> Use original SpectraST (4.0 or earlier) scoring, based on dot products of square-root intensities.
+                </help>
+                <option value="false">false</option>
+                <option value="true">true</option>
+            </param>
+            <param name="fvalUseDotBias" type="select" optional="true" label="fvalUseDotBias (Only applicable for SP4 scoring) Default: true" >
+                <help> Use dot bias to penalize high-scoring matches with massive noise and/or dominant peak.
+                </help>
+                <option value="true">true</option>
+                <option value="false">false</option>
+            </param>
+            <param name="usePValue" type="select" optional="true" label="usePValue (NOT applicable for SP4 scoring) Default: false" >
+                <help> Use dot bias to penalize high-scoring matches with massive noise and/or dominant peak.
+                </help>
+                <option value="false">false</option>
+                <option value="true">true</option>
+            </param>
+            <param name="useTierwiseOpenModSearch" type="select" optional="true" label="useTierwiseOpenModSearch (NOT applicable for SP4 scoring) Default: false" >
+                <help> Perform tier-wise open modification search for modifications within precursor m/z window specified.
+                       Note that the scoring is different from normal SpectraST searches.
+                </help>
+                <option value="false">false</option>
+                <option value="true">true</option>
+            </param>
+        </section>
+        <section name="output_and_display" expanded="false" title="Output and Display Options">
+            <param name="hitListTopHitFvalThreshold" type="float" value="" optional="true" 
+                label="hitListTopHitFvalThreshold" 
+                help="Minimum F value threshold for the top hit. Only top hits having F value greater than this will be printed.  Default is 0.0 (all top hits)"/>
+            <param name="hitListLowerHitsFvalThreshold" type="float" value="" optional="true" 
+                label="hitListLowerHitsFvalThreshold" 
+                help="Minimum F value threshold for the lower hits. Only lower hits having F value greater than this will be printed. Default=0.45"/>
+            <param name="hitListShowHomologs" type="select" optional="true" label="hitListShowHomologs  Default: true" >
+                <help> Always displays homologous lower hits regardless of F value.
+                </help>
+                <option value="true">true</option>
+                <option value="false">false</option>
+            </param>
+            <param name="hitListShowMaxRank" type="integer" value="" optional="true" min="1"
+                label="hitListShowMaxRank" 
+                help="Maximum rank for hits shown for each query. Default is 1 (Just the top hit)"/>
+            <param name="hitListOnlyTopHit" type="select" optional="true" label="hitListOnlyTopHit  Default: true" >
+                <help> Only display the top hit for each query.
+                </help>
+                <option value="true">true</option>
+                <option value="false">false</option>
+            </param>
+            <param name="hitListExcludeNoMatch" type="select" optional="true" label="hitListExcludeNoMatch  Default: true" >
+                <help> Do not display queries for which there is no candidate, or the top hit is below the minimum F value threshold specified.
+                </help>
+                <option value="true">true</option>
+                <option value="false">false</option>
+            </param>
+            <param name="enzymeForPepXMLOutput" type="text" value="trypsin" optional="true" label="enzymeForPepXMLOutput">
+                <help>Specify the proteolytic enzyme used, for the purpose of pepXML output. 
+                </help>
+                <option value="trypsin">trypsin</option>
+                <option value="lysc">lysc</option>
+            </param>
+        </section>
+        <section name="spectrum_filtering" expanded="false" title="Spectrum Filtering Options">
+            <param name="filterMinPeakCount" type="integer" value="" optional="true" min="1" 
+                label="filterMinPeakCount" 
+                help="Discard query spectra with fewer than peaks above set threshold.  Default is 10"/>
+            <param name="filterAllPeaksBelowMz" type="integer" value="" optional="true" min="1" 
+                label="filterAllPeaksBelowMz" 
+                help="Discard query spectra with almost no peaks above specified m/z value. All query spectra with 95%+ of the total intensity below m/z will be removed.  Default is 520"/>
+            <param name="filterMaxIntensityBelow" type="integer" value="" optional="true" min="0" 
+                label="filterMinPeakCount" 
+                help="Discard query spectra with no peaks with intensity above this.  Default is 0"/>
+            <param name="filterMinMzRange" type="integer" value="" optional="true" min="0" 
+                label="filterMinMzRange" 
+                help="Discard query spectra with m/z range narrower than this.  Default 350"/>
+            <param name="filterCountPeakIntensityThreshold" type="float" value="" optional="true" 
+                label="filterCountPeakIntensityThreshold" 
+                help="Minimum peak intensity for peaks to be counted. Only peaks with intensity above this will be counted to meet the requirement for minimum number of peaks.  Default is 2.01"/>
+        </section>
+        <section name="spectrum_processing" expanded="false" title="Spectrum Processing Options">
+            <param name="filterRemovePeakIntensityThreshold" type="float" value="" optional="true" 
+                label="filterRemovePeakIntensityThreshold" 
+                help="Noise peak threshold. All peaks with intensities below this will be zeroed.  Default is 2.01"/>
+            <param name="filterMaxPeaksUsed" type="integer" value="" optional="true" min="1" 
+                label="filterMaxPeaksUsed" 
+                help="Remove all but the top number peaks in query spectra.  Default 150"/>
+            <param name="filterMaxDynamicRange" type="integer" value="" optional="true" min="1" 
+                label="filterMaxDynamicRange" 
+                help="Remove all peaks smaller than 1/number of the base (highest) peak in query spectra.  Default 1000"/>
+
+            <param name="peakScalingMzPower" type="float" value="" optional="true" 
+                label="peakScalingMzPower" 
+                help="Intensity scaling power with respect to the m/z value and the raw intensity. The scaled intensity will be (m/z)^mzpow * (raw intensity)^intpow  Default is 2.01"/>
+            <param name="peakScalingIntensityPower" type="float" value="" optional="true" 
+                label="peakScalingIntensityPower" 
+                help="Intensity scaling power with respect to the m/z value and the raw intensity. The scaled intensity will be (m/z)^mzpow * (raw intensity)^intpow  Default is 2.01"/>
+
+            <param name="peakScalingUnassignedPeaks" type="float" value="" optional="true"  min="0.0"
+                label="peakScalingUnassignedPeaks" 
+                help="Scaling factor for unassigned peaks in library spectra. Default is 1.0"/>
+            <param name="peakNoBinning" type="select" optional="true" label="peakNoBinning  Default: false" >
+                <help> Only display the top hit for each query.
+                </help>
+                <option value="true">true - Disable binning and perform peak-to-peak matching.</option>
+                <option value="false">false - Use binning</option>
+            </param>
+            <param name="peakBinningNumBinsPerMzUnit" type="integer" value="" optional="true" min="1" 
+                label="peakBinningNumBinsPerMzUnit" 
+                help="Number of bins per Th. Default 1"/>
+            <param name="peakBinningFractionToNeighbor" type="float" value="" optional="true" 
+                label="peakBinningFractionToNeighbor" 
+                help="Fraction of the scaled intensity assigned to neighboring bins.  Default is 0.5"/>
+            <param name="filterLibMaxPeaksUsed" type="integer" value="" optional="true" min="1" 
+                label="filterLibMaxPeaksUsed" 
+                help="Remove all but the top peaks in the LIBRARY spectra.  Default 50"/>
+            <param name="filterLightIonsMzThreshold" type="integer" value="" optional="true" min="1" 
+                label="filterLightIonsMzThreshold" 
+                help="Remove all light ions with m/z lower than this Th for both library and query spectra. Default 180"/>
+            <param name="filterITRAQReporterPeaks" type="select" optional="true" label="filterITRAQReporterPeaks  Default: false" >
+                <help> Remove iTRAQ reporter peaks in the range 112-122 Th.
+                </help>
+                <option value="true">true - Remove iTRAQ reporter peaks</option>
+                <option value="false">false - Use binning</option>
+            </param>
+            <param name="filterTMTReporterPeaks" type="select" optional="true" label="filterTMTReporterPeaks  Default: false" >
+                <help> Remove TMT reporter peaks in the range 126-132 Th. 
+                </help>
+                <option value="true">true - Remove TMT reporter peaks</option>
+                <option value="false">false</option>
+            </param>
+        </section>
+        
+        <param name="output_format" type="select" label="Output format">
+            <option value="pep.xml">pep.xml</option>
+            <option value="xls">xls (tabular)</option>
+            <option value="txt">txt (fixed width text)</option>
+        </param>
+        <param name="printFingerprintingSummary" type="select" optional="true" label="printFingerprintingSummary Default: no" >
+            <help> Print a text file summarizing fingerprinting results.
+            </help>
+            <option value="fingerprint.summary">yes</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="log" format="txt" label="spectrast.log" from_work_dir="spectrast.log"/>
+        <data name="params" format="txt" label="spectrast.params" from_work_dir="spectrast.params"/>
+        <data name="fingerprint_summary" format="txt" label="fingerprint.summary" from_work_dir="fingerprint.summary">
+            <filter>printFingerprintingSummary and printFingerprintingSummary == 'fingerprint.summary'</filter>
+        </data>
+        <data name="output" format="pep.xml" label="${tool.name} on ${on_string}: ${output_format}">
+            <change_format>
+                <when input="output_format" value="xls" format="tabular" />
+                <when input="output_format" value="txt" format="txt" />
+            </change_format>
+            <actions>
+                <conditional name="output_format">
+                   <when value="xls">
+                       <action name="column_names" type="metadata" default="Geneid,${','.join([a.name for a in $input_files])}" />
+                   </when>
+                </conditional>
+            </actions>
+        </data>
+        <collection name="output_set" type="list" label="${tool.name} on ${on_string}: ${output_format}">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.xls" ext="tabular" directory="out" visible="true" />
+            <discover_datasets pattern="__designation_and_ext__" directory="out" visible="true" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="spectrum_files" value="msgf-test.mzML" ftype="mzml" />
+            <param name="libraryFile" value="splib.html" ftype="splib" >
+                <composite_data value="splib/library.splib"/>
+                <composite_data value="splib/library.spidx"/>
+                <composite_data value="splib/library.pepidx"/>
+            </param>
+            <param name="output_format" value="xls"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="FKWNGTDTNSAAEK" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**SpectraST Search**
+
+SpectraST_ can perform spectral searching from the following data formats:
+
+    .mzML format
+    .mzXML (all versions) format
+    .mzData format
+    .mgf (Mascot Generic) format
+    .dta (SEQUEST) format, a simple peak list preceded by precursor information
+    NIST (National Institute of Standards and Technology)'s .msp format 
+
+The spectral library must be in SpectraST's .splib format, 
+which can be created in SpectraST Create 
+or imported from .msp format which can be generated by BiblioSpec_.
+
+The results can be outputted to the following formats:
+    .pepXML format
+    .txt format, a fixed-width column text format
+    .xls format, a tab-delimited column text format
+
+.. _SpectraST: http://tools.proteomecenter.org/wiki/index.php?title=Software:SpectraST#SpectraST_Create_Mode
+.. _BiblioSpec: https://skyline.ms/wiki/home/software/BiblioSpec/page.view?name=default
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>