<!-- Mercurial > repos > jjohnson > spectrast -->

<tool id="specrast_search" name="SpectraST Search" version="@VERSION@.0">
    <!-- Tagline for the tool; the requirements themselves are defined by the "requirements" macro in macros.xml. -->
    <description>with Spectral Library</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Each spectrum dataset is symlinked into the working directory under a
        ## name that ends in an extension spectrast recognizes.
        #import re
        ## Galaxy datatype extension -> extension spelling used for the symlink.
        #set global $exts = {'mgf':'MGF', 'mzml':'mzML', 'mzxml':'mzXML', 'msp': 'msp', 'mzdata':'mzData'}
        ## Symlink names, in the order they are passed to spectrast.
        #set global $input_files = []
        ## ln_scan: link one spectrum dataset, remember the link name, and echo
        ## the dataset name with its encoded id to standard output.
        #def ln_scan($sf):
            #set $ext = '.' + $exts[$sf.extension.lower()]
            ## Drop the current extension, if any, then append the expected one so
            ## that a dataset name without a dot still ends in a usable extension.
            #set $base_name = $re.sub('[.][^.]*$', '', $sf.display_name.split('/')[-1])
            #set $input_name = $base_name + $ext
            #silent $input_files.append($input_name)
            ln -s -f '${sf}' '${input_name}' &&
            #set $encoded_id = $__app__.security.encode_id($sf.id)
            echo "Spectrums:${sf.display_name}(API:${encoded_id}) ";
        #end def
        #if $collection.collection_selector == 'dataset':
            $ln_scan($collection.spectrum_file)
        #else:
            #for $spectrum_file in $collection.spectrum_files:
                $ln_scan($spectrum_file)
            #end for
        #end if
        ## spectrast_params.py (not shown here) receives the optional user
        ## parameter file and the generated config file and writes spectrast.params.
        python '$__tool_directory__/spectrast_params.py'
           --mode=search
           #if $spectrastParams:
               '$spectrastParams'
           #end if
           '$spectrast_params' -o spectrast.params &&
        mkdir out &&
        spectrast -sFspectrast.params -sL'${libraryFile.extra_files_path}/library.splib' -sO out -sE $outputExtension
        ## Quote every link name: dataset display names may contain spaces.
        #echo ' '.join(["'%s'" % link_name for link_name in $input_files])
        && ls -l out
        #if $collection.collection_selector == 'dataset':
            ## Single-dataset mode: copy the result into the "output" dataset.
            && cp out/* '$output'
        #end if
    ]]></command>
    <configfiles>
        <!-- Generated "key = value" parameter file that the command passes to spectrast_params.py.
             Optional settings are written only when the user supplied a value. -->
        <configfile name="spectrast_params"><![CDATA[#slurp
libraryFile = ${libraryFile.extra_files_path}/library.splib
#if str($general.databaseFile) != 'None':
databaseFile = $general.databaseFile
#if $general.databaseType is not None:
databaseType = $general.databaseType
#end if
#end if
#if $general.indexCacheAll is not None:
indexCacheAll = $general.indexCacheAll
#end if
## #if $filterSelectedListFileName is not None:
## filterSelectedListFileName = $filterSelectedListFileName
## #end if
#if $candidate_selection_and_scoring.precursorMzTolerance is not None:
precursorMzTolerance = $candidate_selection_and_scoring.precursorMzTolerance
#end if
#if str($candidate_selection_and_scoring.precursorMzUseAverage) != 'None':
precursorMzUseAverage = $candidate_selection_and_scoring.precursorMzUseAverage
#end if
#if str($candidate_selection_and_scoring.searchAllCharges) != 'None':
searchAllCharges = $candidate_selection_and_scoring.searchAllCharges
#end if
#if $candidate_selection_and_scoring.detectHomologs is not None:
detectHomologs = $candidate_selection_and_scoring.detectHomologs
#end if
#if $candidate_selection_and_scoring.fvalFractionDelta is not None:
fvalFractionDelta = $candidate_selection_and_scoring.fvalFractionDelta
#end if
#if str($candidate_selection_and_scoring.useSp4Scoring) != 'None':
useSp4Scoring = $candidate_selection_and_scoring.useSp4Scoring
#end if
#if str($candidate_selection_and_scoring.fvalUseDotBias) != 'None':
fvalUseDotBias = $candidate_selection_and_scoring.fvalUseDotBias
#end if
#if str($candidate_selection_and_scoring.usePValue) != 'None':
usePValue = $candidate_selection_and_scoring.usePValue
#end if
#if str($candidate_selection_and_scoring.useTierwiseOpenModSearch) != 'None':
useTierwiseOpenModSearch = $candidate_selection_and_scoring.useTierwiseOpenModSearch
#end if
#if $outputExtension is not None:
outputExtension = $outputExtension
#end if
## #if $outputDirectory is not None:
## outputDirectory = $outputDirectory
## #end if
#if $output_and_display.hitListTopHitFvalThreshold is not None:
hitListTopHitFvalThreshold = $output_and_display.hitListTopHitFvalThreshold
#end if
#if $output_and_display.hitListLowerHitsFvalThreshold is not None:
hitListLowerHitsFvalThreshold = $output_and_display.hitListLowerHitsFvalThreshold
#end if
#if str($output_and_display.hitListShowHomologs) != 'None':
hitListShowHomologs = $output_and_display.hitListShowHomologs
#end if
#if $output_and_display.hitListShowMaxRank is not None:
hitListShowMaxRank = $output_and_display.hitListShowMaxRank
#end if
#if str($output_and_display.hitListOnlyTopHit) != 'None':
hitListOnlyTopHit = $output_and_display.hitListOnlyTopHit
#end if
#if str($output_and_display.hitListExcludeNoMatch) != 'None':
hitListExcludeNoMatch = $output_and_display.hitListExcludeNoMatch
#end if
#if $output_and_display.enzymeForPepXMLOutput is not None:
enzymeForPepXMLOutput = $output_and_display.enzymeForPepXMLOutput
#end if
#if str($printFingerprintingSummary) != 'None':
printFingerprintingSummary = $printFingerprintingSummary
#end if
#if $spectrum_filtering.filterMinPeakCount is not None:
filterMinPeakCount = $spectrum_filtering.filterMinPeakCount
#end if
#if $spectrum_filtering.filterAllPeaksBelowMz is not None:
filterAllPeaksBelowMz = $spectrum_filtering.filterAllPeaksBelowMz
#end if
#if $spectrum_filtering.filterMaxIntensityBelow is not None:
filterMaxIntensityBelow = $spectrum_filtering.filterMaxIntensityBelow
#end if
#if $spectrum_filtering.filterMinMzRange is not None:
filterMinMzRange = $spectrum_filtering.filterMinMzRange
#end if
#if $spectrum_filtering.filterCountPeakIntensityThreshold is not None:
filterCountPeakIntensityThreshold = $spectrum_filtering.filterCountPeakIntensityThreshold
#end if
#if $spectrum_processing.filterRemovePeakIntensityThreshold is not None:
filterRemovePeakIntensityThreshold = $spectrum_processing.filterRemovePeakIntensityThreshold
#end if
#if $spectrum_processing.filterMaxPeaksUsed is not None:
filterMaxPeaksUsed = $spectrum_processing.filterMaxPeaksUsed
#end if
#if $spectrum_processing.filterMaxDynamicRange is not None:
filterMaxDynamicRange = $spectrum_processing.filterMaxDynamicRange
#end if
#if $spectrum_processing.peakScalingMzPower is not None:
peakScalingMzPower = $spectrum_processing.peakScalingMzPower
#end if
#if $spectrum_processing.peakScalingIntensityPower is not None:
peakScalingIntensityPower = $spectrum_processing.peakScalingIntensityPower
#end if
#if $spectrum_processing.peakScalingUnassignedPeaks is not None:
peakScalingUnassignedPeaks = $spectrum_processing.peakScalingUnassignedPeaks
#end if
#if str($spectrum_processing.peakNoBinning) != 'None':
peakNoBinning = $spectrum_processing.peakNoBinning
#end if
#if $spectrum_processing.peakBinningNumBinsPerMzUnit is not None:
peakBinningNumBinsPerMzUnit = $spectrum_processing.peakBinningNumBinsPerMzUnit
#end if
#if $spectrum_processing.peakBinningFractionToNeighbor is not None:
peakBinningFractionToNeighbor = $spectrum_processing.peakBinningFractionToNeighbor
#end if
#if $spectrum_processing.filterLibMaxPeaksUsed is not None:
filterLibMaxPeaksUsed = $spectrum_processing.filterLibMaxPeaksUsed
#end if
#if $spectrum_processing.filterLightIonsMzThreshold is not None:
filterLightIonsMzThreshold = $spectrum_processing.filterLightIonsMzThreshold
#end if
#if str($spectrum_processing.filterITRAQReporterPeaks) != 'None':
filterITRAQReporterPeaks = $spectrum_processing.filterITRAQReporterPeaks
#end if
#if str($spectrum_processing.filterTMTReporterPeaks) != 'None':
filterTMTReporterPeaks = $spectrum_processing.filterTMTReporterPeaks
#end if
]]>
        </configfile>
    </configfiles>
    <inputs>
        <!-- Selects between one spectrum dataset (results copied to the "output" dataset)
             and several spectrum datasets (results gathered into an output collection). -->
        <conditional name="collection">
            <param name="collection_selector" type="select" label="Process Spectrums Individually or as a Collection">
                <option value="dataset" selected="true">Search Single Spectrum file and output results as a dataset</option>
                <option value="collection">Search Multiple Spectrum files and output results as a collection</option>
            </param>
            <when value="dataset">
                <param name="spectrum_file" type="data" format="mzxml,mzml,mgf,msp,mzdata" label="Spectrum file" help="mzxml,mzml,mgf,msp,mzdata"/>
            </when>
            <when value="collection">
                <param name="spectrum_files" multiple="true" type="data" format="mzxml,mzml,mgf,msp,mzdata" label="Spectrum files" help="mzxml,mzml,mgf,msp,mzdata"/>
            </when>
        </conditional>

        <!-- Spectral library to search; the command reads library.splib from this dataset's extra files directory. -->
        <param name="libraryFile"
               type="data"
               format="splib"
               label="Spectral library"
               help=""/>
        <!-- Optional parameter file; when supplied it is passed to spectrast_params.py before the generated settings file. -->
        <param name="spectrastParams"
               type="data"
               format="txt"
               optional="true"
               label="SpectraST param file"
               help=""/>
        <!-- Values from this section are written to the generated parameter file as databaseFile, databaseType and indexCacheAll. -->
        <section name="general" title="General Options" expanded="false">
            <param name="databaseFile"
                   type="data"
                   format="fasta"
                   optional="true"
                   label="Fasta sequence database"
                   help="will not affect the search in any way, but this information will be included in the output for any downstream data processing."/>
            <!-- Only written to the parameter file when a sequence database is selected. -->
            <param name="databaseType" type="select" label="Database type: Protein or DNA">
                <option value="AA" selected="true">protein AA</option>
                <option value="DNA">genomic DNA</option>
            </param>
            <param name="indexCacheAll"
                   type="boolean"
                   label="indexCacheAll"
                   help="Cache all entries in RAM. Requires a lot of memory (the library will usually be loaded almost in its entirety), but speeds up search for unsorted queries."/>
        </section>
        <!-- Every parameter in this section is optional; a value left unset is omitted from the generated parameter file. -->
        <section name="candidate_selection_and_scoring" expanded="false" title="Candidate Selection and Scoring Options">
            <param name="precursorMzTolerance" type="float" value="" optional="true"
                label="precursorMzTolerance"
                help="Specify precursor m/z tolerance in Th. Monoisotopic mass is assumed. Default is 3.0"/>
            <param name="precursorMzUseAverage"  type="select" optional="true" label="precursorMzUseAverage Default: false" >
                <help> Report average mass instead of monoisotopic mass in search results.
                       Precursor m/z window is expanded to account for difference between average and monoisotopic mass.
                </help>
                <option value="false">false</option>
                <option value="true">true</option>
            </param>
            <param name="searchAllCharges"  type="select" optional="true" label="searchAllCharges Default: false" >
                <help> Search library spectra of all charge states, i.e. ignore the charge state (if any) specified for the query spectrum.
                </help>
                <option value="false">false</option>
                <option value="true">true</option>
            </param>
            <param name="detectHomologs" type="integer" value="" optional="true"
                label="detectHomologs"
                help="Detect homologous lower hits up to this rank. Looks for lower hits homologous to the top hit and adjust delta accordingly. Default is 4"/>
            <param name="fvalFractionDelta" type="float" value="" optional="true"
                label="fvalFractionDelta"
                help="Specify the fraction of the normalized delta score (delta/dot) in the F-value formula.  Default is 0.4"/>
            <param name="useSp4Scoring"  type="select" optional="true" label="useSp4Scoring (SP4) Default: false" >
                <help> Use original SpectraST (4.0 or earlier) scoring, based on dot products of square-root intensities.
                </help>
                <option value="false">false</option>
                <option value="true">true</option>
            </param>
            <param name="fvalUseDotBias" type="select" optional="true" label="fvalUseDotBias (Only applicable for SP4 scoring) Default: true" >
                <help> Use dot bias to penalize high-scoring matches with massive noise and/or dominant peak.
                </help>
                <option value="true">true</option>
                <option value="false">false</option>
            </param>
            <param name="usePValue" type="select" optional="true" label="usePValue (NOT applicable for SP4 scoring) Default: false" >
                <help> Use P-values in scoring the matches (see the SpectraST documentation for details).
                </help>
                <option value="false">false</option>
                <option value="true">true</option>
            </param>
            <param name="useTierwiseOpenModSearch" type="select" optional="true" label="useTierwiseOpenModSearch (NOT applicable for SP4 scoring) Default: false" >
                <help> Perform tier-wise open modification search for modifications within precursor m/z window specified.
                       Note that the scoring is different from normal SpectraST searches.
                </help>
                <option value="false">false</option>
                <option value="true">true</option>
            </param>
        </section>
        <!-- Hit-list display settings. Each value that is set is written to the generated
             parameter file under the same name as the parameter. -->
        <section name="output_and_display" expanded="false" title="Output and Display Options">
            <param name="hitListTopHitFvalThreshold" type="float" value="" optional="true"
                label="hitListTopHitFvalThreshold"
                help="Minimum F value threshold for the top hit. Only top hits having F value greater than this will be printed.  Default is 0.0 (all top hits)"/>
            <param name="hitListLowerHitsFvalThreshold" type="float" value="" optional="true"
                label="hitListLowerHitsFvalThreshold"
                help="Minimum F value threshold for the lower hits. Only lower hits having F value greater than this will be printed. Default=0.45"/>
            <param name="hitListShowHomologs" type="select" optional="true" label="hitListShowHomologs  Default: true" >
                <help> Always displays homologous lower hits regardless of F value.
                </help>
                <option value="true">true</option>
                <option value="false">false</option>
            </param>
            <param name="hitListShowMaxRank" type="integer" value="" optional="true" min="1"
                label="hitListShowMaxRank"
                help="Maximum rank for hits shown for each query. Default is 1 (Just the top hit)"/>
            <param name="hitListOnlyTopHit" type="select" optional="true" label="hitListOnlyTopHit  Default: true" >
                <help> Only display the top hit for each query.
                </help>
                <option value="true">true</option>
                <option value="false">false</option>
            </param>
            <param name="hitListExcludeNoMatch" type="select" optional="true" label="hitListExcludeNoMatch  Default: true" >
                <help> Do not display queries for which there is no candidate, or the top hit is below the minimum F value threshold specified.
                </help>
                <option value="true">true</option>
                <option value="false">false</option>
            </param>
            <!-- NOTE(review): this is a text parameter that also declares option children;
                 confirm the targeted Galaxy release treats them as suggested values. -->
            <param name="enzymeForPepXMLOutput" type="text" value="trypsin" optional="true" label="enzymeForPepXMLOutput">
                <help>Specify the proteolytic enzyme used, for the purpose of pepXML output.
                </help>
                <option value="trypsin">trypsin</option>
                <option value="lysc">lysc</option>
            </param>
        </section>
        <!-- Criteria for discarding query spectra. Every value is optional and is written
             to the generated parameter file only when set. -->
        <section name="spectrum_filtering" expanded="false" title="Spectrum Filtering Options">
            <param name="filterMinPeakCount" type="integer" value="" optional="true" min="1"
                label="filterMinPeakCount"
                help="Discard query spectra with fewer than this number of peaks above the set threshold.  Default is 10"/>
            <param name="filterAllPeaksBelowMz" type="integer" value="" optional="true" min="1"
                label="filterAllPeaksBelowMz"
                help="Discard query spectra with almost no peaks above specified m/z value. All query spectra with 95%+ of the total intensity below m/z will be removed.  Default is 520"/>
            <param name="filterMaxIntensityBelow" type="integer" value="" optional="true" min="0"
                label="filterMaxIntensityBelow"
                help="Discard query spectra with no peaks with intensity above this.  Default is 0"/>
            <param name="filterMinMzRange" type="integer" value="" optional="true" min="0"
                label="filterMinMzRange"
                help="Discard query spectra with m/z range narrower than this.  Default 350"/>
            <param name="filterCountPeakIntensityThreshold" type="float" value="" optional="true"
                label="filterCountPeakIntensityThreshold"
                help="Minimum peak intensity for peaks to be counted. Only peaks with intensity above this will be counted to meet the requirement for minimum number of peaks.  Default is 2.01"/>
        </section>
        <!-- Peak filtering, scaling and binning settings. Every value is optional and is
             written to the generated parameter file only when set. -->
        <section name="spectrum_processing" expanded="false" title="Spectrum Processing Options">
            <param name="filterRemovePeakIntensityThreshold" type="float" value="" optional="true"
                label="filterRemovePeakIntensityThreshold"
                help="Noise peak threshold. All peaks with intensities below this will be zeroed.  Default is 2.01"/>
            <param name="filterMaxPeaksUsed" type="integer" value="" optional="true" min="1"
                label="filterMaxPeaksUsed"
                help="Remove all but the top number peaks in query spectra.  Default 150"/>
            <param name="filterMaxDynamicRange" type="integer" value="" optional="true" min="1"
                label="filterMaxDynamicRange"
                help="Remove all peaks smaller than 1/number of the base (highest) peak in query spectra.  Default 1000"/>

            <param name="peakScalingMzPower" type="float" value="" optional="true"
                label="peakScalingMzPower"
                help="Intensity scaling power with respect to the m/z value and the raw intensity. The scaled intensity will be (m/z)^mzpow * (raw intensity)^intpow  Default is 2.01"/>
            <param name="peakScalingIntensityPower" type="float" value="" optional="true"
                label="peakScalingIntensityPower"
                help="Intensity scaling power with respect to the m/z value and the raw intensity. The scaled intensity will be (m/z)^mzpow * (raw intensity)^intpow  Default is 2.01"/>

            <param name="peakScalingUnassignedPeaks" type="float" value="" optional="true"  min="0.0"
                label="peakScalingUnassignedPeaks"
                help="Scaling factor for unassigned peaks in library spectra. Default is 1.0"/>
            <param name="peakNoBinning" type="select" optional="true" label="peakNoBinning  Default: false" >
                <help> Disable binning and perform peak-to-peak matching.
                </help>
                <option value="true">true - Disable binning and perform peak-to-peak matching.</option>
                <option value="false">false - Use binning</option>
            </param>
            <param name="peakBinningNumBinsPerMzUnit" type="integer" value="" optional="true" min="1"
                label="peakBinningNumBinsPerMzUnit"
                help="Number of bins per Th. Default 1"/>
            <param name="peakBinningFractionToNeighbor" type="float" value="" optional="true"
                label="peakBinningFractionToNeighbor"
                help="Fraction of the scaled intensity assigned to neighboring bins.  Default is 0.5"/>
            <param name="filterLibMaxPeaksUsed" type="integer" value="" optional="true" min="1"
                label="filterLibMaxPeaksUsed"
                help="Remove all but the top peaks in the LIBRARY spectra.  Default 50"/>
            <param name="filterLightIonsMzThreshold" type="integer" value="" optional="true" min="1"
                label="filterLightIonsMzThreshold"
                help="Remove all light ions with m/z lower than this Th for both library and query spectra. Default 180"/>
            <param name="filterITRAQReporterPeaks" type="select" optional="true" label="filterITRAQReporterPeaks  Default: false" >
                <help> Remove iTRAQ reporter peaks in the range 112-122 Th.
                </help>
                <option value="true">true - Remove iTRAQ reporter peaks</option>
                <option value="false">false</option>
            </param>
            <param name="filterTMTReporterPeaks" type="select" optional="true" label="filterTMTReporterPeaks  Default: false" >
                <help> Remove TMT reporter peaks in the range 126-132 Th.
                </help>
                <option value="true">true - Remove TMT reporter peaks</option>
                <option value="false">false</option>
            </param>
        </section>

        <!-- Result format: passed to spectrast on the command line, written to the parameter file,
             and used by the output filters and format switches. -->
        <param name="outputExtension"
               type="select"
               label="Output format">
            <option value="pep.xml">pep.xml</option>
            <option value="xls">xls (tabular)</option>
            <option value="txt">txt (fixed width text)</option>
        </param>
        <!-- Optional. Choosing "yes" yields the value fingerprint.summary, which the generated parameter
             file writes as printFingerprintingSummary and which enables the fingerprint_summary output. -->
        <param name="printFingerprintingSummary" type="select" optional="true" label="printFingerprintingSummary Default: no" >
            <help> Print a text file summarizing fingerprinting results.
            </help>
            <option value="fingerprint.summary">yes</option>
        </param>
    </inputs>
    <outputs>
        <!-- Files collected from the job working directory. -->
        <data name="log" format="txt" label="spectrast.log" from_work_dir="spectrast.log"/>
        <data name="params" format="txt" label="spectrast.params" from_work_dir="spectrast.params"/>
        <data name="fingerprint_summary" format="txt" label="fingerprint.summary" from_work_dir="fingerprint.summary">
            <filter>printFingerprintingSummary and printFingerprintingSummary == 'fingerprint.summary'</filter>
        </data>
        <!-- Single-dataset mode: the command copies the one search result here.
             No column_names action is declared: the tool has no "input_files" input,
             so the former action could not be evaluated against this tool's parameters. -->
        <data name="output" format="pepxml" label="${tool.name} on ${on_string}: ${outputExtension}">
            <filter>collection['collection_selector'] == 'dataset'</filter>
            <change_format>
                <when input="outputExtension" value="xls" format="tabular" />
                <when input="outputExtension" value="txt" format="txt" />
            </change_format>
        </data>
        <!-- Collection mode: one list per output format, discovered from the "out" directory. -->
        <collection name="output_pepxml" type="list" label="${tool.name} on ${on_string}: ${outputExtension} collection">
            <filter>collection['collection_selector'] == 'collection' and outputExtension == 'pep.xml'</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\.pep\.xml)" ext="pepxml" directory="out" visible="false" />
        </collection>
        <collection name="output_xls" type="list" label="${tool.name} on ${on_string}: ${outputExtension} collection">
            <filter>collection['collection_selector'] == 'collection' and outputExtension == 'xls'</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\.xls)" ext="tabular" directory="out" visible="false" />
        </collection>
        <collection name="output_txt" type="list" label="${tool.name} on ${on_string}: ${outputExtension} collection">
            <filter>collection['collection_selector'] == 'collection' and outputExtension == 'txt'</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\.txt)" ext="txt" directory="out" visible="false" />
        </collection>
    </outputs>
    <tests>
        <!-- Single-dataset search with tab-delimited output. The "output" dataset asserted below
             only exists when collection_selector is "dataset", so that branch and its
             spectrum_file parameter are selected explicitly. -->
        <test>
            <param name="collection_selector" value="dataset"/>
            <param name="spectrum_file" value="msgf-test.mzML" ftype="mzml" />
            <param name="libraryFile" value="splib.html" ftype="splib" >
                <composite_data value="splib/library.splib"/>
                <composite_data value="splib/library.spidx"/>
                <composite_data value="splib/library.pepidx"/>
            </param>
            <param name="outputExtension" value="xls"/>
            <output name="output">
                <assert_contents>
                    <has_text text="FKWNGTDTNSAAEK" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**SpectraST Search**

SpectraST_ can perform spectral searching from the following data formats:

    .mzML format
    .mzXML (all versions) format
    .mzData format
    .mgf (Mascot Generic) format
    .dta (SEQUEST) format, a simple peak list preceded by precursor information
    NIST (National Institute of Standards and Technology)'s .msp format

The spectral library must be in SpectraST's .splib format,
which can be created in SpectraST Create
or imported from .msp format which can be generated by BiblioSpec_.

The results can be written in the following formats:

    .pepXML format
    .txt format, a fixed-width column text format
    .xls format, a tab-delimited column text format

.. _SpectraST: http://tools.proteomecenter.org/wiki/index.php?title=Software:SpectraST#SpectraST_Create_Mode
.. _BiblioSpec: https://skyline.ms/wiki/home/software/BiblioSpec/page.view?name=default

    ]]></help>
    <expand macro="citations" />
</tool>
<!--
author	jjohnson
date	Wed, 25 Jul 2018 15:05:34 -0400
parents	7f02fc51bddf
children
-->