view morpheus.xml @ 6:01a572a3a716 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/morpheus commit 93e02b037bfb37001de2bfb6fc731539c1902e80
author galaxyp
date Thu, 12 Jan 2017 15:50:36 -0500
parents f0202024f80b
children
line wrap: on
line source

<tool id="morpheus" name="Morpheus" version="2.255.0">
    <description>database search algorithm for high-resolution tandem mass spectra</description>
    <macros>
        <!-- Modifications offered in both the fixed and the variable select lists.
             The selected values are joined with ';' by the command template and handed
             to Morpheus through -fm / -vm, so a value must not carry stray whitespace. -->
        <xml name="modification_options">
            <option value="acetylation of protein N-terminus">acetylation of protein N-terminus</option>
            <option value="acetylation of lysine">acetylation of lysine</option>
            <option value="phosphorylation of S">phosphorylation of S</option>
            <option value="phosphorylation of T">phosphorylation of T</option>
            <option value="phosphorylation of Y">phosphorylation of Y</option>
            <option value="deamidation of N">deamidation of N</option>
            <option value="deamidation of Q">deamidation of Q</option>
            <option value="pyro-cmC">pyro-cmC</option>
            <option value="pyro-E">pyro-E</option>
            <option value="pyro-Q">pyro-Q</option>
            <option value="TMT zero on peptide N-terminus">TMT zero on peptide N-terminus</option>
            <option value="TMT zero on K">TMT zero on K</option>
            <option value="TMT zero on Y">TMT zero on Y</option>
            <option value="TMT duplex on peptide N-terminus">TMT duplex on peptide N-terminus</option>
            <option value="TMT duplex on K">TMT duplex on K</option>
            <option value="TMT duplex on Y">TMT duplex on Y</option>
            <option value="TMT sixplex/tenplex on peptide N-terminus">TMT sixplex/tenplex on peptide N-terminus</option>
            <option value="TMT sixplex/tenplex on K">TMT sixplex/tenplex on K</option>
            <option value="TMT sixplex/tenplex on Y">TMT sixplex/tenplex on Y</option>
            <option value="iTRAQ 4-plex on peptide N-terminus">iTRAQ 4-plex on peptide N-terminus</option>
            <option value="iTRAQ 4-plex on K">iTRAQ 4-plex on K</option>
            <option value="iTRAQ 4-plex on Y">iTRAQ 4-plex on Y</option>
            <option value="iTRAQ 8-plex on peptide N-terminus">iTRAQ 8-plex on peptide N-terminus</option>
            <option value="iTRAQ 8-plex on K">iTRAQ 8-plex on K</option>
            <option value="iTRAQ 8-plex on Y">iTRAQ 8-plex on Y</option>
        </xml>
        <!-- Choices for the variable modification list; "oxidation of M" is preselected. -->
        <xml name="variable_modification_options">
            <option value="oxidation of M" selected="true">oxidation of M</option>
            <option value="carbamidomethylation of C">carbamidomethylation of C</option>
            <expand macro="modification_options"/>
        </xml>
        <!-- Choices for the fixed modification list; "carbamidomethylation of C" is preselected. -->
        <xml name="fixed_modification_options">
            <option value="carbamidomethylation of C" selected="true">carbamidomethylation of C</option>
            <option value="oxidation of M">oxidation of M</option>
            <expand macro="modification_options"/>
        </xml>
    </macros>

    <!-- Runtime dependency: the "morpheus" package, version 255. -->
    <requirements>
        <requirement version="255" type="package">morpheus</requirement>
    </requirements>

    <!-- Job error detection: exit codes of 1 or above are flagged, and any text on
         stdout or stderr matching the exception pattern is treated as a fatal error. -->
    <stdio>
        <exit_code range="1:"/>
        <regex level="fatal" source="both" match="System..*Exception" description="Error encountered"/>
    </stdio>

    <command><![CDATA[
        ## Directory that receives the cleaned-up reports collected as tool outputs.
        mkdir -p output_reports &&
        #import re
        ## The search database is linked under a fixed local name whose extension
        ## reflects its datatype (UniProt XML versus fasta).
        #if $searchdb.is_of_type('uniprotxml'):
          #set $searchdb_name = 'searchdb.xml'
        #else
          #set $searchdb_name = 'searchdb.fasta'
        #end if
        ln -s '$searchdb' '$searchdb_name'
        ## Need to link each input to a name in cwd, names must be unique
        #set $input_list = []
        #for i,input in enumerate($inputs):
          ## Sanitize the dataset name and force a single .mzML suffix. count=1 keeps the
          ## trailing empty match from being replaced a second time on Python 3.7 and later,
          ## which would otherwise yield a doubled ".mzML.mzML" suffix.
          #set $input_name = $re.sub('(?i)([.]?mzML)*$','.mzML',$re.sub('\W','_',$input.name),count=1)
          #if $input_name in $input_list:
            #set $input_name = str($i) + '_' + $input_name
          #end if
          #set $input_list = $input_list + [$input_name]
          && ln -s '$input' '$input_name'
        #end for
        #set $input_names = ','.join($input_list)
        &&

        mono `which morpheus`
        -d='$input_names'
        -db='$searchdb_name'

        ## The noup flag is only considered for UniProt XML databases with advanced options enabled.
        #if $searchdb.is_of_type('uniprotxml'):
          #if str( $advanced.adv_options_selector) == "set":
            $advanced.noup
          #end if
        #end if
        ## fm vm fdr mvmi precmt precmtv precmtu
        ## Optional numeric parameters render as an empty string when unset,
        ## optional selects render as None; either way the flag is omitted.
        #if str($fdr) != '':
            -fdr=$fdr
        #end if
        #if  str($mvmi) != '':
            -mvmi=$mvmi
        #end if
        #if  str($precmtv) != '':
            -precmtv=$precmtv
        #end if
        #if  str($precmtu) != 'None':
            -precmtu=$precmtu
        #end if
        #if str( $advanced.adv_options_selector) == "set":
            #if  str($advanced.precmt) != 'None':
                -precmt=$advanced.precmt
            #end if
            #if  str($advanced.minprecz) != '':
                -minprecz=$advanced.minprecz
            #end if
            #if  str($advanced.maxprecz) != '':
                -maxprecz=$advanced.maxprecz
            #end if
            #if  str($advanced.at) != '':
                -at=$advanced.at
            #end if
            #if  str($advanced.rt) != '':
                -rt=$advanced.rt
            #end if
            #if  str($advanced.mp) != '':
                -mp=$advanced.mp
            #end if
            #if  str($advanced.mmc) != '':
                -mmc=$advanced.mmc
            #end if
            #if  str($advanced.prodmt) != 'None':
                -prodmt=$advanced.prodmt
            #end if
            #if  str($advanced.prodmtv) != '':
                -prodmtv=$advanced.prodmtv
            #end if
            #if  str($advanced.prodmtu) != 'None':
                -prodmtu=$advanced.prodmtu
            #end if
            #if  str($advanced.minpmo) != '':
                -minpmo=$advanced.minpmo
            #end if
            #if  str($advanced.maxpmo) != '':
                -maxpmo=$advanced.maxpmo
            #end if
            #if  str($advanced.imb) != 'None':
                -imb=$advanced.imb
            #end if
            #if  str($advanced.ad) != 'None':
                -ad=$advanced.ad
            #end if
            $advanced.acs $advanced.di $advanced.pmc $advanced.cmu $advanced.mmu
        #end if
        ## Multi-select values arrive comma separated; they are passed on separated by semicolons.
        #if str($fm) != 'None':
            #set $fmods = str($fm).replace(',',';')
            -fm='$fmods'
        #end if
        #if str($vm) != 'None':
            #set $vmods = str($vm).replace(',',';')
            -vm='$vmods'
        #end if
        ## Thread count follows GALAXY_SLOTS and falls back to 4 when it is not set.
        -mt=\${GALAXY_SLOTS:-4}
        #set $out_list = 'log.txt summary.tsv aggregate.PSMs.tsv aggregate.unique_peptides.tsv aggregate.protein_groups.tsv aggregate.mzid *.pep.xml'
        ## With a single input the per-file reports are linked to the log.txt and
        ## aggregate.* names that the copy loop below reads.
        #if len($input_list) == 1:
          && ln -s *.log.txt log.txt
          && ln -s *.mzid aggregate.mzid
          && ln -s *.unique_peptides.tsv aggregate.unique_peptides.tsv
          && ln -s *.protein_groups.tsv aggregate.protein_groups.tsv
          && ln -s *.PSMs.tsv aggregate.PSMs.tsv
        #end if
        ## Take the path prefix that precedes the database name on the log's
        ## "Proteome Database:" line and remove it from every report while copying
        ## the reports into output_reports.
          && ( basepath=`grep 'Proteome Database:' log.txt  | sed 's/Proteome Database: \(.*\)${$searchdb_name}/\1/'`;
             for i in $out_list; do cat \$i | sed "s#\${basepath}\##" > output_reports/\$i; done )
    ]]></command>
    <!-- Tool parameters: spectra and search database, modification lists, the common
         search tolerances, and an optional "advanced" conditional whose values are only
         emitted on the command line when adv_options_selector is "set". -->
    <inputs>
        <param name="inputs" type="data" format="mzml" multiple="true" label="Indexed mzML" />
        <param name="searchdb" type="data" format="fasta,uniprotxml" label="MS Protein Search Database: UniProt Xml or Fasta"/>
        <param name="fm" type="select" multiple="true" optional="true" label="Fixed Modifications">
            <expand macro="fixed_modification_options" />
        </param>
        <param name="vm" type="select" multiple="true" optional="true" label="Variable Modifications">
            <expand macro="variable_modification_options" />
        </param>
        <param name="fdr" type="float" value="1" optional="true" min="0.0" max="100.0" label="FDR (Maximum False Discovery Rate percent)" />
        <param name="mvmi" type="integer" value="1024" optional="true" min="0" label="Maximum Variable Modification Isoforms Per Peptide" />
        <param name="precmtv" type="float" value="10." optional="true" label="Precursor Mass Tolerance Value" />
        <param name="precmtu" type="select" optional="true" label="Precursor Mass Tolerance Units">
            <option value="ppm" selected="true">ppm</option>
            <option value="Da">Daltons</option>
        </param>
        <conditional name="advanced">
            <param name="adv_options_selector" type="select" label="Set advanced options?" help="Provides additional controls">
                <option value="set">Set</option>
                <option value="do_not_set" selected="true">Do not set</option>
            </param>
            <when value="set">
                <param name="precmt" type="select" optional="true" label="Precursor Mass Type">
                    <option value="Monoisotopic">Monoisotopic</option>
                    <option value="Average">Average</option>
                </param>
                <!-- Inverted on purpose: checked emits nothing, unchecked emits -noup=True. -->
                <param name="noup" type="boolean" truevalue="" falsevalue="-noup=True" checked="true" label="Use G-PTM with Uniprot Proteome Search Databases" />
                <param name="minprecz" type="integer" value="2" optional="true" label="Minimum Unknown Precursor Charge State" />
                <param name="maxprecz" type="integer" value="4" optional="true" label="Maximum Unknown Precursor Charge State" />
                <param name="at" type="float" value="" optional="true" min="0.0" label="Absolute MS/MS Intensity Threshold" />
                <param name="rt" type="float" value="" optional="true" min="0.0" label="Relative MS/MS Intensity Threshold" />
                <param name="mp" type="integer" value="400" optional="true" min="-1" label="Maximum Number of MS/MS Peaks" help="to disable set to: -1"/>
                <param name="acs" type="boolean" truevalue="-acs=True" falsevalue="-acs=False" checked="true" optional="true" label="Assign Charge States" />
                <param name="di" type="boolean" truevalue="-di=True" falsevalue="-di=False" checked="false" optional="true" label="Deisotope" />
                <param name="ad" type="select" optional="true" label="Create Target-Decoy Database On The Fly"
                       help="Set to No if Search Database is a fasta that already includes decoys.">
                    <option value="True" selected="true">Yes</option>
                    <option value="False">No</option>
                </param>
                <param name="mmc" type="integer" value="2" optional="true" min="0" max="20" label="Maximum Missed Cleavages" />
                <param name="pmc" type="boolean" truevalue="-pmc=True" falsevalue="-pmc=False" checked="false" optional="true" label="Precursor Monoisotopic Peak Correction" />
                <param name="minpmo" type="integer" value="" optional="true" label="Minimum Precursor Monoisotopic Peak Correction" />
                <param name="maxpmo" type="integer" value="" optional="true" label="Maximum Precursor Monoisotopic Peak Correction" />
                <param name="prodmt" type="select" optional="true" label="Product Mass Type">
                    <option value="Monoisotopic">Monoisotopic</option>
                    <option value="Average">Average</option>
                </param>
                <param name="prodmtv" type="float" value="" optional="true" label="Product Mass Tolerance Value" />
                <param name="prodmtu" type="select" optional="true" label="Product Mass Tolerance Units">
                    <option value="Da">Daltons</option>
                    <option value="ppm">ppm</option>
                </param>
                <param name="imb" type="select" optional="true" label="Initiator Methionine Behavior">
                    <option value="Variable">Variable</option>
                    <option value="Retain">Retain</option>
                    <option value="Cleave">Cleave</option>
                </param>
                <param name="cmu" type="boolean" truevalue="-cmu=True" falsevalue="-cmu=False" checked="false" optional="true" label="Consider Modified Forms as Unique Peptides" />
                <param name="mmu" type="boolean" truevalue="-mmu=True" falsevalue="-mmu=False" checked="false" optional="true" label="Minimize Memory Usage" />
            </when>
            <when value="do_not_set"/>
        </conditional>
    </inputs>
    <!-- Outputs are picked up from output_reports/, where the command template writes
         the reports. The pep.xml files are gathered into a list collection by file name. -->
    <outputs>
        <data name="log" format="txt" label="${tool.name} on ${on_string}: log.txt" from_work_dir="output_reports/log.txt" />
        <data name="summary" format="txt" label="${tool.name} on ${on_string}: summary.tsv" from_work_dir="output_reports/summary.tsv" />
        <data name="output_psms" format="tabular" label="${tool.name} on ${on_string}: PSMs.tsv" from_work_dir="output_reports/aggregate.PSMs.tsv" />
        <data name="output_unique_peptides" format="tabular" label="${tool.name} on ${on_string}: unique_peptides.tsv" from_work_dir="output_reports/aggregate.unique_peptides.tsv" />
        <data name="output_protein_groups" format="tabular" label="${tool.name} on ${on_string}: protein_groups.tsv" from_work_dir="output_reports/aggregate.protein_groups.tsv" />
        <data name="aggregate_mzid" format="mzid" label="${tool.name} on ${on_string}: aggregate.mzid" from_work_dir="output_reports/aggregate.mzid" />
        <collection name="morpheus_pep_xmls" type="list" label="${tool.name} on ${on_string} pep.xml">
            <!-- Both dots are escaped so only names ending in a literal ".pep.xml" are collected. -->
            <discover_datasets pattern="(?P&lt;name&gt;.*\.pep\.xml)" ext="pepxml" visible="false" directory="output_reports" />
        </collection>
    </outputs>
    <!-- Functional tests. All four run the same mzML file and check the PSM table for
         expected and unexpected peptide strings.
         NOTE(review): precmt is declared inside the advanced "set" branch, yet the first
         three tests leave adv_options_selector at its default; confirm the value is
         actually applied there. -->
    <tests>
        <!-- Fasta database; fm and vm are not given, so the preselected options apply. -->
        <test>
            <param name="inputs" value="test_input.mzML" ftype="mzml"/>
            <param name="searchdb" value="uniprot-proteome_UP000002311-first100entries.fasta" ftype="fasta"/>
            <param name="fdr" value="1"/>
            <param name="mvmi" value="1024"/>
            <param name="precmt" value="Monoisotopic"/>
            <param name="precmtu" value="Da"/>
            <param name="precmtv" value="2.5"/>
            <output name="output_psms">
                <assert_contents>
                    <not_has_text text="K.TTGSSSSSSSK.K" />
                    <has_text text="carbamidomethylation of C" />
                    <has_text text="(oxidation of M)" />
                </assert_contents>
            </output>
        </test>
        <!-- Fasta database with the fixed and variable modifications given explicitly. -->
        <test>
            <param name="inputs" value="test_input.mzML" ftype="mzml"/>
            <param name="searchdb" value="uniprot-proteome_UP000002311-first100entries.fasta" ftype="fasta"/>
            <param name="fdr" value="1"/>
            <param name="mvmi" value="1024"/>
            <param name="precmt" value="Monoisotopic"/>
            <param name="precmtu" value="Da"/>
            <param name="precmtv" value="2.5"/>
            <param name="fm" value="carbamidomethylation of C"/>
            <param name="vm" value="oxidation of M"/>
            <output name="output_psms">
                <assert_contents>
                    <has_text text="K.VETYM(oxidation of M)ETK.I" />
                    <not_has_text text="K.TTGSSSSSSSK.K" />
                </assert_contents>
            </output>
        </test>
        <!-- UniProt XML database; expects a peptide carrying a UniProt Phosphoserine annotation. -->
        <test>
            <param name="inputs" value="test_input.mzML" ftype="mzml"/>
            <param name="searchdb" value="uniprot-proteome_UP000002311Condensed-first100entries.xml" ftype="uniprotxml"/>
            <param name="fdr" value="1"/>
            <param name="mvmi" value="1024"/>
            <param name="precmt" value="Monoisotopic"/>
            <param name="precmtu" value="Da"/>
            <param name="precmtv" value="2.5"/>
            <param name="fm" value="carbamidomethylation of C"/>
            <param name="vm" value="oxidation of M"/>
            <output name="output_psms">
                <assert_contents>
                    <has_text text="R.KRSLFDS(UniProt: Phosphoserine)AFSSR.A" />
                    <not_has_text text="K.KYFLENKIGTDR.R" />
                </assert_contents>
            </output>
        </test>
        <!-- UniProt XML database with advanced options enabled and a product mass tolerance of .01. -->
        <test>
            <param name="inputs" value="test_input.mzML" ftype="mzml"/>
            <param name="searchdb" value="uniprot-proteome_UP000002311Condensed-first100entries.xml" ftype="uniprotxml"/>
            <param name="fdr" value="1"/>
            <param name="mvmi" value="1024"/>
            <param name="precmt" value="Monoisotopic"/>
            <param name="precmtu" value="Da"/>
            <param name="precmtv" value="2.5"/>
            <param name="fm" value="carbamidomethylation of C"/>
            <param name="vm" value="oxidation of M"/>
            <param name="adv_options_selector" value="set"/>
            <param name="prodmtv" value=".01"/>
            <output name="output_psms">
                <assert_contents>
                    <has_text text="K.KTLKSDGVAGLYR.G" />
                    <not_has_text text="K.RSPSGNISTNSMR.P" />
                </assert_contents>
            </output>
        </test>

    </tests>
    <help><![CDATA[
Morpheus_ is a database search algorithm for high-resolution tandem mass spectra. 

When a Uniprot Proteome XML file is used for the search database, Morpheus will include all known modifications from the proteome in searching for peptide spectral matches.  To fetch Uniprot Proteome XML files see:  http://www.uniprot.org/help/retrieve_sets

**INPUTS**

  - one or more spectral data files in indexed mzML format
  - protein search database, either a fasta file or a UniProt proteome XML file

**OUTPUTS**

  - summary.tsv
  - input.log.txt
  - input.PSMs.tsv
  - input.unique_peptides.tsv
  - input.protein_groups.tsv
  - input.mzid
  - input.pep.xml

.. _Morpheus: http://morpheus-ms.sourceforge.net/

    ]]></help>
    <!-- Publication to cite for this tool, identified by its DOI. -->
    <citations>
        <citation type="doi">10.1021/pr301024c</citation>
    </citations>
</tool>