changeset 0:69274b068a48 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pecan commit 21855dec2949420d3bccfd6c9912578e9aacecf0
author jjohnson
date Fri, 25 Aug 2017 14:25:21 -0400
parents
children
files pecan.xml pecan2blib.xml
diffstat 2 files changed, 324 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pecan.xml	Fri Aug 25 14:25:21 2017 -0400
@@ -0,0 +1,286 @@
+<tool id="pecan" name="PECAN" version="0.1.0">
+    <description>detect peptides directly from DIA Mass Spec</description>
+    <requirements>
+        <requirement type="package" version="0.9.9.3">pecan</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" /> <!-- matches exit codes of 1 and above; no level attribute is set here -->
+    </stdio>
+    <version_command>pecan --VERSION</version_command> <!-- upper-case flag, as listed in the usage text of the help section -->
+    <command><![CDATA[
+        ## Run PECAN once per isolation window. The windows come from the isolationSheme
+        ## configfile (typed into the form) or from the selected history dataset.
+        #if $isolation.isolationsrc == 'entered':
+            #set $isolationfile = $isolationSheme
+        #else
+            #set $isolationfile = $isolation.isolation_windows
+        #end if
+        #set $outputRoot = 'test'
+        ## awk prints start, end and the output root on separate lines; xargs -n3 appends each
+        ## triple to the pecan call and runs up to GALAXY_SLOTS calls in parallel (1 if unset).
+        awk 'NF>1 && $1 ~ /^[0-9]+(\.[0-9]*)?$/{print \$1; print \$2; print "$outputRoot"}' '$isolationfile' |
+        xargs -n3 -P\${GALAXY_SLOTS:-1}
+        pecan
+        ## the boolean renders '' when unchecked; PECAN's usage text spells the flag in upper case
+        #if str($sciex) != '':
+            --SCIEX
+        #end if
+        #if $backgroundProteome:
+            --backgroundProteome='$backgroundProteome'
+        #end if
+        ## optional numbers that were cleared are left out so that PECAN applies its own defaults
+        #if str($bgDecoyNumber) != '':
+            --bgDecoyNumber=$bgDecoyNumber
+        #end if
+        #if $decoyTag and str($decoyTag):
+            --decoyTag='$decoyTag'
+        #end if
+        --ionTypes=$ionTypes
+        #if str($minElution) != '':
+            --minElution=$minElution
+        #end if
+        #if str($maxElution) != '':
+            --maxElution=$maxElution
+        #end if
+        --overlap=$advanced_options.overlap
+        --ms1Unit=$advanced_options.ms1.ms1Unit --ms1Tolerance=$advanced_options.ms1.ms1Tolerance
+        --ms2Unit=$advanced_options.ms2.ms2Unit --ms2Tolerance=$advanced_options.ms2.ms2Tolerance
+        --minCharge=$advanced_options.minCharge --maxCharge=$advanced_options.maxCharge
+        #if $advanced_options.ms2Boundaries.specify == 'specify':
+            --ms2Boundaries="$advanced_options.ms2Boundaries.ms2BoundariesMin,$advanced_options.ms2Boundaries.ms2BoundariesMax"
+        #end if
+        --alpha=$advanced_options.alpha --beta=$advanced_options.beta
+        --idotp=$advanced_options.idotp --topX=$advanced_options.topX
+        #if $advanced_options.fixedMod and str($advanced_options.fixedMod):
+            --fixedMod='$advanced_options.fixedMod'
+        #end if
+        ## stdout and stderr of the runs go to run.log ("2>1" used to send stderr to a file named 1)
+        '$input' '$peptides' > run.log 2>&1
+        && cat "$outputRoot"*.log > '$log_file'
+        && cat "$outputRoot"*.td.feature | awk 'NR==1{print $0};!/peakCalibratedScore/{print $0}' > '$feature_file'
+    ]]></command>
+    
+    <configfiles><!-- isolationSheme: the typed-in windows rewritten as one tab-separated "start end" pair per line -->
+        <configfile name="isolationSheme">#slurp
+#import re
+#if $isolation.isolationsrc == 'entered':
+#echo $re.sub(r'(\d+[.]\d+)\s+(\d+[.]\d+)\s*',r'\1\t\2\n',$isolation.isolation_windows.strip())
+#end if
+  #slurp</configfile>
+    </configfiles>
+    <inputs>
+        <param name="input" type="data" format="mzml" label="centroided mzML file"/>
+        <param name="sciex" type="boolean" truevalue="--SCIEX" falsevalue="" checked="false"
+            label="mzML files were generated from AB SCIEX MS Data Converter"/>
+        <param name="peptides" type="data" format="tabular" multiple="true" label="target peptides"/>
+        <!-- The isolation windows are either typed in (and written out via the isolationSheme configfile) or read from a history dataset. -->
+        <conditional name="isolation">
+            <param name="isolationsrc" type="select" label="isolation windows">
+                <option value="entered">Enter isolation windows</option>
+                <option value="history">Read isolation windows from a history dataset</option>
+            </param>
+            <when value="entered">
+                <param name="isolation_windows" type="text" area="true" size="20x20" value="" label="isolation windows">
+                    <validator type="regex" message="enter pairs of window boundaries, e.g.: 400.0 420.0">^\s*\d+[.]\d+\s+\d+[.]\d+(\s+\d+[.]\d+\s+\d+[.]\d+)*\s*$</validator>
+                </param>
+            </when>
+            <when value="history">
+                <param name="isolation_windows" type="data" format="tabular" label="isolation windows"/>
+            </when>
+        </conditional>
+
+        <!-- species - default proteomes for (ecoli/yeast/mouse/human) -->
+        <param name="backgroundProteome" type="data" format="tabular" optional="true" label="BackgroundProteome"
+            help="Peptide list file name of background proteome. Recommended if querying for a small number of peptides."/>
+        <param name="bgDecoyNumber" type="integer" value="2000" min="0" label="Number of decoys used for background estimation"
+            optional="True" help="(-n) Default= 2000" />
+        <param name="decoyTag" type="text" value="decoy" optional="true" label="Tag for decoy sequences in the database"/>
+
+        <param name="ionTypes" type="select" label="Fragment ion types" help="suggested HCD: y, reCID: by">
+            <option value="by">by</option>
+            <option value="y">y</option>
+            <option value="b">b</option>
+        </param>
+
+        <param name="minElution" type="integer" value="12" min="1" label="Minimum time in seconds a peptide is expected to elute"
+            optional="True" help="(-m) Default= 12 seconds" />
+        <param name="maxElution" type="integer" value="30" min="1" label="Maximum time in seconds a peptide is expected to elute"
+            optional="True" help="(-x) Default= 30 seconds" />
+
+        <section name="advanced_options" expanded="false" title="Advanced Options">
+            <param name="overlap" type="integer" value="0" min="0" max="100" label="Overlap percentage of isolation window"/>
+            <conditional name="ms1">
+                <param name="ms1Unit" type="select" label="Unit for precursor ion extraction tolerance">
+                    <option value="ppm">ppm</option>
+                    <option value="mz">mz</option>
+                </param>
+                <when value="ppm">
+                    <param name="ms1Tolerance" type="integer" value="10" min="0" label="ms1Tolerance"
+                        help="Mass error for precursor ion extracting"/>
+                </when>
+                <when value="mz">
+                    <param name="ms1Tolerance" type="float" value="10" min="0" label="ms1Tolerance"
+                        help="Mass error for precursor ion extracting"/>
+                </when>
+            </conditional>
+            <conditional name="ms2">
+                <param name="ms2Unit" type="select" label="Unit for fragment ion extraction tolerance">
+                    <option value="ppm">ppm</option>
+                    <option value="mz">mz</option>
+                </param>
+                <when value="ppm">
+                    <param name="ms2Tolerance" type="integer" value="10" min="0" label="ms2Tolerance"
+                        help="Mass error for fragment ion extracting"/>
+                </when>
+                <when value="mz">
+                    <param name="ms2Tolerance" type="float" value="10" min="0" label="ms2Tolerance"
+                        help="Mass error for fragment ion extracting"/>
+                </when>
+            </conditional>
+            <param name="minCharge" type="integer" value="2" min="1" label="Minimum charge state searched"/>
+            <param name="maxCharge" type="integer" value="3" min="1" label="Maximum charge state searched"/>
+            <conditional name="ms2Boundaries">
+                <param name="specify" type="select" label="MS2 scan m/z boundaries for the target window">
+                    <option value="calculate">Let PECAN calculate MS2 Boundaries</option>
+                    <option value="specify">Specify MS2 Boundaries</option>
+                </param>
+                <when value="calculate"/>
+                <when value="specify">
+                    <param name="ms2BoundariesMin"  type="float" value="" label="Specify the MS2 scan lower m/z limit for the target window"/>
+                    <param name="ms2BoundariesMax"  type="float" value="" label="Specify the MS2 scan upper m/z limit for the target window"/>
+                </when>
+            </conditional>
+            <param name="alpha" type="float" value="1.8" min="0.0" label="Alpha hyperparameter to set score threshold for cIons" />
+            <param name="beta" type="float" value="0.4" min="0.0" max="1.0" label="Beta hyperparameter to set cutoff for number of cIons" />
+            <param name="idotp" type="float" value="0.0" min="0.0" label="MS1 idotp threshold used for peak picking" />
+            <param name="topX" type="integer" value="1" min="1" label="Reporting top x peaks per charged peptide"/>
+            <!-- sanitizing is switched off for fixedMod (presumably so [ ] and + reach PECAN unchanged); the regex validator restricts what can be entered -->
+            <param name="fixedMod" type="text" value="" optional="true" label="Fixed modifications in the sample">
+                <help><![CDATA[
+                    Fixed modifications in the sample. For example, default C[+57.021] will treat every Cys
+                    in the query sequences and background proteome as C[+57.021] while overwriting any
+                    specific modifications on individual Cys
+                    If multiple fixed modifications are desired use "," to separate each fixedMod tag
+                    (e.g. C[+57.021],M[+15.995])
+                ]]></help>
+                <validator type="regex" message="C[+57.021],M[+15.995]">^([A-Z]\[[+-]\d+[.]\d+\](,[A-Z]\[[+-]\d+[.]\d+\])*)*$</validator>
+                <sanitizer sanitize="False"/>
+            </param>
+        </section>
+        <!--
+        Not exposed as an input yet: verbosity, the verbosity level for file logging (WARNING/INFO/DEBUG), Default=INFO
+        -->
+    </inputs>
+    <outputs> <!-- both datasets are assembled by the command section from the files PECAN writes under its output root -->
+        <data name="log_file" format="txt" label="${tool.name} on ${on_string}: log" />
+        <data name="feature_file" format="percin" label="${tool.name} on ${on_string}: features" />
+    </outputs>
+    <help><![CDATA[
+**PECAN**
+
+PECAN (PEptide-Centric ANalysis) is a tool for peptide detection directly from DIA data without the need for a spectral library. PECAN takes a list of peptide sequences, queries each peptide against the DIA data (in centroid mzML format) and reports the best evidence of detection for each peptide. The PECAN report is designed for Percolator to separate correct from incorrect matches with false discovery rate (FDR) control.
+
+usage: pecan [-h] [-d DECOYTAG] [--ms1Unit MS1UNIT] [--ms2Unit MS2UNIT]
+             [-p MS1TOLERANCE] [-q MS2TOLERANCE] [-m MINELUTION]
+             [-x MAXELUTION] [-b BACKGROUNDPROTEOME] [-n BGDECOYNUMBER]
+             [-i IDOTP] [-v VERBOSITY] [-t TOPX] [--minCharge MINCHARGE]
+             [--maxCharge MAXCHARGE] [-s IONTYPES] [--overlap OVERLAP]
+             [--SCIEX] [--VERSION] [--ms2Boundaries MS2BOUNDARIES]
+             [--alpha ALPHA] [--beta BETA] [--fixedMod FIXEDMOD]
+             mzMLFileName peptideListFileName isolationStart isolationEnd
+             outputRoot
+
+        
+
+positional arguments:
+  mzMLFileName          path to the centroid .mzML file
+  peptideListFileName   path to the query peptide list
+  isolationStart        precursor m/z of isolation start
+  isolationEnd          precursor m/z of isolation end
+  outputRoot            name base of the output files
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -d DECOYTAG, --decoyTag DECOYTAG
+                        Tag for decoy sequences in the database
+                        Default= decoy
+                        
+  --ms1Unit MS1UNIT     Unit (ppm/mz) for precursor ion extraction tolerance
+                        Used with -p
+                        Default=ppm 
+                        
+  --ms2Unit MS2UNIT     Unit (ppm/mz) for fragment ion extraction tolerance
+                        Used with -q
+                        Default=ppm 
+                        
+  -p MS1TOLERANCE, --ms1Tolerance MS1TOLERANCE
+                        Mass error for precursor ion extracting
+                        Default=10 
+                        
+  -q MS2TOLERANCE, --ms2Tolerance MS2TOLERANCE
+                        Mass error for fragment ion extracting
+                        Default=10 
+                        
+  -m MINELUTION, --minElution MINELUTION
+                        Minimum time in seconds a peptide is expected to elute
+                        Default= 12 seconds
+                        
+  -x MAXELUTION, --maxElution MAXELUTION
+                        Maximum time in seconds a peptide is expected to elute
+                        Default= 30 seconds
+                        
+  -b BACKGROUNDPROTEOME, --backgroundProteome BACKGROUNDPROTEOME
+                        Peptide list file name of background proteome
+                        Recommended if querying for a small number of peptides
+                        Default=None
+                        
+  -n BGDECOYNUMBER, --bgDecoyNumber BGDECOYNUMBER
+                        Number of decoys used for background estimation
+                        Default=2000
+                        
+  -i IDOTP, --idotp IDOTP
+                        MS1 idotp threshold used for peak picking
+                        Default=0.000000
+                        
+  -v VERBOSITY, --verbosity VERBOSITY
+                        Verbosity level for file logging (WARNING/INFO/DEBUG)
+                        Default=INFO
+                        
+  -t TOPX, --topX TOPX  Reporting top x peaks per charged peptide
+                        Default=1
+                        
+  --minCharge MINCHARGE
+                        Minimum charge state searched
+                        Default=2
+                        
+  --maxCharge MAXCHARGE
+                        Maximum charge state searched
+                        Default=3
+                        
+  -s IONTYPES, --ionTypes IONTYPES
+                        == To be added == Fragment ion types (b, y, or by)
+                        Default=by
+                        
+  --overlap OVERLAP     Overlap percentage of isolation window
+                        Default=0 
+                        
+  --SCIEX               Flag if mzML is generated from AB Sciex converter
+  --VERSION             Show current PECAN version
+                        
+  --ms2Boundaries MS2BOUNDARIES
+                        Specify the MS2 scan lower and upper m/z limit such as 200,2000 for the target window
+                        If not specify pecan will determine it with a function
+                        
+  --alpha ALPHA         Alpha hyperparameter to set score threshold for cIons
+                        
+  --beta BETA           Beta hyperparameter to set cutoff for number of cIons
+                        
+  --fixedMod FIXEDMOD   Fixed modifications in the sample. For example, default C[+57.021] will treat every Cys
+                        in the query sequences and background proteome as C[+57.021] while overwriting any
+                        specific modifications on individual Cys
+                        If multiple fixed modifications are desired use "," to separate each fixedMod tag
+                        (e.g. C[+57.021],M[+15.995])
+                        
+
+    ]]></help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pecan2blib.xml	Fri Aug 25 14:25:21 2017 -0400
@@ -0,0 +1,38 @@
+<tool id="pecan2blib" name="pecan2blib" version="0.1.0">
+    <description>Convert PECAN results after percolator (.peptides) into spectral library (.blib)</description>
+    <requirements>
+        <requirement type="package" version="0.9.9.3">pecan</requirement> <!-- same package and version as pecan.xml; presumably it also provides pecan2blib (confirm) -->
+    </requirements>
+    <stdio>
+        <exit_code range="1:" /> <!-- matches exit codes of 1 and above; no level attribute is set here -->
+    </stdio>
+    <command><![CDATA[
+        pecan2blib -o pecan_spectral_lib --ionTypes=$ionTypes -q $qValue $toMinutes
+        #if $extension and str($extension):
+            -e '$extension'
+        #end if
+        -l '$percolator_results'  ## user text and dataset paths are single-quoted for the shell
+        && cat "pecan_spectral_lib"*.blib > '$spectral_lib'
+    ]]></command>
+    <inputs>
+        <param name="percolator_results" type="data" format="percout" multiple="true" label="percolator results"/>
+        <param name="ionTypes" type="select" label="Fragment ion types" help="suggested HCD: y, reCID: by">
+            <option value="by">by</option>
+            <option value="y">y</option>
+            <option value="b">b</option>
+        </param>
+        <param name="qValue" type="float" value="0.01" min="0.0" max="1.0" label="q-value threshold"/>
+        <param name="toMinutes" type="boolean" truevalue="--toMinutes" falsevalue="" checked="false"
+           label="Convert retention time from seconds to minutes"
+           help="Only needed when mzML files recorded retention time in seconds"/>
+        <param name="extension" type="text" value="" optional="true" label="raw file extension for skyline"
+            help="original file extension (e.g. raw/wiff) for proper Skyline views"/>
+        <!-- qValue is bounded to the 0 to 1 range; extension is only passed on (as -e) when it is not empty -->
+    </inputs>
+    <outputs>
+        <data name="spectral_lib" format="sqlite" /> <!-- filled by the command section, which concatenates pecan_spectral_lib*.blib into it -->
+    </outputs>
+    <help><![CDATA[
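+        Converts PECAN results that were post-processed by Percolator (.peptides) into a spectral library (.blib). Choose the fragment ion types and a q-value threshold; the raw file extension (e.g. raw/wiff) is optional and is there for proper Skyline views.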
+    ]]></help>
+</tool>