diff flashlfq.xml @ 9:944d155c01c9 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/flashlfq commit 047addefbac7468a92ba4fc04899bd5136c58ea4
author galaxyp
date Wed, 30 Jan 2019 05:38:43 -0500
parents 69cd4ab3145b
children a21452a5c952
line wrap: on
line diff
--- a/flashlfq.xml	Fri Jan 26 09:07:49 2018 -0500
+++ b/flashlfq.xml	Wed Jan 30 05:38:43 2019 -0500
@@ -1,7 +1,7 @@
-<tool id="flashlfq" name="FlashLFQ" version="0.1.100">
+<tool id="flashlfq" name="FlashLFQ" version="0.1.111">
     <description>ultrafast label-free quantification for mass-spectrometry proteomics</description>
     <requirements>
-        <requirement type="package" version="0.1.100">flashlfq</requirement>
+        <requirement type="package" version="0.1.111">flashlfq</requirement>
     </requirements>
     <command><![CDATA[
         #import re
@@ -12,31 +12,36 @@
             #set $input_name = $re.sub('[.][^.]*$','',$peak_list.display_name.split('/')[-1]) + ".mzML"
             ln -s '${peak_list}' 'spectrum_dir/${input_name}' &&
         #end for
-
+        #if $normalize.nor == 'true':
+          #set $input_name = $re.sub('[.][^.]*$','',$normalize.experimental_design.display_name.split('/')[-1]) + ".tsv"
+          ln -s '${normalize.experimental_design}' 'spectrum_dir/${input_name}' &&
+        #end if
         FlashLFQ 
         --idt '$idt_path'
-        --rep spectrum_dir
+        --rep "./spectrum_dir"
         --ppm $ppm
         --iso $iso
         --nis $nis
+        #if $normalize.nor == 'true':
+        --nor true
+        #end if
         #if $intensity == 'integrate':
             --int true
         #end if
         #if $charge == 'precursor':
             --chg true
         #end if
-        $rmm $mbr
-        --pau false
-        && cat *_FlashLFQ_Log.txt | sed 's/\(Analysis summary for:\).*working./\1 /' > '$log' 
-        && cp *_FlashLFQ_QuantifiedBaseSequences.tsv '$quantifiedBaseSequences'
-        && cp *_FlashLFQ_QuantifiedModifiedSequences.tsv '$quantifiedModifiedSequences'
+        $rmm $mbr --mrt $mrt $pro
+        --out . > logfile.txt
+        && cat logfile.txt | sed 's/\(Analysis summary for:\).*working./\1 /' > '$log'
+        && cp *_FlashLFQ_QuantifiedProteins.tsv '$quantifiedProteins'
+        && cp *_FlashLFQ_QuantifiedPeptides.tsv '$quantifiedPeptides'
         && cp *_FlashLFQ_QuantifiedPeaks.tsv '$quantifiedPeaks'
-        ## create issue for FlashLFQ to name column headers correctly
-        && grep -v '^test$' *_FlashLFQ_QuantifiedProteins.tsv > '$quantifiedProteins'
     ]]></command>
+
     <inputs>
-        <param name="idt" type="data" format="tabular" label="identification file" 
-             help="MetaMorpheus,Morpheus"/>
+        <param name="idt" type="data" format="tabular" label="identification file"
+             help="MetaMorpheus,Morpheus,PeptideShaker PSM Report,MaxQuant"/>
         <param name="peak_lists" type="data" format="mzml" multiple="true" label="spectrum files"/>
         <param name="ppm" type="float" value="10" min="1" max="20" label="monoisotopic ppm tolerance"/>
         <param name="iso" type="float" value="5" min="1" max="10" label="isotopic distribution tolerance in ppm"/>
@@ -49,37 +54,31 @@
             <option value="all" selected="true">use all identification detected charge states</option>
             <option value="precursor">use precursor charge</option>
         </param>
-        <param name="rmm" type="boolean" truevalue="--rmm true" falsevalue="--rmm false" checked="true" 
+        <param name="rmm" type="boolean" truevalue="--rmm true" falsevalue="--rmm false" checked="true"
             label="require observed monoisotopic mass peak"/>
-        <param name="mbr" type="boolean" truevalue="--mbr true" falsevalue="--mbr false" checked="false" 
+        <conditional name="normalize">
+            <param name="nor" type="select" label="normalize intensity results">
+                <option value="false">No</option>
+                <option value="true">Yes</option>
+            </param>
+            <when value="false"/>
+            <when value="true">
+                <param name="experimental_design" type="data" format="tabular" label="ExperimentalDesign.tsv"/>
+            </when>
+        </conditional>
+        <param name="mbr" type="boolean" truevalue="--mbr true" falsevalue="--mbr false" checked="false"
             label="match between runs"/>
+        <param name="mrt" type="float" value="1.5" min="0" max="30" label="maximum MBR window in minutes"/>
+        <param name="pro" type="boolean" truevalue="--pro true" falsevalue="--pro false" checked="false"
+            label="advanced protein quantification"/>
     </inputs>
+
     <outputs>
-        <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" />
-        <data name="quantifiedPeaks" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv">
-            <actions>
-                <action name="column_names" type="metadata" 
-                 default="File Name,Base Sequence,Full Sequence,Protein Group,Peptide Monoisotopic Mass,MS2 Retention Time,Precursor Charge,Theoretical MZ,Peak intensity,Peak RT Start,Peak RT Apex,Peak RT End,Peak MZ,Peak Charge,Num Charge States Observed,Peak Detection Type,PSMs Mapped,Base Sequences Mapped,Full Sequences Mapped,Peak Split Valley RT,Peak Apex Mass Error (ppm)"/>
-            </actions>
-        </data>
-        <data name="quantifiedBaseSequences" format="tabular" label="${tool.name} on ${on_string}: QuantifiedBaseSequences.tsv">
-            <actions>
-                <action name="column_names" type="metadata" 
-                 default="Sequence,Protein Group,${','.join(['Intensity_' + i.name for i in $peak_lists])},${','.join(['Detection Type_' + i.name for i in $peak_lists])}"/>
-            </actions>
-        </data>
-        <data name="quantifiedModifiedSequences" format="tabular" label="${tool.name} on ${on_string}: QuantifiedModifiedSequences.tsv">
-            <actions>
-                <action name="column_names" type="metadata" 
-                 default="Sequence,Protein Group,${','.join(['Intensity_' + i.name for i in $peak_lists])},${','.join(['Detection Type_' + i.name for i in $peak_lists])}"/>
-            </actions>
-        </data>
-        <data name="quantifiedProteins" format="tabular" label="${tool.name} on ${on_string}: QuantifiedProteins.tsv">
-            <actions>
-                <action name="column_names" type="metadata" 
-                 default="Protein,${','.join([i.name for i in $peak_lists])}"/>
-            </actions>
-        </data>
+        <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" />
+        <data name="quantifiedPeaks" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv" />
+        <data name="quantifiedPeptides" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeptides.tsv" />
+        <data name="quantifiedProteins" format="tabular" label="${tool.name} on ${on_string}: QuantifiedProteins.tsv" />
+
     </outputs>
     <tests>
         <test>
@@ -87,10 +86,9 @@
             <param name="peak_lists" value="sliced-mzml.mzML" ftype="mzml"/>
             <param name="ppm" value="12"/>
             <param name="iso" value="6"/>
-            <output name="log">
+            <output name="quantifiedPeaks">
                 <assert_contents>
-                    <has_text text="ppmTolerance = 12" />
-                    <has_text text="isotopePpmTolerance = 6" />
+                    <has_text text="EGFQVADGPLYR" />
                 </assert_contents>
             </output>
         </test>
@@ -136,17 +134,15 @@
 
 **Outputs**:
 
-  - **QuantifiedProteins.tsv** - Protein intensities are summed here within a run. 
+  - **QuantifiedProteins.tsv** - Lists each protein accession; gene and organism will be added in a future release when the identification TSV contains them. The intensity is either a) the sum of the 3 most intense peptides or b) (advanced protein quantification) a weighted average of the intensities of the peptides assigned to the protein. The weights are determined by how well each peptide co-varies with the other peptides assigned to that protein.
 
   - **QuantifiedPeaks.tsv** - Each chromatographic peak is shown here, even peaks that were not quantifiable (peak intensity = 0). Details about each peak, such as number of PSMs mapped, start/apex/end retention times, ppm error, etc are contained in this file. A peptide can have multiple peaks over the course of a run (e.g., oxidized peptidoforms elute at different times, etc). Ambiguous peaks are displayed with a | (pipe) delimiter to indicate more than one peptide mapped to that peak.
 
-  - **QuantifiedModifiedSequences.tsv** - Similar to QuantifiedBaseSequences, but instead of being summed by Base Sequence, peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs.
+  - **QuantifiedPeptides.tsv** - Peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs.
 
-  - **QuantifiedBaseSequences.tsv** - Peptide intensities are summed here within a run (including differently-modified forms of the same amino acid sequence) and displayed in a convenient format for comparing across runs. The identification type (MS/MS or MBR) is also indicated. A peptide with more than 30% of its intensity coming from ambiguous peak(s) is considered not quantifiable and is given an intensity of -1.
+  - **Log.txt** - Log of the FlashLFQ run.
 
 
-  - **Log.txt** - Log of the FlashLFQ run. Includes timestamps and quantification time for each file, total analysis time, directories used, and settings.
-
 
     ]]></help>
     <citations>