changeset 3:41c0c75301b3 draft

planemo upload for repository commit 59fca11aabb90cd875d93e7da8791a49e1e2c01a-dirty
author galaxyp
date Thu, 25 Jan 2018 15:03:28 -0500
parents d042eabcd6ec
children 597199e75dcc
files flashlfq.xml test-data/._aggregatePSMs_5ppmAroundZero.psmtsv test-data/._sliced-mzml.mzML
diffstat 3 files changed, 58 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/flashlfq.xml	Wed Dec 06 13:46:13 2017 -0500
+++ b/flashlfq.xml	Thu Jan 25 15:03:28 2018 -0500
@@ -5,14 +5,14 @@
         #import re
-        #set $idt_path = $re.sub('[.][^.]*$','',$idt.display_name.split('/')[-1]) + ".psmtsv"
+        #set $idt_path = $re.sub('\s','_',$re.sub('[.][^.]*$','',$idt.display_name.split('/')[-1])) + ".psmtsv"
         ## cp '${idt}' '${idt_path}';
         ln -s '${idt}' '${idt_path}';
         #for $peak_list in $peak_lists:
             #set $input_name = $re.sub('[.][^.]*$','',$peak_list.display_name.split('/')[-1]) + ".mzML"
             ln -s '${peak_list}' '${input_name}';
         #end for
-        FlashLFQ --idt $idt_path --rep `pwd` --ppm $ppm --iso $iso --nis $nis
+        FlashLFQ --idt '$idt_path' --rep `pwd` --ppm $ppm --iso $iso --nis $nis
         #if $intensity == 'integrate':
         --int true
         #end if
@@ -28,7 +28,8 @@
         && cp *_FlashLFQ_QuantifiedProteins.tsv '$quantifiedProteins'
-        <param name="idt" type="data" format="tabular" label="identification file"/>
+        <param name="idt" type="data" format="tabular" label="identification file" 
+             help="MetaMorpheus,Morpheus"/>
         <param name="peak_lists" type="data" format="mzml" multiple="true" label="spectrum files"/>
         <param name="ppm" type="float" value="10" min="1" max="20" label="monoisotopic ppm tolerance"/>
         <param name="iso" type="float" value="5" min="1" max="10" label="isotopic distribution tolerance in ppm"/>
@@ -48,15 +49,35 @@
         <data name="log" format="txt" label="${} on ${on_string}: Log" />
-        <data name="quantifiedBaseSequences" format="tabular" label="${} on ${on_string}: QuantifiedBaseSequences.tsv" />
-        <data name="quantifiedModifiedSequences" format="tabular" label="${} on ${on_string}: QuantifiedModifiedSequences.tsv" />
-        <data name="quantifiedPeaks" format="tabular" label="${} on ${on_string}: QuantifiedPeaks.tsv" />
-        <data name="quantifiedProteins" format="tabular" label="${} on ${on_string}: QuantifiedProteins.tsv" />
+        <data name="quantifiedPeaks" format="tabular" label="${} on ${on_string}: QuantifiedPeaks.tsv">
+            <actions>
+                <action name="column_names" type="metadata" 
+                 default="File Name,Base Sequence,Full Sequence,Protein Group,Peptide Monoisotopic Mass,MS2 Retention Time,Precursor Charge,Theoretical MZ,Peak intensity,Peak RT Start,Peak RT Apex,Peak RT End,Peak MZ,Peak Charge,Num Charge States Observed,Peak Detection Type,PSMs Mapped,Base Sequences Mapped,Full Sequences Mapped,Peak Split Valley RT,Peak Apex Mass Error (ppm)"/>
+            </actions>
+        </data>
+        <data name="quantifiedBaseSequences" format="tabular" label="${} on ${on_string}: QuantifiedBaseSequences.tsv">
+            <actions>
+                <action name="column_names" type="metadata" 
+                 default="Sequence,Protein Group,${','.join(['Intensity_' + for i in $peak_lists])},${','.join(['Detection Type_' + for i in $peak_lists])}"/>
+            </actions>
+        </data>
+        <data name="quantifiedModifiedSequences" format="tabular" label="${} on ${on_string}: QuantifiedModifiedSequences.tsv">
+            <actions>
+                <action name="column_names" type="metadata" 
+                 default="Sequence,Protein Group,${','.join(['Intensity_' + for i in $peak_lists])},${','.join(['Detection Type_' + for i in $peak_lists])}"/>
+            </actions>
+        </data>
+        <data name="quantifiedProteins" format="tabular" label="${} on ${on_string}: QuantifiedProteins.tsv">
+            <actions>
+                <action name="column_names" type="metadata" 
+                 default="Protein"/>
+            </actions>
+        </data>
             <param name="idt" value="aggregatePSMs_5ppmAroundZero.psmtsv" ftype="tabular"/>
-            <param name="scans" value="sliced-mzml.mzML" ftype="mzml"/>
+            <param name="peak_lists" value="sliced-mzml.mzML" ftype="mzml"/>
             <param name="ppm" value="12"/>
             <param name="iso" value="6"/>
             <output name="log">
@@ -91,6 +112,35 @@
     --nis [int|number of isotopes required to be observed]
+**Tab-Delimited Identification Text File**
+The first line of the text file should contain column headers identifying what each column is. Note that MetaMorpheus (.psmtsv), Morpheus, MaxQuant (msms.txt), and TDPortal tab-delimited column headers are supported natively and such files can be read without modification. For search software that lists decoys and PSMs above 1% FDR (e.g., MetaMorpheus), you may want to remove these prior to FlashLFQ analysis. FlashLFQ will probably crash if ambiguous PSMs are passed into it (e.g., a PSM with more than 2 peptides listed in one line).
+The following headers are required in the list of MS/MS identifications:
+  - **File Name** - File extensions should be tolerated, but file names without an extension have been tested more extensively (e.g., use MyFile rather than MyFile.mzML)
+  - **Base Sequence** - Should contain only the amino acid sequence; anything else will likely result in a crash
+  - **Full Sequence** - Modified sequence. Can contain any letters, but must be consistent between the same peptidoform to get accurate results
+  - **Peptide Monoisotopic Mass** - Theoretical monoisotopic mass, including modification mass
+  - **Scan Retention Time** - MS/MS identification scan retention time
+  - **Precursor Charge** - Charge of the ion selected for MS/MS resulting in the identification
+  - **Protein Accession** - Protein accession(s) for the peptide; protein quantification is still preliminary
+**Output Files**
+  - **QuantifiedProteins.tsv** - Protein intensities are summed here within a run.
+  - **QuantifiedPeaks.tsv** - Each chromatographic peak is shown here, even peaks that were not quantifiable (peak intensity = 0). Details about each peak, such as the number of PSMs mapped, start/apex/end retention times, and ppm error, are contained in this file. A peptide can have multiple peaks over the course of a run (e.g., oxidized peptidoforms elute at different times). Ambiguous peaks are displayed with a | (pipe) delimiter to indicate that more than one peptide mapped to that peak.
+  - **QuantifiedModifiedSequences.tsv** - Similar to QuantifiedBaseSequences, but instead of being summed by Base Sequence, peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs.
+  - **QuantifiedBaseSequences.tsv** - Peptide intensities are summed here within a run (including differently-modified forms of the same amino acid sequence) and displayed in a convenient format for comparing across runs. The identification type (MS/MS or MBR) is also indicated. A peptide with more than 30% of its intensity coming from ambiguous peak(s) is considered not quantifiable and is given an intensity of -1.
+  - **Log.txt** - Log of the FlashLFQ run. Includes timestamps and quantification time for each file, total analysis time, directories used, and settings.
         <citation type="doi">10.1021/acs.jproteome.7b00608</citation>
Binary file test-data/._aggregatePSMs_5ppmAroundZero.psmtsv has changed
Binary file test-data/._sliced-mzml.mzML has changed