Mercurial > repos > galaxyp > idpassemble

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/idpassemble.xml	Fri Oct 06 13:56:47 2017 -0400
@@ -0,0 +1,121 @@
+<?xml version="1.0"?>
+<tool id="idpassemble" name="idpAssemble" version="@VERSION@.0">
+    <description>Merges IDPicker databases from single files into a merged database, and filters the result at PSM/spectrum/peptide/protein/gene levels.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <stdio> <!-- Either rule below marks the job as failed. -->
+        <exit_code level="fatal" range="1:" description="Job Failed"/> <!-- exit status 1 or greater -->
+        <regex level="fatal" source="both" match="^Error:.*$"/> <!-- pattern is tested against stdout and stderr -->
+    </stdio>
+    <command>
+<![CDATA[
+        #set $single_input = len($input) < 2
+        #if $single_input
+        cp '${input}' output &&
+        #end if
+        idpAssemble
+            -MaxFDRScore ${MaxFDRScore}
+            -MinDistinctPeptides ${filter_at_gene_level_condition.MinDistinctPeptides}
+            -MinSpectra ${filter_at_gene_level_condition.MinSpectra}
+            -MinAdditionalPeptides ${filter_at_gene_level_condition.MinAdditionalPeptides}
+            -MinSpectraPerDistinctMatch ${MinSpectraPerDistinctMatch}
+            -MinSpectraPerDistinctPeptide ${MinSpectraPerDistinctPeptide}
+            -MaxProteinGroupsPerPeptide ${MaxProteinGroupsPerPeptide}
+            #if $filter_at_gene_level_condition.FilterAtGeneLevel
+            -FilterAtGeneLevel 1
+            #end if
+            -SummarizeSources 1
+            #if $single_input
+                output
+            #else
+                -MergedOutputFilepath output
+                #for $idpdb in $input
+                    '${idpdb.file_name}'
+                #end for
+            #end if
+]]>
+    </command> <!-- Single input: it is copied to ./output and that copy is passed to idpAssemble. Several inputs: all are passed and ./output is named via -MergedOutputFilepath. -->
+    <inputs>
+        <param name="input" type="data" format="idpdb" label="Input idpDB(s)" multiple="true"/>
+        <param argument="-MaxFDRScore" type="float" label="Max FDR Score" min="0.00000001" value="0.05" help="Peptide-spectrum-matches (PSMs) with an FDR score (interpolated Q-value) higher than this will be excluded from the filtered data set." />
+        <conditional name="filter_at_gene_level_condition"> <!-- Both branches define the same three thresholds with the same defaults; only the labels and help wording differ (gene groups vs. protein groups). -->
+            <param argument="-FilterAtGeneLevel" type="boolean" truevalue="1" falsevalue="0" label="Filter at Gene Level" help="Apply filters at the gene level (i.e. 'min distinct peptides per gene group' instead of 'min distinct peptides per protein group')"/>
+            <when value="1">
+                <param argument="-MinDistinctPeptides" type="integer" label="Min Distinct Peptides per Gene Group" min="1" value="2" help="Gene groups with fewer than this number of peptides will be excluded from the filtered data set." />
+                <param argument="-MinSpectra" type="integer" label="Min Filtered Spectra per Gene Group" min="1" value="2" help="Gene groups with fewer than this number of spectra will be excluded from the filtered data set." />
+                <param argument="-MinAdditionalPeptides" type="integer" label="Min Additional Peptides per Gene Group" min="0" value="1" help="Gene groups that are not necessary to explain the presence of at least this many extra peptides will be excluded from the filtered data set. A value of 1 means that each gene group must explain at least 1 peptide that other gene groups do not explain." />
+            </when>
+            <when value="0">
+                <param argument="-MinDistinctPeptides" type="integer" label="Min Distinct Peptides per Protein Group" min="1" value="2" help="Protein groups with fewer than this number of peptides will be excluded from the filtered data set." />
+                <param argument="-MinSpectra" type="integer" label="Min Filtered Spectra per Protein Group" min="1" value="2" help="Protein groups with fewer than this number of spectra will be excluded from the filtered data set." />
+                <param argument="-MinAdditionalPeptides" type="integer" label="Min Additional Peptides per Protein Group" min="0" value="1" help="Protein groups that are not necessary to explain the presence of at least this many extra peptides will be excluded from the filtered data set. A value of 1 means that each protein group must explain at least 1 peptide that other protein groups do not explain." />
+            </when>
+        </conditional>
+        <param argument="-MinSpectraPerDistinctMatch" type="integer" label="Min Filtered Spectra per Distinct Match" min="1" value="1" help="Distinct matches with fewer than this number of spectra will be excluded from the filtered data set." />
+        <param argument="-MinSpectraPerDistinctPeptide" type="integer" label="Min Filtered Spectra per Distinct Peptide" min="1" value="1" help="Distinct peptides with fewer than this number of spectra will be excluded from the filtered data set." />
+        <param argument="-MaxProteinGroupsPerPeptide" type="integer" label="Max Protein Groups per Distinct Peptide" min="0" value="10" help="Peptides that map to more than this number of protein groups will be excluded from the filtered data set. Highly ambiguous peptides are not very useful for quantitation." />
+    </inputs>
+    <outputs> <!-- The command leaves its result in a working-directory file named "output"; it is collected here. -->
+        <data name="output" format="idpdb" from_work_dir="output"/>
+    </outputs>
+    <tests> <!-- Tests 1 to 3 each pass a single idpDB; test 4 passes all three together. Outputs are compared by size (sim_size, delta 500000). Conditional parameters are qualified with "|". -->
+        <test>
+            <param name="input" value="201208-378803-mm.idpDB" />
+            <param name="MaxFDRScore" value="0.05" />
+            <param name="filter_at_gene_level_condition|MinDistinctPeptides" value="2" />
+            <param name="filter_at_gene_level_condition|MinSpectra" value="2" />
+            <param name="filter_at_gene_level_condition|MinAdditionalPeptides" value="1" />
+            <param name="MinSpectraPerDistinctMatch" value="1" />
+            <param name="MinSpectraPerDistinctPeptide" value="1" />
+            <param name="MaxProteinGroupsPerPeptide" value="10" />
+            <output name="output" file="201208-378803-mm-filtered.idpDB" compare="sim_size" delta="500000" />
+        </test>
+        <test>
+            <param name="input" value="201208-378803-msgf.idpDB" />
+            <param name="MaxFDRScore" value="0.05" />
+            <param name="filter_at_gene_level_condition|MinDistinctPeptides" value="2" />
+            <param name="filter_at_gene_level_condition|MinSpectra" value="2" />
+            <param name="filter_at_gene_level_condition|MinAdditionalPeptides" value="1" />
+            <param name="MinSpectraPerDistinctMatch" value="1" />
+            <param name="MinSpectraPerDistinctPeptide" value="1" />
+            <param name="MaxProteinGroupsPerPeptide" value="10" />
+            <output name="output" file="201208-378803-msgf-filtered.idpDB" compare="sim_size" delta="500000" />
+        </test>
+        <test>
+            <param name="input" value="201208-378803-cm.idpDB" />
+            <param name="MaxFDRScore" value="0.05" />
+            <param name="filter_at_gene_level_condition|MinDistinctPeptides" value="2" />
+            <param name="filter_at_gene_level_condition|MinSpectra" value="2" />
+            <param name="filter_at_gene_level_condition|MinAdditionalPeptides" value="1" />
+            <param name="MinSpectraPerDistinctMatch" value="1" />
+            <param name="MinSpectraPerDistinctPeptide" value="1" />
+            <param name="MaxProteinGroupsPerPeptide" value="10" />
+            <output name="output" file="201208-378803-cm-filtered.idpDB" compare="sim_size" delta="500000" />
+        </test>
+        <test>
+            <param name="input" value="201208-378803-mm.idpDB,201208-378803-msgf.idpDB,201208-378803-cm.idpDB" />
+            <param name="MaxFDRScore" value="0.05" />
+            <param name="filter_at_gene_level_condition|MinDistinctPeptides" value="2" />
+            <param name="filter_at_gene_level_condition|MinSpectra" value="2" />
+            <param name="filter_at_gene_level_condition|MinAdditionalPeptides" value="1" />
+            <param name="MinSpectraPerDistinctMatch" value="1" />
+            <param name="MinSpectraPerDistinctPeptide" value="1" />
+            <param name="MaxProteinGroupsPerPeptide" value="10" />
+            <output name="output" file="201208-378803.idpDB" compare="sim_size" delta="500000" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+Merges and filters one or more IDPicker 3 idpDB files into a combined idpDB file. Protein assembly (e.g. parsimony) is conducted on the combined set of proteins.
+]]>
+    </help>
+    <citations> <!-- One DOI-identified publication plus a BibTeX entry for the tools-galaxyp GitHub repository. -->
+        <citation type="doi">10.1021/pr900360j</citation>
+        <citation type="bibtex">@misc{toolsGalaxyP, author = {Chilton, J, Chambers MC, et al.}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub repository},
+          year = {2015}, url = {https://github.com/galaxyproteomics/tools-galaxyp}}</citation> <!-- TODO: fix substitution of commit ", commit = {$sha1$}" -->
+    </citations>
+</tool>
--- a/idpquery.xml	Fri Oct 06 13:55:15 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-<?xml version="1.0"?>
-<tool id="idpquery" name="idpQuery" version="@VERSION@.0">
-    <description>Creates text reports from idpDB files.</description>
-    <macros>
-        <import>macros.xml</import>
-        <xml name="proteinGroupByColumns">
-            <param name="report_columns" type="select" label="Select report columns" display="checkboxes" multiple="true">
-                <option value="Accession" selected="true">Accession</option>
-                <option value="GeneId">Gene Id</option>
-                <option value="GeneGroup">Gene Group</option>
-                <option value="DistinctPeptides">Distinct Peptides</option>
-                <option value="DistinctMatches">Distinct Matches</option>
-                <option value="FilteredSpectra">Filtered Spectra</option>
-                <option value="IsDecoy">Is Decoy</option>
-                <option value="Cluster">Cluster</option>
-                <option value="ProteinGroup">Protein Group</option>
-                <option value="Length">Length</option>
-                <option value="PercentCoverage">Percent Coverage</option>
-                <option value="Sequence">Sequence</option>
-                <option value="Description">Description</option>
-                <option value="TaxonomyId">Taxonomy Id</option>
-                <option value="GeneName">Gene Name</option>
-                <option value="GeneFamily">Gene Family</option>
-                <option value="Chromosome">Chromosome</option>
-                <option value="GeneDescription">Gene Description</option>
-                <option value="PrecursorIntensity">Precursor Intensity</option>
-                <option value="PrecursorArea">Precursor Area</option>
-                <option value="PrecursorBestSNR">Precursor Best SNR</option>
-                <option value="PrecursorMeanSNR">Precursor Mean SNR</option>
-                <option value="iTRAQ4plex">iTRAQ 4-plex</option>
-                <option value="iTRAQ8plex">iTRAQ 8-plex</option>
-                <option value="TMT2plex">TMT 2-plex</option>
-                <option value="TMT6plex">TMT 6-plex</option>
-                <option value="TMT10plex">TMT 10-plex</option>
-                <option value="PivotMatchesByGroup">Pivot Matches By Group</option>
-                <option value="PivotMatchesBySource">Pivot Matches By Source</option>
-                <option value="PivotPeptidesByGroup">Pivot Peptides By Group</option>
-                <option value="PivotPeptidesBySource">Pivot Peptides By Source</option>
-                <option value="PivotSpectraByGroup">Pivot Spectra By Group</option>
-                <option value="PivotSpectraBySource">Pivot Spectra By Source</option>
-                <option value="PivotPrecursorIntensityByGroup">Pivot Precursor Intensity By Group</option>
-                <option value="PivotPrecursorIntensityBySource">Pivot Precursor Intensity By Source</option>
-                <option value="PivotPrecursorAreaByGroup">Pivot Precursor Area By Group</option>
-                <option value="PivotPrecursorAreaBySource">Pivot Precursor Area By Source</option>
-                <option value="PivotPrecursorBestSNRByGroup">Pivot Precursor Best SNR By Group</option>
-                <option value="PivotPrecursorBestSNRBySource">Pivot Precursor Best SNR By Source</option>
-                <option value="PivotPrecursorMeanSNRByGroup">Pivot Precursor Mean SNR By Group</option>
-                <option value="PivotPrecursorMeanSNRBySource">Pivot Precursor Mean SNR By Source</option>
-                <option value="PivotITRAQByGroup">Pivot iTRAQ By Group</option>
-                <option value="PivotITRAQBySource">Pivot iTRAQ By Source</option>
-                <option value="PivotTMTByGroup">Pivot TMT By Group</option>
-                <option value="PivotTMTBySource">Pivot TMT By Source</option>
-                <option value="PeptideGroups">Peptide Groups</option>
-                <option value="PeptideSequences">Peptide Sequences</option>
-            </param>
-        </xml>
-    </macros>
-    <expand macro="requirements" />
-    <stdio>
-        <exit_code range="1:" level="fatal" description="Job Failed" />
-        <regex match="^Error:.*$" source="both" level="fatal" />
-    </stdio>
-    <command>
-<![CDATA[
-        #set $input_name = $input.display_name
-        #set $output_name = $input_name.split(".")[0] + ".tsv"
-        ln -s '$input' '${input_name}' &&
-
-        idpQuery $group_by.group_by_value $group_by.report_columns '${input_name}' &&
-        mv '$output_name' output
-]]>
-    </command>
-    <inputs>
-        <param name="input" type="data" format="idpdb" label="Input idpDB" multiple="false" />
-        <conditional name="group_by">
-            <param name="group_by_value" type="select" label="What proteomic entity do you want to group by?" help="Only protein- and gene-centric entities are currently supported.">
-                <option value="Protein" selected="true">Protein</option>
-                <option value="ProteinGroup">Protein Group</option>
-                <option value="Gene">Gene</option>
-                <option value="GeneGroup">Gene Group</option>
-            </param>
-            <when value="Protein">
-                <expand macro="proteinGroupByColumns" />
-            </when>
-            <when value="ProteinGroup">
-                <expand macro="proteinGroupByColumns" />
-            </when>
-            <when value="Gene">
-                <expand macro="proteinGroupByColumns" />
-            </when>
-            <when value="GeneGroup">
-                <expand macro="proteinGroupByColumns" />
-            </when>
-        </conditional>
-    </inputs>
-    <outputs>
-        <data format="tabular" name="output" from_work_dir="output" />
-    </outputs>
-    <tests>
-        <test>
-          <param name="input" value="201203-624176-12-mm-gui-test.idpDB" />
-          <param name="group_by_value" value="Protein" />
-          <param name="report_columns" value="Accession,FilteredSpectra,Description" />
-          <output name="output" file="201203-624176-12-mm-gui-test-Protein-Accession,FilteredSpectra,Description.tsv" />
-        </test>
-        <test>
-          <param name="input" value="201203-624176-12-mm-gui-test.idpDB" />
-          <param name="group_by_value" value="ProteinGroup" />
-          <param name="report_columns" value="ProteinGroup,Accession,PercentCoverage" />
-          <output name="output" file="201203-624176-12-mm-gui-test-ProteinGroup-ProteinGroup,Accession,PercentCoverage.tsv" />
-        </test>
-        <test>
-          <param name="input" value="201203-624176-12-mm-gui-test.idpDB" />
-          <param name="group_by_value" value="Gene" />
-          <param name="report_columns" value="GeneId,Accession,DistinctPeptides" />
-          <output name="output" file="201203-624176-12-mm-gui-test-Gene-GeneId,Accession,DistinctPeptides.tsv" />
-        </test>
-        <test>
-          <param name="input" value="201203-624176-12-mm-gui-test.idpDB" />
-          <param name="group_by_value" value="GeneGroup" />
-          <param name="report_columns" value="GeneGroup,GeneId,DistinctMatches" />
-          <output name="output" file="201203-624176-12-mm-gui-test-GeneGroup-GeneGroup,GeneId,DistinctMatches.tsv" />
-        </test>
-    </tests>
-    <help>
-<![CDATA[
-**What it does**
-
-Creates user-configurable text reports from IDPicker 3 idpDB files.
-]]>
-    </help>
-    <citations>
-        <citation type="doi">10.1021/pr900360j</citation>
-        <citation type="bibtex">@misc{toolsGalaxyP, author = {Chilton, J, Chambers MC, et al.}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub repository},
-                                      year = {2015}, url = {https://github.com/galaxyproteomics/tools-galaxyp}}</citation> <!-- TODO: fix substitution of commit ", commit = {$sha1$}" -->
-    </citations>
-</tool>