Repository 'calisp'
hg clone https://testtoolshed.g2.bx.psu.edu/repos/galaxyp/calisp

Changeset 0:d158a7909193 (2023-06-01)
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 060699366b6dd19ad6c3ef3f332f63cc55d75dce
added:
calisp.xml
feather2tsv.py
b
diff -r 000000000000 -r d158a7909193 calisp.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/calisp.xml Thu Jun 01 08:33:58 2023 +0000
[
b'@@ -0,0 +1,193 @@\n+<tool id="calisp" name="calisp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">\n+    <description>Estimate isotopic composition of peptides from proteomics mass spectrometry data</description>\n+    <macros>\n+        <token name="@TOOL_VERSION@">3.0.10</token>\n+        <token name="@VERSION_SUFFIX@">0</token>\n+        <token name="@CALISP_REPO@">https://raw.githubusercontent.com/kinestetika/Calisp/208d495674e2b52fe56cf23457c833d1c2527242</token>\n+        <xml name="input_macro" tokens="multiple">\n+            <!-- According to readme mzid input is not yet implented -->\n+        </xml>\n+    </macros>\n+    <requirements>\n+        <requirement type="package" version="@TOOL_VERSION@">calisp</requirement>\n+    </requirements>\n+    <command detect_errors="aggressive"><![CDATA[\n+#import re\n+\n+mkdir -p spectra &&\n+#set escaped_specs = re.sub(\'[^\\w\\-\\.,:]\', \'_\', str($spectrum_file.element_identifier))\n+ln -s \'$spectrum_file\' spectra/\'$escaped_specs\' &&\n+\n+mkdir -p psms &&\n+#set escaped_peps = re.sub(\'[^\\w\\-\\.,:]\', \'_\', str($peptide_file.element_identifier))\n+ln -s \'$peptide_file\' psms/\'$escaped_peps\' &&\n+\n+calisp \n+    --spectrum_file spectra/\n+    --peptide_file psms/\n+    --output_file calisp-output/\n+    --mass_accuracy $mass_accuracy\n+    --bin_delimiter \'$bin_delimiter\'\n+    --threads "\\${GALAXY_SLOTS:-1}"\n+    --isotope $isotope\n+    $compute_clumps &&\n+\'$__tool_directory__/feather2tsv.py\' --calisp_output calisp-output/\n+    ]]></command>\n+    <inputs>\n+        <param argument="--spectrum_file" type="data" multiple="false" format="mzml" label="Spectrum file"/>\n+        <param argument="--peptide_file" type="data" multiple="false" format="tabular" label="Peptide file" help="Psm file" />\n+        <param argument="--mass_accuracy" type="float" value="10" label="Mass accuracy" help="The maximum mass difference between theoretical mass and experimental mass of a peptide" 
/>\n+        <param argument="--bin_delimiter" type="text" value="_" label="Bin delimiter" help="For metagenomic data, the delimiter that separates the bin ID from the protein ID (default: &quot;_&quot;). Use &quot;-&quot; to ignore bin IDs entirely.">\n+           <sanitizer invalid_char="">\n+                <valid initial="string.ascii_letters,string.digits">\n+                    <add value="_" />\n+                    <add value="-" />\n+                    <add value=":" />\n+                </valid>\n+            </sanitizer> \n+        </param>\n+        <param argument="--isotope" type="select" label="Target isotope">\n+            <option value="13C" selected="true">13C</option>\n+            <option value="14C">14C</option>\n+            <option value="15N">15N</option>\n+            <option value="17O">17O</option>\n+            <option value="18O">18O</option>\n+            <option value="2H">2H</option>\n+            <option value="3H">3H</option>\n+            <option value="33S">33S</option>\n+            <option value="34S">34S</option>\n+            <option value="36S">36S</option>\n+        </param>\n+        <param argument="--compute_clumps" type="boolean" truevalue="--compute_clumps" falsevalue="" checked="false" label="Compute clumps" help="To compute clumpiness of carbon assimilation. Only use when samples are labeled to saturation. Estimation of clumpiness takes much additional time." 
/>\n+    </inputs>\n+    <outputs>\n+        <collection name="output" type="list">\n+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\\.tsv" format="tabular" directory="calisp-output"/>\n+        </collection>\n+    </outputs>\n+    <tests>\n+        <!-- TODO test data too large, available from here: https://github.com/kinestetika/Calisp/tree/master/test\n+            if possible include via location in the future\n+        <test expect_num_outputs="1">\n+            <param name="spectrum_file" value="calisp_test_data.mzML" ftype="mzml"/>\n+            <param name="peptide_file" value="calisp_test_data_TargetPeptideSpectrumMatch.txt" ftype="tabul'..b'sociated with the pattern (without the bin id)\n+peptide                                    the amino acid sequence of the peptide\n+peptide_mass                               the mass of the peptide\n+C                                          # of carbon atoms in the peptide\n+N                                          # of nitrogen atoms in the peptide\n+O                                          # of oxygen atoms in the peptide\n+H                                          # of hydrogen atoms in the peptide\n+S                                          # of sulfur atoms in the peptide\n+psm_id                                     psm id\n+psm_mz                                     psm m over z\n+psm_charge                                 psm charge\n+psm_neutrons                               number of neutrons inferred from custom \'neutron\' modifications \n+psm_rank                                   rank of the psm\n+psm_precursor_id                           id of the ms1 spectrum that was the source of the psm \n+psm_precursor_mz                           mass over charge of the precursor of the psm\n+pattern_charge                             charge of the pattern\n+pattern_precursor_id                       id of the ms1 spectrum that was the source of the pattern\n+pattern_total_intensity       
             total intensity of the pattern\n+pattern_peak_count                         # of peaks in the pattern\n+pattern_median_peak_spacing                median mass difference between a pattern\'s peaks\n+spectrum_mass_irregularity                 a measure for the standard deviation in the mass difference between a pattern\'s peaks\n+ratio_na                                   the estimated isotope ratio inferred from neutron abundance (sip experiments) \n+ratio_fft                                  the estimated isotope ratio inferred by the fft method (natural isotope abundances)\n+error_fft                                  the remaining error after fitting the pattern with fft\n+error_clumpy                               the remaining error after fitting the pattern with the clumpy carbon method\n+flag_peptide_contains_sulfur               true if peptide contains sulfur\n+flag_peptide_has_modifications             true if peptide has modifications\n+flag_peptide_assigned_to_multiple_bins     true if peptide is associated with multiple proteins from different bins/mags\n+flag_peptide_assigned_to_multiple_proteins true if peptide is associated with multiple proteins\n+flag_peptide_mass_and_elements_undefined   true if peptide has unknown mass and elemental composition\n+flag_psm_has_low_confidence                true if psm was flagged as having low confidence (peptide identity uncertain)\n+flag_psm_is_ambiguous                      true if psm could not be assigned with certainty\n+flag_pattern_is_contaminated               true if multiple patterns have one or more shared peaks\n+flag_pattern_is_wobbly                     true if pattern_median_peak_spacing exceeds a threshold\n+flag_peak_at_minus_one_pos                 true if a peak was detected immediately before the monoisotopic peak, could indicate overlap with another pattern\n+i0 - i19                                   the intensities of the first 20 peaks of the pattern  \n+m0 - m19             
                      the masses of the first 20 peaks of the pattern\n+c1 - c6                                    contributions of clumps of 1-6 carbon to ratio_na. These are the outcomes of the clumpy carbon model. These results are only meaningful if the biomass was labeled to saturation.\n+========================================== ===================\n+    ]]></help>\n+    <citations>\n+        <citation type="doi">10.1186/s40168-022-01454-1</citation>\n+        <citation type="doi">10.1073/pnas.1722325115</citation>\n+        <citation type="doi">10.1101/2021.03.29.437612</citation>\n+        <citation type="doi">10.1093/bioinformatics/bty046</citation>\n+    </citations>\n+</tool>\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r d158a7909193 feather2tsv.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/feather2tsv.py Thu Jun 01 08:33:58 2023 +0000
[
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+"""
+based on https://github.com/kinestetika/Calisp/blob/master/benchmarking/sip%20benchmarking.ipynb
+"""
+
+import argparse
+import os
+
+import pandas as pd
+
+
+def load_calisp_data(filename):
+
+    # (1) load data
+    if os.path.isdir(filename):
+        file_data = []
+        for f in os.listdir(filename):
+            if not f.endswith(".feather"):
+                continue
+            f = os.path.join(filename, f)
+            file_data.append(pd.read_feather(f))
+            base, _ = os.path.splitext(f)
+            file_data[-1].to_csv(f"{base}.tsv", sep="\t")
+        data = pd.concat(file_data)
+    else:
+        data = pd.read_feather(filename)
+        base, _ = os.path.splitext(filename)
+        data.to_csv(f"{base}.tsv", sep="\t")
+
+
+parser = argparse.ArgumentParser(description='feather2tsv')
+parser.add_argument('--calisp_output', required=True, help='feather file')
+args = parser.parse_args()
+
+data = load_calisp_data(args.calisp_output)