Galaxy |

Changeset 0:d158a7909193 (2023-06-01)

Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 060699366b6dd19ad6c3ef3f332f63cc55d75dce

added:
calisp.xml
feather2tsv.py

diff -r 000000000000 -r d158a7909193 calisp.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/calisp.xml Thu Jun 01 08:33:58 2023 +0000

[

b'@@ -0,0 +1,193 @@\n+<tool id="calisp" name="calisp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">\n+ <description>Estimate isotopic composition of peptides from proteomics mass spectrometry data</description>\n+ <macros>\n+ <token name="@TOOL_VERSION@">3.0.10</token>\n+ <token name="@VERSION_SUFFIX@">0</token>\n+ <token name="@CALISP_REPO@">https://raw.githubusercontent.com/kinestetika/Calisp/208d495674e2b52fe56cf23457c833d1c2527242</token>\n+ <xml name="input_macro" tokens="multiple">\n+ \n+ </xml>\n+ </macros>\n+ <requirements>\n+ <requirement type="package" version="@TOOL_VERSION@">calisp</requirement>\n+ </requirements>\n+ <command detect_errors="aggressive"><![CDATA[\n+#import re\n+\n+mkdir -p spectra &&\n+#set escaped_specs = re.sub(\'[^\\w\\-\\.,:]\', \'_\', str($spectrum_file.element_identifier))\n+ln -s \'$spectrum_file\' spectra/\'$escaped_specs\' &&\n+\n+mkdir -p psms &&\n+#set escaped_peps = re.sub(\'[^\\w\\-\\.,:]\', \'_\', str($peptide_file.element_identifier))\n+ln -s \'$peptide_file\' psms/\'$escaped_peps\' &&\n+\n+calisp \n+ --spectrum_file spectra/\n+ --peptide_file psms/\n+ --output_file calisp-output/\n+ --mass_accuracy $mass_accuracy\n+ --bin_delimiter \'$bin_delimiter\'\n+ --threads "\\${GALAXY_SLOTS:-1}"\n+ --isotope $isotope\n+ $compute_clumps &&\n+\'$__tool_directory__/feather2tsv.py\' --calisp_output calisp-output/\n+ ]]></command>\n+ <inputs>\n+ <param argument="--spectrum_file" type="data" multiple="false" format="mzml" label="Spectrum file"/>\n+ <param argument="--peptide_file" type="data" multiple="false" format="tabular" label="Peptide file" help="Psm file" />\n+ <param argument="--mass_accuracy" type="float" value="10" label="Mass accuracy" help="The maximum mass difference between theoretical mass and experimental mass of a peptide" />\n+ <param argument="--bin_delimiter" type="text" value="_" label="Bin delimiter" help="For metagenomic data, the delimiter that separates the bin ID from the protein ID (default: 
"_"). Use "-" to ignore bins ID entirely.">\n+ <sanitizer invalid_char="">\n+ <valid initial="string.ascii_letters,string.digits">\n+ <add value="_" />\n+ <add value="-" />\n+ <add value=":" />\n+ </valid>\n+ </sanitizer> \n+ </param>\n+ <param argument="--isotope" type="select" label="Target isotope">\n+ <option value="13C" selected="true">13C</option>\n+ <option value="14C">14C</option>\n+ <option value="15N">15N</option>\n+ <option value="17O">17O</option>\n+ <option value="18O">18O</option>\n+ <option value="2H">2H</option>\n+ <option value="3H">3H</option>\n+ <option value="33S">33S</option>\n+ <option value="34S">34S</option>\n+ <option value="36S">36S</option>\n+ </param>\n+ <param argument="--compute_clumps" type="boolean" truevalue="--compute_clumps" falsevalue="" checked="false" label="Compute clumps" help="To compute clumpiness of carbon assimilation. Only use when samples are labeled tosaturation. Estimation of clumpiness takes much additional time." />\n+ </inputs>\n+ <outputs>\n+ <collection name="output" type="list">\n+ <discover_datasets pattern="(?P<designation>.*)\\.tsv" format="tabular" directory="calisp-output"/>\n+ </collection>\n+ </outputs>\n+ <tests>\n+ <!-- TODO test data to large, avilable from here: https://github.com/kinestetika/Calisp/tree/master/test\n+ if possible inlcude via location in the future\n+ <test expect_num_outputs="1">\n+ <param name="spectrum_file" value="calisp_test_data.mzML" ftype="mzml"/>\n+ <param name="peptide_file" value="calisp_test_data_TargetPeptideSpectrumMatch.txt" ftype="tabul'..b'sociated with the pattern (without the bin id)\n+peptide the aminoacid sequence of the peptide\n+peptide_mass the mass of the peptide\n+C # of carbon atoms in the peptide\n+N # of nitrogen atoms in the peptide\n+O # of oxygen atoms in the peptide\n+H # of hydrogen atoms in the peptide\n+S # of sulfur atoms in the peptide\n+psm_id psm id\n+psm_mz psm m over z\n+psm_charge psm charge\n+psm_neutrons number of neutrons inferred from 
custom \'neutron\' modifications \n+psm_rank rank of the psm\n+psm_precursor_id id of the ms1 spectrum that was the source of the psm \n+psm_precursor_mz mass over charge of the precursor of the psm\n+pattern_charge charge of the pattern\n+pattern_precursor_id id of the ms1 spectrum that was the source of the pattern\n+pattern_total_intensity total intensity of the pattern\n+pattern_peak_count # of peaks in the pattern\n+pattern_median_peak_spacing median mass difference between a pattern\'s peaks\n+spectrum_mass_irregularity a measure for the standard deviation in the mass difference between a pattern\'s peaks\n+ratio_na the estimated isotope ratio inferred from neutron abundance (sip experiments) \n+ratio_fft the estimated isotope ratio inferred by the fft method (natural isotope abundances)\n+error_fft the remaining error after fitting the pattern with fft\n+error_clumpy the remaining error after fitting the pattern with the clumpy carbon method\n+flag_peptide_contains_sulfur true if peptide contains sulfur\n+flag_peptide_has_modifications true if peptide has modifications\n+flag_peptide_assigned_to_multiple_bins true if peptide is associated with multiple proteins from different bins/mags\n+flag_peptide_assigned_to_multiple_proteins true if peptide is associated with multiple proteins\n+flag_peptide_mass_and_elements_undefined true if peptide has unknown mass and elemental composition\n+flag_psm_has_low_confidence true if psm was flagged as having low confidence (peptide identity uncertain)\n+flag_psm_is_ambiguous true if psm could not be assigned with certainty\n+flag_pattern_is_contaminated true if multiple patterns have one or more shared peaks\n+flag_pattern_is_wobbly true if pattern_median_peak_spacing exceeds a threshold\n+flag_peak_at_minus_one_pos true if a peak was detected immediately before the monoisotopic peak, could indicate overlap with another pattern\n+i0 - i19 the intensities of the first 20 peaks of the pattern \n+m0 - m19 the masses of the 
first 20 peaks of the pattern\n+c1 - c6 contributions of clumps of 1-6 carbon to ratio_na. These are the outcomes of the clumpy carbon model. These results are only meaningful if the biomass was labeled to saturation.\n+========================================== ===================\n+ ]]></help>\n+ <citations>\n+ <citation type="doi">10.1186/s40168-022-01454-1</citation>\n+ <citation type="doi">10.1073/pnas.1722325115</citation>\n+ <citation type="doi">10.1101/2021.03.29.437612</citation>\n+ <citation type="doi">10.1093/bioinformatics/bty046</citation>\n+ </citations>\n+</tool>\n\\ No newline at end of file\n'

diff -r 000000000000 -r d158a7909193 feather2tsv.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/feather2tsv.py Thu Jun 01 08:33:58 2023 +0000

[

@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+"""
+based on https://github.com/kinestetika/Calisp/blob/master/benchmarking/sip%20benchmarking.ipynb
+"""
+
+import argparse
+import os
+
+import pandas as pd
+
+
+def load_calisp_data(filename):
+    """Convert calisp feather output to TSV and return the loaded data.
+
+    ``filename`` is either a single feather file or a directory. For a
+    directory, every entry whose name ends in ``.feather`` is read and
+    written next to the original as ``<base>.tsv`` (tab separated, with
+    the index column, as ``DataFrame.to_csv`` does by default); the
+    frames are then concatenated. For a single file the same conversion
+    is applied to that file only.
+
+    Returns the loaded ``pandas.DataFrame`` (for a directory: the
+    concatenation of all frames, in sorted file-name order).
+
+    Raises ``ValueError`` if a directory contains no ``.feather`` file.
+    """
+    if not os.path.isdir(filename):
+        data = pd.read_feather(filename)
+        base, _ = os.path.splitext(filename)
+        data.to_csv(f"{base}.tsv", sep="\t")
+        return data
+
+    file_data = []
+    # os.listdir() returns entries in arbitrary order; sorting keeps the
+    # row order of the concatenated frame reproducible between runs.
+    for name in sorted(os.listdir(filename)):
+        if not name.endswith(".feather"):
+            continue
+        path = os.path.join(filename, name)
+        frame = pd.read_feather(path)
+        base, _ = os.path.splitext(path)
+        frame.to_csv(f"{base}.tsv", sep="\t")
+        file_data.append(frame)
+
+    if not file_data:
+        # pd.concat([]) would raise a ValueError with a generic message;
+        # raise the same exception type but name the actual cause.
+        raise ValueError(f"no .feather files found in {filename!r}")
+    return pd.concat(file_data)
+
+
+# Command line interface: a single mandatory option pointing at the
+# calisp output (the value is passed unchanged to load_calisp_data).
+parser = argparse.ArgumentParser(description="feather2tsv")
+parser.add_argument(
+    "--calisp_output",
+    help="feather file",
+    required=True,
+)
+args = parser.parse_args()
+
+# Run the conversion at import/execution time and keep the function's
+# result in the module-level name `data`.
+data = load_calisp_data(args.calisp_output)