# HG changeset patch
# User recetox
# Date 1613574873 0
# Node ID 07667688735e73c4f85fb740331feb5ff998b0f3
# Parent 644192cf22a502b95a88ae099fff2dacd0bc463b
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 521769cd5af12987a119e6302f22e2e1e864ff9a"
diff -r 644192cf22a5 -r 07667688735e aplcms_to_ramclustr_converter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/aplcms_to_ramclustr_converter.py Wed Feb 17 15:14:33 2021 +0000
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+"""Convert a table of an apLCMS HDF file into a CSV table for RamClustR."""
+
+import argparse
+import sys
+import warnings
+
+import pandas as pd
+
+
+warnings.simplefilter('ignore')
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--dataframe", help="Name of hdf dataframe")
+parser.add_argument("--table", help="Name of a table in the dataframe")
+parser.add_argument('output')
+args = parser.parse_args()
+
+
+def extract_data(table):
+    """Build the 'mz_rt' identifier column and copy the sample columns.
+
+    The first four columns are skipped and the first half of the remaining
+    columns is copied. Each copied column is stored under the text between
+    the first and second '.' of its original label.
+
+    Args:
+        table: DataFrame with 'mz' and 'rt' columns.
+
+    Returns:
+        DataFrame with an 'mz_rt' column ("<mz>_<rt>" for every row) and the
+        copied columns under their shortened labels.
+
+    Raises:
+        KeyError: if `table` has no 'mz' or 'rt' column.
+        IndexError: if the label of a copied column contains no '.'.
+    """
+    num_samples = (len(table.columns) - 4) // 2
+    intensities = table.iloc[:, 4:(4 + num_samples)]
+
+    mz_rt = table['mz'].map(str) + "_" + table['rt'].map(str)
+    ramclustr_data = pd.DataFrame({'mz_rt': mz_rt})
+
+    for idx, column in enumerate(intensities.columns):
+        # Select by position so duplicated input labels stay unambiguous.
+        ramclustr_data[column.split('.')[1]] = intensities.iloc[:, idx]
+
+    return ramclustr_data
+
+
+def format_table(ramclustr_data):
+    """Transpose the extracted data into one row per sample.
+
+    Args:
+        ramclustr_data: DataFrame with an 'mz_rt' column, as returned by
+            `extract_data`. It is not modified.
+
+    Returns:
+        DataFrame whose index is named 'sample' and holds the former column
+        labels, and whose columns are the former 'mz_rt' values.
+    """
+    ramclustr_table = ramclustr_data.set_index('mz_rt').transpose()
+    return ramclustr_table.rename_axis(index='sample')
+
+
+def main():
+    """Convert the selected HDF table to CSV using the parsed `args`.
+
+    Exits with status 1 and a message on stderr if the table is missing.
+    """
+    try:
+        # errors is left at pandas' default ('strict'); it has to name a
+        # codec error handler, which 'None' is not.
+        aplcms_table = pd.read_hdf(args.dataframe, args.table)
+    except KeyError:
+        msg = "Selected table does not exist in HDF dataframe"
+        print(msg, file=sys.stderr)
+        sys.exit(1)
+
+    ramclustr_data = extract_data(aplcms_table)
+    ramclustr_table = format_table(ramclustr_data)
+
+    ramclustr_table.to_csv(args.output, sep=',')
+    msg = "Table '{}' of HDF dataset is converted to csv for RamClustR".format(args.table)
+    print(msg, file=sys.stdout)
+
+
+if __name__ == "__main__":
+    main()
diff -r 644192cf22a5 -r 07667688735e aplcms_to_ramclustr_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/aplcms_to_ramclustr_converter.xml Wed Feb 17 15:14:33 2021 +0000
@@ -0,0 +1,37 @@
+
+ converts aplcms HDF output to RamClustR csv input
+
+ python
+ pandas
+ pytables
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `_ Unsupervised or Hybrid
+
+ - Downstream tool: `RamClustR `_ (CSV)
+ ]]>
+
diff -r 644192cf22a5 -r 07667688735e hdf_converter.py
--- a/hdf_converter.py Wed Jan 13 15:55:42 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-
-import argparse
-import sys
-import warnings
-
-import pandas as pd
-
-
-warnings.simplefilter('ignore')
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--dataframe", help="Name of hdf dataframe")
-parser.add_argument("--table", help="Name of a table in the dataframe")
-parser.add_argument('output')
-args = parser.parse_args()
-
-
-def extract_data(table):
- num_samples = int((len(table.columns.tolist()) - 4) / 2)
- mz_rt = table['mz'].map(str) + "_" + table['rt'].map(str)
-
- intensities = table.iloc[:, 4:(4 + num_samples)]
- sample_labels = [label.split('.')[1] for label in intensities.columns.tolist()]
- ramclustr_data = pd.DataFrame({'mz_rt': mz_rt})
-
- for idx in range(num_samples):
- label = sample_labels[idx]
- ramclustr_data[label] = intensities.iloc[:, idx]
-
- return ramclustr_data
-
-
-def format_table(ramclustr_data):
- ramclustr_data.set_index('mz_rt', inplace=True)
- ramclustr_data = ramclustr_data.transpose()
- ramclustr_data.index.rename('sample', inplace=True)
- return ramclustr_data
-
-
-def main():
- try:
- aplcms_table = pd.read_hdf(args.dataframe, args.table, errors='None')
- except KeyError:
- msg = "Selected table does not exist in HDF dataframe"
- print(msg, file=sys.stderr)
- sys.exit(1)
-
- ramclustr_data = extract_data(aplcms_table)
- ramclustr_table = format_table(ramclustr_data)
-
- ramclustr_table.to_csv(args.output, sep=',')
- msg = "Table '{}' of HDF dataset is converted to csv for RamClutsR".format(args.table)
- print(msg, file=sys.stdout)
-
-
-if __name__ == "__main__":
- main()
diff -r 644192cf22a5 -r 07667688735e hdf_converter.xml
--- a/hdf_converter.xml Wed Jan 13 15:55:42 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-
- converts aplcms HDF output to RamClustR csv input
-
- python
- pandas
- pytables
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- `_ Unsupervised or Hybrid
-
- - Downstream tool: `RamClustR `_ (CSV)
- ]]>
-
\ No newline at end of file