Mercurial > repos > ggricourt > data_manager_bigg
changeset 13:c1d4f14dc768 draft
"planemo upload for repository https://github.com/brsynth/synbiocad-galaxy-wrappers commit 7db54c0555a12ecb8b3f756032228c54fe028e0a-dirty"
author | ggricourt |
---|---|
date | Wed, 09 Mar 2022 14:22:54 +0000 |
parents | e339b8d84de0 |
children | 8e8a9e51f1d7 |
files | data_manager/.tmp.py.swp data_manager/metanetx_chem_prop_fetcher.py data_manager/metanetx_chem_prop_fetcher.xml data_manager_conf.xml |
diffstat | 4 files changed, 145 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# data_manager/metanetx_chem_prop_fetcher.py
"""Galaxy data manager: fetch MetaNetX ``chem_prop.tsv`` and emit table records.

The script downloads ``chem_prop.tsv`` for a given MetaNetX release, turns the
last ``#``-prefixed comment line into a regular TSV header, and writes the
``metanetx_chem_prop`` data table (columns ``value``, ``name``, ``inchi``) as
JSON into the file given by ``--out-file``.
"""
import argparse
import json
import os
import shutil
import sys
import tempfile

import pandas as pd

try:
    # For Python 3.0 and later
    from urllib.request import Request, urlopen
except ImportError:
    # Fall back to Python 2 imports
    from urllib2 import Request, urlopen


METANETX_URL = "https://www.metanetx.org/ftp/"


def build_chem_prop_url(version):
    """Return the download URL of ``chem_prop.tsv`` for *version*.

    The base URL already ends with a slash, so it is stripped before joining
    to avoid producing ``.../ftp//<version>/chem_prop.tsv``.
    """
    return "/".join([METANETX_URL.rstrip("/"), version, "chem_prop.tsv"])


def url_download(url, path):
    """Download *url* into the file *path*.

    Any failure terminates the process through ``sys.exit`` with the error
    text as message, which gives a non-zero exit status.
    """
    try:
        with urlopen(Request(url)) as fod, open(path, "wb") as dst:
            shutil.copyfileobj(fod, dst)
    except Exception as e:
        # Top-level boundary of a command-line tool: report and stop.
        sys.exit(str(e))


def clean_metanetx_file(path):
    """Rewrite *path* in place so that it starts with a plain TSV header.

    Lines starting with ``#`` are treated as comments. The last comment line
    seen before the first data line is kept as the header, with its leading
    ``#`` removed; every other comment line is discarded. If no comment line
    precedes the data, the data lines are written back unchanged.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        cleaned = os.path.join(tmpdir, "cleaned.tsv")
        header = None
        header_written = False
        # UTF-8 matches the default encoding pandas.read_csv uses downstream.
        with open(path, encoding="utf-8") as fid, \
                open(cleaned, "w", encoding="utf-8") as fod:
            for line in fid:
                if line.startswith("#"):
                    header = line
                    continue
                if not header_written:
                    if header is not None:
                        fod.write(header[1:])
                    header_written = True
                fod.write(line)
        shutil.copyfile(cleaned, path)


def records_chem_prop(path):
    """Load the cleaned TSV at *path* and return the data table records.

    Each record is a dict with the keys ``value`` (the ``ID`` column),
    ``name`` (``"<ID>: <name> (<formula>)"``) and ``inchi`` (the ``InChI``
    column).
    """
    df = pd.read_csv(path, sep="\t")
    df["name"] = [
        "%s: %s (%s)" % row
        for row in zip(df["ID"], df["name"], df["formula"])
    ]
    # rename() returns a new frame; the result must be kept, otherwise the
    # records would still carry the "ID" / "InChI" keys.
    # NOTE(review): rows without an InChI keep pandas' NaN, which json.dump
    # serializes as the non-standard token NaN - confirm the consumer accepts it.
    table = df[["ID", "name", "InChI"]].rename(
        columns={"ID": "value", "InChI": "inchi"}
    )
    return table.to_dict("records")


def main():
    """Parse the command line, fetch the file and write the JSON output."""
    parser = argparse.ArgumentParser()
    pinput = parser.add_mutually_exclusive_group(required=True)
    pinput.add_argument("--version", help="Version to download")
    parser.add_argument("--out-file", required=True, help="JSON output file")
    args = parser.parse_args()

    # Init: the output file initially holds the parameters as JSON.
    with open(args.out_file) as fh:
        params = json.load(fh)

    workdir = params["output_data"][0]["extra_files_path"]
    os.makedirs(workdir, exist_ok=True)

    # Download, clean and parse inside a scratch directory that is removed
    # afterwards.
    with tempfile.TemporaryDirectory() as tmpdir:
        chem_prop_path = os.path.join(tmpdir, "chem_prop.tsv")
        url_download(build_chem_prop_url(args.version), chem_prop_path)

        # Clean header
        clean_metanetx_file(chem_prop_path)

        # Select records.
        records = records_chem_prop(chem_prop_path)

    # Write data.
    data_manager_json = {"data_tables": {"metanetx_chem_prop": records}}
    with open(args.out_file, "w") as fh:
        json.dump(data_manager_json, fh, sort_keys=True)


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/metanetx_chem_prop_fetcher.xml Wed Mar 09 14:22:54 2022 +0000 @@ -0,0 +1,50 @@ +<tool id="metanetx_chem_prop_fetcher" name="Metanetx Chem Prop data manager" tool_type="manage_data" profile="18.09" version="0.0.1"> + <description>Get InChI value of compound in chem_prop.tsv file available from Metanetx</description> + <requirements> + <requirement type="package" version="3.8">python</requirement> + <requirement type="package" version="1.4">pandas</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + python '$__tool_directory__/metanetx_chem_prop_fetcher.py' + #if $input_cond.input_select == "latest" + --version latest + #else + --version '$input_cond.version_id' + #end if + --out-file '$out_file' + ]]></command> + <inputs> + <conditional name="input_cond"> + <param name="input_select" type="select" label="Select a version to download"> + <option value="latest">Download latest</option> + <option value="version">Choose version</option> + </param> + <when value="version"> + <param name="version_id" type="text" label="Version ID to download" value=""> + <validator type="empty_field" message="Version ID is required"/> + </param> + </when> + </conditional> + </inputs> + <outputs> + <data name="out_file" format="data_manager_json" /> + </outputs> + <tests> + </tests> + <help><![CDATA[ +Download InChI from Metanetx +============================ + +Get InChI value of compound in chem_prop.tsv file available from Metanetx + +Version +---------- +0.0.1 + +License +------- + +`MIT <https://github.com/brsynth/rptools/blob/master/LICENSE>`_ + + ]]></help> +</tool>
--- a/data_manager_conf.xml Thu Feb 24 13:09:32 2022 +0000 +++ b/data_manager_conf.xml Wed Mar 09 14:22:54 2022 +0000 @@ -16,4 +16,13 @@ </output> </data_table> </data_manager> + <data_manager tool_file="data_manager/metanetx_chem_prop_fetcher.xml" id="metanetx_chem_prop_fetcher"> + <data_table name="metanetx_chem_prop"> + <output> + <column name="value" /> + <column name="name" /> + <column name="inchi" /> + </output> + </data_table> + </data_manager> </data_managers>