Mercurial > repos > ggricourt > data_manager_bigg
view data_manager/metanetx_chem_prop_fetcher.py @ 14:8e8a9e51f1d7 draft
"planemo upload for repository https://github.com/brsynth/synbiocad-galaxy-wrappers commit 7db54c0555a12ecb8b3f756032228c54fe028e0a-dirty"
author | ggricourt |
---|---|
date | Wed, 09 Mar 2022 15:18:07 +0000 |
parents | c1d4f14dc768 |
children |
line wrap: on
line source
"""Galaxy data manager script: fetch MetaNetX ``chem_prop.tsv`` records.

The script reads the Galaxy-provided JSON parameter file, downloads the
``chem_prop.tsv`` file of the requested MetaNetX release, extracts one record
per compound that has a non-empty structure field and writes the resulting
``metanetx_chem_prop`` data table back to the same JSON file.
"""
import argparse
import json
import os
import shutil
import sys
import tempfile
from contextlib import closing
# import pandas as pd

try:
    # For Python 3.0 and later
    from urllib.request import Request, urlopen
except ImportError:
    # Fall back to Python 2 imports
    from urllib2 import Request, urlopen


METANETX_URL = "https://www.metanetx.org/ftp/"


def chem_prop_url(version):
    """Return the download URL of ``chem_prop.tsv`` for a MetaNetX release.

    ``METANETX_URL`` already ends with a slash, so it is stripped before
    joining to avoid producing ``.../ftp//<version>/chem_prop.tsv``.
    """
    return "/".join([METANETX_URL.rstrip("/"), version, "chem_prop.tsv"])


def url_download(url, path):
    """Download ``url`` into the file located at ``path``.

    Any failure (network error, HTTP error, unwritable path) terminates the
    program through ``sys.exit`` with the error text as exit message.
    """
    try:
        # ``closing`` keeps this working with the Python 2 ``urllib2``
        # fallback, whose response object is not a context manager.
        with closing(urlopen(Request(url))) as response, open(path, "wb") as dst:
            shutil.copyfileobj(response, dst)
    except Exception as e:
        sys.exit(str(e))


def clean_metanetx_file(path):
    """Rewrite the file at ``path`` in place without its comment lines.

    Every line starting with ``#`` is dropped, except the last one found
    before the first data line: it is taken as the column header and written
    first, with its ``#`` characters removed. A file that has no comment line
    before its first data line is kept as is instead of raising an error.
    """
    header = None
    header_done = False
    tmp = tempfile.NamedTemporaryFile(mode="w", delete=False)
    try:
        with tmp, open(path) as src:
            for line in src:
                if line.startswith("#"):
                    # Remember the most recent comment line; only the one
                    # directly preceding the data is used as header.
                    header = line
                    continue
                if not header_done:
                    if header is not None:
                        tmp.write(header.replace("#", ""))
                    header_done = True
                tmp.write(line)
        shutil.copyfile(tmp.name, path)
    finally:
        # delete=False above: the scratch file must be removed explicitly.
        os.remove(tmp.name)


# def records_chem_prop_pandas(path):
#     df = pd.read_csv(path, sep="\t")
#     df["name"] = df.apply(lambda x: "%s: %s (%s)" % (x["ID"], x["name"], x["formula"]), axis=1)
#     df.drop(columns=["reference", "formula", "charge", "mass", "InChIKey", "SMILES"], inplace=True)
#     df.rename(columns={"ID": "value", "InChI": "inchi"})
#     return df.to_dict('records')


def records_chem_prop(path):
    """Build the data table records from a cleaned ``chem_prop.tsv`` file.

    The first line is skipped as header. Each following tab-separated line
    whose field at index 7 is non-empty yields a dict with the keys
    ``value`` (field 0), ``name`` (``"<field 0>: <field 1> (<field 3>)"``)
    and ``inchi`` (field 7). Lines with fewer than 8 fields are ignored.

    Returns the list of records in file order.
    """
    # NOTE(review): index 7 feeds the "inchi" key, while the commented-out
    # pandas variant above used the column named "InChI" - confirm that the
    # index matches the column layout of the targeted MetaNetX release.
    structure_ix = 7
    records = []
    with open(path) as fid:
        next(fid, None)  # Skip the header line (no-op on an empty file).
        for line in fid:
            # Strip the line ending so it cannot leak into the last field.
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) <= structure_ix or fields[structure_ix] == "":
                continue
            records.append({
                "value": fields[0],
                "name": "%s: %s (%s)" % (fields[0], fields[1], fields[3]),
                "inchi": fields[structure_ix],
            })
    return records


def main(argv=None):
    """Run the data manager: parse arguments, download, parse, write JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--version", required=True, help="Version to download")
    parser.add_argument("--out-file", required=True, help="JSON output file")
    args = parser.parse_args(argv)

    # Init.
    data_manager_json = {"data_tables": {}}
    with open(args.out_file) as fh:
        params = json.load(fh)

    workdir = params["output_data"][0]["extra_files_path"]
    if not os.path.isdir(workdir):
        os.makedirs(workdir)

    # Download into a scratch file that is always removed afterwards, even
    # when url_download() terminates the program through sys.exit().
    fd, tsv_path = tempfile.mkstemp(suffix=".tsv")
    os.close(fd)
    try:
        url_download(chem_prop_url(args.version), tsv_path)
        # Clean header
        clean_metanetx_file(tsv_path)
        # Select records.
        records = records_chem_prop(tsv_path)
    finally:
        os.remove(tsv_path)

    # Write data.
    data_manager_json["data_tables"]["metanetx_chem_prop"] = records
    with open(args.out_file, "w") as fh:
        json.dump(data_manager_json, fh, sort_keys=True)


if __name__ == "__main__":
    main()