comparison data_manager/metanetx_chem_prop_fetcher.py @ 13:c1d4f14dc768 draft

"planemo upload for repository https://github.com/brsynth/synbiocad-galaxy-wrappers commit 7db54c0555a12ecb8b3f756032228c54fe028e0a-dirty"
author ggricourt
date Wed, 09 Mar 2022 14:22:54 +0000
parents
children 8e8a9e51f1d7
comparison
equal deleted inserted replaced
12:e339b8d84de0 13:c1d4f14dc768
1 import argparse
2 import json
3 import os
4 import shutil
5 import sys
6 import tempfile
7 import pandas as pd
8 try:
9 # For Python 3.0 and later
10 from urllib.request import Request, urlopen
11 except ImportError:
12 # Fall back to Python 2 imports
13 from urllib2 import Request, urlopen
14
15
# Root of the MetaNetX FTP tree (note the trailing slash). The script entry
# point appends the requested release version and "chem_prop.tsv" to it.
METANETX_URL = "https://www.metanetx.org/ftp/"
17
18
def url_download(url, path, chunk_size=2**16):
    """Download ``url`` and write the response body to ``path``.

    The body is streamed in blocks of ``chunk_size`` bytes, so the whole
    payload is never held in memory at once.

    :param url: address to fetch; anything ``urlopen`` accepts.
    :param path: destination file, opened in binary write mode (truncated).
    :param chunk_size: number of bytes requested per read (default 64 KiB).
    :raises SystemExit: on any failure; the message carries the URL and the
        text of the underlying error.
    """
    try:
        with urlopen(Request(url)) as fod, open(path, "wb") as dst:
            # read() returns b"" at end of stream, which stops the iterator.
            for chunk in iter(lambda: fod.read(chunk_size), b""):
                dst.write(chunk)
    except Exception as e:
        # This is a command-line tool: report the failure and stop instead of
        # propagating a traceback. Name the URL so the cause can be traced.
        sys.exit("Unable to download %s: %s" % (url, e))
31
32
def clean_metanetx_file(path):
    """Rewrite the file at ``path`` in place as a plain TSV with one header row.

    Lines starting with ``#`` are treated as comments and dropped, except for
    the last one seen before the first data line: that one is the column
    header and is written first, with its leading ``#`` marker removed.
    ``#`` lines that appear after the first data line are dropped.

    A file without any ``#`` line is copied through unchanged, and a file
    that has a header but no data lines keeps its header.

    :param path: file to clean; it is overwritten with the cleaned content.
    """
    header = None
    header_written = False
    # delete=False: the file is read back by name after it is closed, and is
    # removed explicitly in the ``finally`` clause below.
    tmp = tempfile.NamedTemporaryFile("w", delete=False)
    try:
        with tmp as fod, open(path) as fid:
            for line in fid:
                if line.startswith("#"):
                    # Remember the most recent comment line; the one directly
                    # preceding the data is the header.
                    header = line
                    continue
                if not header_written:
                    if header is not None:
                        # Strip only the leading marker so that a '#' inside
                        # a column name is preserved.
                        fod.write(header.lstrip("#"))
                    header_written = True
                fod.write(line)
            if not header_written and header is not None:
                # No data lines at all: still emit the header row.
                fod.write(header.lstrip("#"))
        shutil.copyfile(tmp.name, path)
    finally:
        os.remove(tmp.name)
47
48
def records_chem_prop(path):
    """Load a cleaned ``chem_prop`` TSV and return its rows as a list of dicts.

    The ``name`` column is rewritten as ``"<ID>: <name> (<formula>)"``, the
    columns ``reference``, ``formula``, ``charge``, ``mass``, ``InChIKey`` and
    ``SMILES`` are dropped, and ``ID`` / ``InChI`` are renamed to ``value`` /
    ``inchi``.

    :param path: tab-separated file whose first row is the column header.
    :returns: one dict per row, as produced by ``DataFrame.to_dict('records')``.
    """
    df = pd.read_csv(path, sep="\t")
    # Build the display label before "formula" is dropped below.
    df["name"] = [
        "%s: %s (%s)" % fields
        for fields in zip(df["ID"], df["name"], df["formula"])
    ]
    df = df.drop(columns=["reference", "formula", "charge", "mass", "InChIKey", "SMILES"])
    # rename() returns a new frame; the result must be kept or the columns
    # keep their original names.
    df = df.rename(columns={"ID": "value", "InChI": "inchi"})
    return df.to_dict('records')
55
56
if __name__ == "__main__":
    # Command-line entry point: download chem_prop.tsv for the requested
    # MetaNetX release, convert it to records and write them as JSON.
    parser = argparse.ArgumentParser()
    # A required group with a single member: its effect is to make --version
    # mandatory.
    pinput = parser.add_mutually_exclusive_group(required=True)
    pinput.add_argument("--version", help="Version to download")
    # The file is read below and then overwritten, so it must be supplied.
    parser.add_argument("--out-file", required=True, help="JSON output file")
    args = parser.parse_args()

    # Init.
    data_manager_json = {"data_tables": {}}
    with open(args.out_file) as fh:
        params = json.load(fh)

    workdir = params["output_data"][0]["extra_files_path"]
    os.makedirs(workdir)

    # Download the chem_prop table of the requested release. The base URL
    # already ends with a slash, so strip it to avoid "ftp//<version>".
    url = '/'.join([METANETX_URL.rstrip('/'), args.version, 'chem_prop.tsv'])
    with tempfile.NamedTemporaryFile() as ftmp:
        url_download(url, ftmp.name)

        # Clean header
        clean_metanetx_file(ftmp.name)

        # Select records.
        records = records_chem_prop(ftmp.name)

    # Write data.
    data_manager_json["data_tables"]["metanetx_chem_prop"] = records
    with open(args.out_file, "w") as fh:
        json.dump(data_manager_json, fh, sort_keys=True)