Mercurial > repos > ggricourt > data_manager_bigg
comparison data_manager/metanetx_chem_prop_fetcher.py @ 13:c1d4f14dc768 draft
"planemo upload for repository https://github.com/brsynth/synbiocad-galaxy-wrappers commit 7db54c0555a12ecb8b3f756032228c54fe028e0a-dirty"
author | ggricourt |
---|---|
date | Wed, 09 Mar 2022 14:22:54 +0000 |
parents | |
children | 8e8a9e51f1d7 |
comparison
equal
deleted
inserted
replaced
12:e339b8d84de0 | 13:c1d4f14dc768 |
---|---|
1 import argparse | |
2 import json | |
3 import os | |
4 import shutil | |
5 import sys | |
6 import tempfile | |
7 import pandas as pd | |
8 try: | |
9 # For Python 3.0 and later | |
10 from urllib.request import Request, urlopen | |
11 except ImportError: | |
12 # Fall back to Python 2 imports | |
13 from urllib2 import Request, urlopen | |
14 | |
15 | |
16 METANETX_URL = "https://www.metanetx.org/ftp/" | |
17 | |
18 | |
def url_download(url, path):
    """Download *url* and write the response body to the file at *path*.

    The destination is opened in binary mode and overwritten. Any error
    raised while opening the URL, opening the destination or copying the
    data terminates the program through ``sys.exit`` with the error text
    as the exit message.

    :param url: address of the resource to fetch
    :param path: local file path that receives the downloaded bytes
    """
    try:
        with urlopen(Request(url)) as src, open(path, "wb") as dst:
            # Stream the body in buffered chunks so the whole payload is
            # never held in memory at once.
            shutil.copyfileobj(src, dst)
    except Exception as e:
        # This is a command-line tool: report the failure and stop.
        sys.exit(str(e))
31 | |
32 | |
def clean_metanetx_file(path):
    """Rewrite the file at *path* without its ``#`` comment lines.

    Every line starting with ``#`` is dropped. The last such line seen
    before the first non-comment line is kept as the header row, with its
    leading ``#`` marker(s) removed. If no comment line precedes the data,
    the data lines are written unchanged and no header is added.

    The cleaned content is staged in a temporary file and then copied back
    over *path*; the temporary file is always removed afterwards.

    :param path: path of the tab-separated file to clean in place
    """
    header = None
    header_done = False
    # delete=False so the file can be closed before it is copied by name.
    staging = tempfile.NamedTemporaryFile(mode="w", delete=False)
    try:
        with staging as fod, open(path) as fid:
            for line in fid:
                if line.startswith("#"):
                    # Remember the most recent comment line: the one just
                    # before the data carries the column names.
                    header = line
                    continue
                if not header_done:
                    if header is not None:
                        # Strip only the leading marker so that a '#'
                        # inside a column name is preserved.
                        fod.write(header.lstrip("#"))
                    header_done = True
                fod.write(line)
        shutil.copyfile(staging.name, path)
    finally:
        os.remove(staging.name)
47 | |
48 | |
def records_chem_prop(path):
    """Load a cleaned chem_prop table and return it as a list of records.

    The tab-separated file at *path* must provide the columns ``ID``,
    ``name``, ``reference``, ``formula``, ``charge``, ``mass``, ``InChI``,
    ``InChIKey`` and ``SMILES``. Each returned dict has three keys:

    * ``value`` -- the original ``ID``
    * ``name``  -- a label built as ``"<ID>: <name> (<formula>)"``
    * ``inchi`` -- the original ``InChI``

    :param path: path of the tab-separated file, header row included
    :return: list of dicts, one per row of the table
    """
    df = pd.read_csv(path, sep="\t")
    # Build the label column by column; this yields the same strings as a
    # row-wise apply and also copes with an empty table.
    df["name"] = [
        "%s: %s (%s)" % fields
        for fields in zip(df["ID"], df["name"], df["formula"])
    ]
    df = df.drop(columns=["reference", "formula", "charge", "mass", "InChIKey", "SMILES"])
    # rename() returns a new frame; the result has to be kept for the
    # renamed columns to appear in the records.
    df = df.rename(columns={"ID": "value", "InChI": "inchi"})
    return df.to_dict("records")
55 | |
56 | |
if __name__ == "__main__":
    # Command-line entry point: download chem_prop.tsv for the requested
    # MetaNetX version, clean it, and write the resulting records as JSON.
    parser = argparse.ArgumentParser()
    pinput = parser.add_mutually_exclusive_group(required=True)
    pinput.add_argument("--version", help="Version to download")
    # The file is read unconditionally below, so the option is mandatory.
    parser.add_argument("--out-file", required=True, help="JSON output file")
    args = parser.parse_args()

    # Init.
    # The --out-file path is first read as JSON parameters and is
    # overwritten with the results at the end of the script.
    data_manager_json = {"data_tables": {}}
    with open(args.out_file) as fh:
        params = json.load(fh)

    workdir = params["output_data"][0]["extra_files_path"]
    # Tolerate a directory that already exists.
    if not os.path.isdir(workdir):
        os.makedirs(workdir)

    ftmp = tempfile.NamedTemporaryFile()
    try:
        # Download the table. METANETX_URL already ends with a slash, so
        # strip it before joining to avoid a doubled '/' in the URL.
        url = "/".join([METANETX_URL.rstrip("/"), args.version, "chem_prop.tsv"])
        url_download(url, ftmp.name)

        # Clean header
        clean_metanetx_file(ftmp.name)

        # Select records.
        records = records_chem_prop(ftmp.name)
    finally:
        # Closing the temporary file also removes it.
        ftmp.close()

    # Write data.
    data_manager_json["data_tables"]["metanetx_chem_prop"] = records
    with open(args.out_file, "w") as fh:
        json.dump(data_manager_json, fh, sort_keys=True)