comparison data_manager/bigg_model_sbml_fetcher.py @ 5:5e6f76507721 draft

"planemo upload for repository https://github.com/brsynth/synbiocad-galaxy-wrappers commit 47caed1dd87e80ae226fabb584e9d63d7c86a436-dirty"
author ggricourt
date Thu, 24 Feb 2022 10:56:46 +0000
parents 2f837e65b33c
children 65589e7476b6
comparison
equal deleted inserted replaced
4:a15b229ee755 5:5e6f76507721
1 import argparse 1 import argparse
2 import ast
2 import json 3 import json
3 import os 4 import os
4 import sys 5 import sys
6 import time
5 try: 7 try:
6 # For Python 3.0 and later 8 # For Python 3.0 and later
7 from urllib.request import Request, urlopen 9 from urllib.request import Request, urlopen
8 except ImportError: 10 except ImportError:
9 # Fall back to Python 2 imports 11 # Fall back to Python 2 imports
10 from urllib2 import Request, urlopen 12 from urllib2 import Request, urlopen
11 13
12 BASE_URL = 'http://bigg.ucsd.edu/static/models'
13 ID2ORG = {
14 "iCN718": "Acinetobacter baumannii AYE",
15 "iYO844": "Bacillus subtilis subsp. subtilis str. 168",
16 "iRC1080": "Chlamydomonas reinhardtii",
17 "iCN900": "Clostridioides difficile 630",
18 "iHN637": "Clostridium ljungdahlii DSM 13528",
19 "iCHOv1_DG44": "Cricetulus griseus",
20 "iCHOv1": "Cricetulus griseus",
21 "iAF1260b": "Escherichia coli str. K-12 substr. MG1655",
22 "iAF1260": "Escherichia coli str. K-12 substr. MG1655",
23 "iML1515": "Escherichia coli str. K-12 substr. MG1655",
24 "iJO1366": "Escherichia coli str. K-12 substr. MG1655",
25 "iJR904": "Escherichia coli str. K-12 substr. MG1655",
26 "e_coli_core": "Escherichia coli str. K-12 substr. MG1655",
27 "iAF987": "Geobacter metallireducens GS-15",
28 "iIT341": "Helicobacter pylori 26695",
29 "iAT_PLT_636": "Homo sapiens",
30 "Recon3D": "Homo sapiens",
31 "iAB_RBC_283": "Homo sapiens",
32 "RECON1": "Homo sapiens",
33 "iYL1228": "Klebsiella pneumoniae subsp. pneumoniae MGH 78578",
34 "iNF517": "Lactococcus lactis subsp. cremoris MG1363",
35 "iAF692": "Methanosarcina barkeri str. Fusaro",
36 "iMM1415": "Mus musculus",
37 "iNJ661": "Mycobacterium tuberculosis H37Rv",
38 "iEK1008": "Mycobacterium tuberculosis H37Rv",
39 "iLB1027_lipid": "Phaeodactylum tricornutum CCAP 1055/1",
40 "iAM_Pb448": "Plasmodium berghei",
41 "iAM_Pc455": "Plasmodium cynomolgi strain B",
42 "iAM_Pf480": "Plasmodium falciparum 3D7",
43 "iAM_Pk459": "Plasmodium knowlesi strain H",
44 "iAM_Pv461": "Plasmodium vivax Sal-1",
45 "iJN746": "Pseudomonas putida KT2440",
46 "iJN1463": "Pseudomonas putida KT2440",
47 "iND750": "Saccharomyces cerevisiae S288C",
48 "iMM904": "Saccharomyces cerevisiae S288C",
49 "STM_v1_0": "Salmonella enterica subsp. enterica serovar Typhimurium str. LT2",
50 "iYS1720": "Salmonella pan-reactome",
51 "iSB619": "Staphylococcus aureus subsp. aureus N315",
52 "iYS854": "Staphylococcus aureus subsp. aureus USA300_TCH1516",
53 "iJB785": "Synechococcus elongatus PCC 7942",
54 "iJN678": "Synechocystis sp. PCC 6803",
55 "iSynCJ816": "Synechocystis sp. PCC 6803",
56 "iLJ478": "Thermotoga maritima MSB8",
57 "iIS312": "Trypanosoma cruzi Dm28c",
58 "iIS312_Trypomastigote": "Trypanosoma cruzi Dm28c",
59 "iIS312_Epimastigote": "Trypanosoma cruzi Dm28c",
60 "iIS312_Amastigote": "Trypanosoma cruzi Dm28c"
61 }
62 14
MODEL_URL = 'http://bigg.ucsd.edu/static/models/'
MODEL_DETAIL_URL = 'http://bigg.ucsd.edu/api/v2/models/'


def url_download(url, path):
    """Download *url* into the local file *path*.

    The response is streamed in 1 KiB chunks so large SBML models never
    need to fit in memory.  Any network or filesystem error terminates
    the program via ``sys.exit`` (data-manager scripts report failures
    through their exit status/message).
    """
    try:
        with urlopen(Request(url)) as src:
            with open(path, 'wb') as out:
                while True:
                    block = src.read(2 ** 10)
                    if not block:
                        break
                    out.write(block)
    except Exception as e:
        sys.exit(str(e))
def url_json(url):
    """Fetch *url* and return its JSON payload as a dict.

    Exits the program with the error message on any network or
    parsing failure.
    """
    data = {}
    try:
        with urlopen(Request(url)) as fod:
            # Bytes read off the wire must be *decoded* (the original
            # called .encode() on bytes, an AttributeError), and the BiGG
            # API returns JSON, so parse with json.loads — the original
            # 'ast.literal_evals' does not exist, and ast.literal_eval
            # could not handle JSON's true/false/null anyway.
            data = json.loads(fod.read().decode('utf-8'))
    except Exception as e:
        sys.exit(str(e))
    return data
def get_model_organism(model_id):
    """Return a display label ``(<model_id>) <organism>`` for *model_id*.

    Queries the BiGG model-detail API; falls back to 'undefined' when
    the record carries no 'organism' field.
    """
    data = url_json(MODEL_DETAIL_URL + model_id)
    org = data.get('organism', 'undefined')
    # The original was missing the '%' operator ("(%s) %s" (model_id, org)),
    # which raised "TypeError: 'str' object is not callable".
    res = "(%s) %s" % (model_id, org)
    return res
def download_entries(model_ids, workdir):
    """Yield one Galaxy data-table entry per BiGG model id.

    For each id the SBML file is downloaded into *workdir* and a dict
    with 'value' (model id), 'name' (display label from the API) and
    'path' (absolute file path) is yielded.
    """
    for model_id in model_ids:
        model_filename = model_id + '.xml'
        path = os.path.abspath(os.path.join(workdir, model_filename))

        # SBML files are served from the static models URL; the original
        # fetched MODEL_DETAIL_URL + filename, i.e. the JSON API endpoint,
        # which does not serve the XML files.
        url_download(MODEL_URL + model_filename, path)
        data_manager_entry = {}
        data_manager_entry['value'] = model_id
        data_manager_entry['name'] = get_model_organism(model_id)
        data_manager_entry['path'] = path

        # Make sure that less than 10 requests per second, as required by host (http://bigg.ucsd.edu/data_access)
        time.sleep(1)
        yield data_manager_entry
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    pinput = parser.add_mutually_exclusive_group(required=True)
    pinput.add_argument('--model-id', help='Model BIGG id')
    pinput.add_argument('--model-all', action='store_true', help='Download all models')
    parser.add_argument('--out-file', help='JSON output file')
    args = parser.parse_args()

    # Init.
    data_manager_json = {'data_tables': {}}
    # Galaxy hands the data-manager its parameters as JSON inside the
    # pre-existing out file: read them first, overwrite it with results last.
    with open(args.out_file) as fh:
        params = json.load(fh)

    workdir = params['output_data'][0]['extra_files_path']
    os.makedirs(workdir)

    # Either a single requested model, or every model listed by the API.
    model_ids = []
    if args.model_id:
        model_ids.append(args.model_id)
    else:
        data = url_json(MODEL_DETAIL_URL)
        for result in data.get("results", []):
            model_ids.append(result.get("bigg_id"))

    entries = list(download_entries(model_ids, workdir))

    # Write data.
    data_manager_json['data_tables']['bigg_model_sbml'] = entries
    with open(args.out_file, 'w') as fh:
        json.dump(data_manager_json, fh, sort_keys=True)
106 if __name__ == '__main__':
107 parser = argparse.ArgumentParser()
108 parser.add_argument('--model-id', help='Model BIGG id')
109 parser.add_argument('--out-file', help='JSON output file')
110 args = parser.parse_args()
111
112 download(args.model_id, args.out_file)