comparison: data_manager/bigg_model_sbml_fetcher.py @ 5:5e6f76507721 (draft)
"planemo upload for repository https://github.com/brsynth/synbiocad-galaxy-wrappers commit 47caed1dd87e80ae226fabb584e9d63d7c86a436-dirty"
| author | ggricourt |
|---|---|
| date | Thu, 24 Feb 2022 10:56:46 +0000 |
| parents | 2f837e65b33c |
| children | 65589e7476b6 |
| 4:a15b229ee755 | 5:5e6f76507721 |
|---|---|
| 1 import argparse | 1 import argparse |
| | 2 import ast |
| 2 import json | 3 import json |
| 3 import os | 4 import os |
| 4 import sys | 5 import sys |
| | 6 import time |
| 5 try: | 7 try: |
| 6 # For Python 3.0 and later | 8 # For Python 3.0 and later |
| 7 from urllib.request import Request, urlopen | 9 from urllib.request import Request, urlopen |
| 8 except ImportError: | 10 except ImportError: |
| 9 # Fall back to Python 2 imports | 11 # Fall back to Python 2 imports |
| 10 from urllib2 import Request, urlopen | 12 from urllib2 import Request, urlopen |
| 11 | 13 |
| 12 BASE_URL = 'http://bigg.ucsd.edu/static/models' | |
| 13 ID2ORG = { | |
| 14 "iCN718": "Acinetobacter baumannii AYE", | |
| 15 "iYO844": "Bacillus subtilis subsp. subtilis str. 168", | |
| 16 "iRC1080": "Chlamydomonas reinhardtii", | |
| 17 "iCN900": "Clostridioides difficile 630", | |
| 18 "iHN637": "Clostridium ljungdahlii DSM 13528", | |
| 19 "iCHOv1_DG44": "Cricetulus griseus", | |
| 20 "iCHOv1": "Cricetulus griseus", | |
| 21 "iAF1260b": "Escherichia coli str. K-12 substr. MG1655", | |
| 22 "iAF1260": "Escherichia coli str. K-12 substr. MG1655", | |
| 23 "iML1515": "Escherichia coli str. K-12 substr. MG1655", | |
| 24 "iJO1366": "Escherichia coli str. K-12 substr. MG1655", | |
| 25 "iJR904": "Escherichia coli str. K-12 substr. MG1655", | |
| 26 "e_coli_core": "Escherichia coli str. K-12 substr. MG1655", | |
| 27 "iAF987": "Geobacter metallireducens GS-15", | |
| 28 "iIT341": "Helicobacter pylori 26695", | |
| 29 "iAT_PLT_636": "Homo sapiens", | |
| 30 "Recon3D": "Homo sapiens", | |
| 31 "iAB_RBC_283": "Homo sapiens", | |
| 32 "RECON1": "Homo sapiens", | |
| 33 "iYL1228": "Klebsiella pneumoniae subsp. pneumoniae MGH 78578", | |
| 34 "iNF517": "Lactococcus lactis subsp. cremoris MG1363", | |
| 35 "iAF692": "Methanosarcina barkeri str. Fusaro", | |
| 36 "iMM1415": "Mus musculus", | |
| 37 "iNJ661": "Mycobacterium tuberculosis H37Rv", | |
| 38 "iEK1008": "Mycobacterium tuberculosis H37Rv", | |
| 39 "iLB1027_lipid": "Phaeodactylum tricornutum CCAP 1055/1", | |
| 40 "iAM_Pb448": "Plasmodium berghei", | |
| 41 "iAM_Pc455": "Plasmodium cynomolgi strain B", | |
| 42 "iAM_Pf480": "Plasmodium falciparum 3D7", | |
| 43 "iAM_Pk459": "Plasmodium knowlesi strain H", | |
| 44 "iAM_Pv461": "Plasmodium vivax Sal-1", | |
| 45 "iJN746": "Pseudomonas putida KT2440", | |
| 46 "iJN1463": "Pseudomonas putida KT2440", | |
| 47 "iND750": "Saccharomyces cerevisiae S288C", | |
| 48 "iMM904": "Saccharomyces cerevisiae S288C", | |
| 49 "STM_v1_0": "Salmonella enterica subsp. enterica serovar Typhimurium str. LT2", | |
| 50 "iYS1720": "Salmonella pan-reactome", | |
| 51 "iSB619": "Staphylococcus aureus subsp. aureus N315", | |
| 52 "iYS854": "Staphylococcus aureus subsp. aureus USA300_TCH1516", | |
| 53 "iJB785": "Synechococcus elongatus PCC 7942", | |
| 54 "iJN678": "Synechocystis sp. PCC 6803", | |
| 55 "iSynCJ816": "Synechocystis sp. PCC 6803", | |
| 56 "iLJ478": "Thermotoga maritima MSB8", | |
| 57 "iIS312": "Trypanosoma cruzi Dm28c", | |
| 58 "iIS312_Trypomastigote": "Trypanosoma cruzi Dm28c", | |
| 59 "iIS312_Epimastigote": "Trypanosoma cruzi Dm28c", | |
| 60 "iIS312_Amastigote": "Trypanosoma cruzi Dm28c" | |
| 61 } | |
| 62 | 14 |
| 63 def url_download(url, workdir): | 15 MODEL_URL = 'http://bigg.ucsd.edu/static/models/' |
| 64 file_path = os.path.abspath(os.path.join(workdir, os.path.basename(url))) | 16 MODEL_DETAIL_URL = 'http://bigg.ucsd.edu/api/v2/models/' |
| 65 src = None | 17 |
| 66 dst = None | 18 |
| | 19 def url_download(url, path): |
| 67 try: | 20 try: |
| 68 req = Request(url) | 21 with urlopen(Request(url)) as fod: |
| 69 src = urlopen(req) | 22 with open(path, 'wb') as dst: |
| 70 with open(file_path, 'wb') as dst: | 23 while True: |
| 71 while True: | 24 chunk = fod.read(2**10) |
| 72 chunk = src.read(2**10) | 25 if chunk: |
| 73 if chunk: | 26 dst.write(chunk) |
| 74 dst.write(chunk) | 27 else: |
| 75 else: | 28 break |
| 76 break | |
| 77 except Exception as e: | 29 except Exception as e: |
| 78 sys.exit(str(e)) | 30 sys.exit(str(e)) |
| 79 finally: | |
| 80 if src: | |
| 81 src.close() | |
| 82 return file_path | |
| 83 | 31 |
| 84 | 32 |
| 85 def download(model_id, out_file): | 33 def url_json(url): |
| | 34 data = {} |
| | 35 try: |
| | 36 with urlopen(Request(url)) as fod: |
| | 37 data = fod.read().decode('utf-8') |
| | 38 data = ast.literal_eval(data) |
| | 39 except Exception as e: |
| | 40 sys.exit(str(e)) |
| | 41 return data |
| 86 | 42 |
| 87 with open(out_file) as fh: | 43 |
| | 44 def get_model_organism(model_id): |
| | 45 data = url_json(MODEL_DETAIL_URL + model_id) |
| | 46 org = data.get('organism', 'undefined') |
| | 47 res = "(%s) %s" % (model_id, org) |
| | 48 return res |
| | 49 |
| | 50 |
| | 51 def download_entries(model_ids, workdir): |
| | 52 for model_id in model_ids: |
| | 53 model_filename = model_id + '.xml' |
| | 54 path = os.path.abspath(os.path.join(workdir, model_filename)) |
| | 55 |
| | 56 url_download(MODEL_URL + model_filename, path) |
| | 57 data_manager_entry = {} |
| | 58 data_manager_entry['value'] = model_id |
| | 59 data_manager_entry['name'] = get_model_organism(model_id) |
| | 60 data_manager_entry['path'] = path |
| | 61 |
| | 62 # Stay below the host's limit of 10 requests per second (http://bigg.ucsd.edu/data_access) |
| | 63 time.sleep(1) |
| | 64 yield data_manager_entry |
| | 65 |
| | 66 |
| | 67 if __name__ == '__main__': |
| | 68 parser = argparse.ArgumentParser() |
| | 69 pinput = parser.add_mutually_exclusive_group(required=True) |
| | 70 pinput.add_argument('--model-id', help='Model BIGG id') |
| | 71 pinput.add_argument('--model-all', action='store_true', help='Download all models') |
| | 72 parser.add_argument('--out-file', help='JSON output file') |
| | 73 args = parser.parse_args() |
| | 74 |
| | 75 # Init. |
| | 76 data_manager_json = {'data_tables': {}} |
| | 77 with open(args.out_file) as fh: |
| 88 params = json.load(fh) | 78 params = json.load(fh) |
| 89 | 79 |
| 90 workdir = params['output_data'][0]['extra_files_path'] | 80 workdir = params['output_data'][0]['extra_files_path'] |
| 91 os.makedirs(workdir) | 81 os.makedirs(workdir) |
| 92 | 82 |
| 93 data_manager_json = {'data_tables': {}} | 83 model_ids = [] |
| 94 file_path = url_download(BASE_URL + '/' + model_id + '.xml', workdir) | 84 if args.model_id: |
| | 85 model_ids.append(args.model_id) |
| | 86 else: |
| | 87 data = url_json(MODEL_DETAIL_URL) |
| | 88 for result in data.get("results", []): |
| | 89 model_ids.append(result.get("bigg_id")) |
| 95 | 90 |
| 96 data_manager_entry = {} | 91 entries = list(download_entries(model_ids, workdir)) |
| 97 data_manager_entry['value'] = model_id | |
| 98 data_manager_entry['name'] = ID2ORG.get(model_id, 'undefined') | |
| 99 data_manager_entry['path'] = file_path | |
| 100 | 92 |
| 101 data_manager_json['data_tables']['bigg_model_sbml'] = data_manager_entry | 93 # Write data. |
| 102 with open(out_file, 'w') as fh: | 94 data_manager_json['data_tables']['bigg_model_sbml'] = entries |
| | 95 with open(args.out_file, 'w') as fh: |
| 103 json.dump(data_manager_json, fh, sort_keys=True) | 96 json.dump(data_manager_json, fh, sort_keys=True) |
| 104 | |
| 105 | |
| 106 if __name__ == '__main__': | |
| 107 parser = argparse.ArgumentParser() | |
| 108 parser.add_argument('--model-id', help='Model BIGG id') | |
| 109 parser.add_argument('--out-file', help='JSON output file') | |
| 110 args = parser.parse_args() | |
| 111 | |
| 112 download(args.model_id, args.out_file) | |
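One caveat the diff leaves open: the new `url_download` keeps the urllib2 fallback for Python 2 yet uses the `urlopen` result as a context manager, which Python 2's urllib2 responses do not support. If the Python 2 path is meant to stay, `contextlib.closing` is the usual bridge; a minimal sketch, assuming the same chunked-copy behavior as the script:

```python
from contextlib import closing

try:
    # For Python 3.0 and later
    from urllib.request import Request, urlopen
except ImportError:
    # Fall back to Python 2 imports
    from urllib2 import Request, urlopen


def url_download(url, path):
    # closing() gives both Python 2 and 3 response objects a context manager.
    with closing(urlopen(Request(url))) as src:
        with open(path, 'wb') as dst:
            # Copy in 1 KiB chunks, as the data manager does.
            for chunk in iter(lambda: src.read(2**10), b''):
                dst.write(chunk)
```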
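To see what the new `url_json`/`download_entries` pair does end to end, the fragment below is a minimal, Python 3-only distillation. It parses the API response with `json.loads` rather than `ast.literal_eval` (the endpoint serves JSON, so either works on well-formed responses); picking the first listed model is illustrative only:

```python
import json
import time
from urllib.request import Request, urlopen

MODEL_URL = 'http://bigg.ucsd.edu/static/models/'
MODEL_DETAIL_URL = 'http://bigg.ucsd.edu/api/v2/models/'


def fetch_json(url):
    # The BiGG API serves JSON, so json.loads is the natural parser.
    with urlopen(Request(url)) as fod:
        return json.loads(fod.read().decode('utf-8'))


# List every model id known to the API, then fetch one SBML file.
model_ids = [r['bigg_id'] for r in fetch_json(MODEL_DETAIL_URL)['results']]
model_id = model_ids[0]  # illustrative; any BiGG id works
with urlopen(Request(MODEL_URL + model_id + '.xml')) as src:
    with open(model_id + '.xml', 'wb') as dst:
        dst.write(src.read())
time.sleep(1)  # stay below the host's 10 requests/second limit
```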
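Finally, the script now reads `--out-file` for Galaxy's parameters (`output_data[0]['extra_files_path']` names the working directory) and then overwrites that same file with the data table entries. A plausible shape for the output of a single-model run is sketched below; the path is hypothetical, and the organism name is taken from the `ID2ORG` table deleted in this revision:

```python
import json

# Hypothetical result of `--model-id iML1515`; 'name' pairs the BiGG id
# with the organism reported by the API.
data_manager_json = {
    'data_tables': {
        'bigg_model_sbml': [
            {
                'value': 'iML1515',
                'name': '(iML1515) Escherichia coli str. K-12 substr. MG1655',
                'path': '/galaxy/extra_files/iML1515.xml',  # hypothetical path
            }
        ]
    }
}
print(json.dumps(data_manager_json, sort_keys=True))
```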
