Mercurial > repos > pimarin > data_manager_bakta
comparison data_manager/bakta_build_database.py @ 19:c90380f8bbbc draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit 8eb28a93e6f2688bb2f3f85aea0389e1b1148816-dirty
author | pimarin |
---|---|
date | Fri, 13 Jan 2023 14:11:30 +0000 |
parents | 04bee0f935a2 |
children | ddeedb302cf1 |
comparison
equal
deleted
inserted
replaced
18:04bee0f935a2 | 19:c90380f8bbbc |
---|---|
1 import argparse | 1 import argparse |
2 import hashlib | 2 import hashlib |
3 import json | 3 import json |
4 import os | 4 import os |
5 import sys | 5 import sys |
6 # import subprocess | |
7 import tarfile | 6 import tarfile |
8 from datetime import datetime | 7 from datetime import datetime |
9 from pathlib import Path | 8 from pathlib import Path |
10 # implement pip as a subprocess: | 9 |
11 # subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'requests']) | |
12 | 10 |
13 import requests | 11 import requests |
14 | 12 |
15 | 13 |
16 class GetBaktaDatabaseInfo: | 14 class GetBaktaDatabaseInfo: |
19 """ | 17 """ |
20 | 18 |
21 def __init__(self, | 19 def __init__(self, |
22 data_table_name="bakta_database", | 20 data_table_name="bakta_database", |
23 db_name=Path.cwd().joinpath("db"), | 21 db_name=Path.cwd().joinpath("db"), |
24 db_version="latest"): | 22 db_version="latest", |
23 test_mode=False): | |
25 self.bakta_table_list = None | 24 self.bakta_table_list = None |
26 self.db_url = None | 25 self.db_url = None |
27 self.data_table_entry = None | 26 self.data_table_entry = None |
28 self.data_table_name = data_table_name | 27 self.data_table_name = data_table_name |
29 self.db_name = db_name | 28 self.db_name = db_name |
30 self.db_version = db_version | 29 self.db_version = db_version |
31 self.DB_VERSIONS_URL = 'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json' | 30 self.DB_VERSIONS_URL = 'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json' |
31 self.DB_TEST_URL = 'https://zenodo.org/record/7360542/files/db-versions.json' | |
32 self.test_mode = test_mode | |
32 | 33 |
33 def get_data_table_format(self): | 34 def get_data_table_format(self): |
34 """ | 35 """ |
35 Skeleton of a data_table format | 36 Skeleton of a data_table format |
36 return: a data table formatted for json output | 37 return: a data table formatted for json output |
42 } | 43 } |
43 return self.data_table_entry | 44 return self.data_table_entry |
44 | 45 |
45 def fetch_db_versions(self, db_version="latest"): | 46 def fetch_db_versions(self, db_version="latest"): |
46 """ | 47 """ |
47 List bakta database info depending of the db_version selected | 48 List bakta database info related to the db_version selected |
48 """ | 49 """ |
50 if self.test_mode is True: | |
51 self.DB_VERSIONS_URL = self.DB_TEST_URL | |
49 try: | 52 try: |
50 with requests.get(self.DB_VERSIONS_URL) as resp: | 53 with requests.get(self.DB_VERSIONS_URL) as resp: |
51 versions = json.loads(resp.content) | 54 versions = json.loads(resp.content) |
52 except IOError as e: | 55 except IOError as e: |
53 print(e, file=sys.stderr) | 56 print(e, file=sys.stderr) |
57 db_date_list = [] | 60 db_date_list = [] |
58 for db_dic in versions: | 61 for db_dic in versions: |
59 db_date_list.append(datetime.strptime(db_dic["date"], | 62 db_date_list.append(datetime.strptime(db_dic["date"], |
60 '%Y-%m-%d').date()) | 63 '%Y-%m-%d').date()) |
61 filtered_version = max(versions, key=lambda x: x['date']) | 64 filtered_version = max(versions, key=lambda x: x['date']) |
62 elif db_version == "test": | |
63 filtered_version = {"date": "date_test", | |
64 "major": "0", | |
65 "minor": "0", | |
66 "doi": "10.5281/zenodo.7197299", | |
67 "record": "7197299", | |
68 "md5": "8b0250c17078742fc12207d4efb0fc1a", | |
69 "software-min": {"major": "0", | |
70 "minor": "0"} | |
71 } | |
72 else: | 65 else: |
73 filtered_version = None | 66 filtered_version = None |
74 for item in versions: | 67 for item in versions: |
75 if '{0}.{1}'.format(item["major"], item["minor"]) == db_version: | 68 if '{0}.{1}'.format(item["major"], item["minor"]) == db_version: |
76 filtered_version = item | 69 filtered_version = item |
81 self.db_url = f"https://zenodo.org/record/" \ | 74 self.db_url = f"https://zenodo.org/record/" \ |
82 f"{filtered_version['record']}/files/db.tar.gz" | 75 f"{filtered_version['record']}/files/db.tar.gz" |
83 self.db_version = db_version | 76 self.db_version = db_version |
84 return filtered_version | 77 return filtered_version |
85 | 78 |
86 def get_data_manager(self, bakta_database_info, output_path): | 79 def get_data_manager(self, bakta_database_info): |
87 self.bakta_table_list = self.get_data_table_format() | 80 self.bakta_table_list = self.get_data_table_format() |
88 bakta_value = f"V{bakta_database_info['major']}." \ | 81 bakta_value = f"V{bakta_database_info['major']}." \ |
89 f"{bakta_database_info['minor']}_" \ | 82 f"{bakta_database_info['minor']}_" \ |
90 f"{bakta_database_info['date']}" | 83 f"{bakta_database_info['date']}" |
91 tool_version = str(f"{bakta_database_info['software-min']['major']}." | 84 tool_version = str(f"{bakta_database_info['software-min']['major']}." |
92 f"{bakta_database_info['software-min']['minor']}") | 85 f"{bakta_database_info['software-min']['minor']}") |
93 data_info = dict(value=bakta_database_info['record'], | 86 data_info = dict(value=bakta_value, |
94 dbkey=bakta_value, | 87 dbkey=bakta_database_info['record'], |
95 bakta_version=tool_version, | 88 bakta_version=tool_version, |
96 path="db") | 89 path="db") |
97 self.bakta_table_list["data_tables"][self.data_table_name] = [data_info] | 90 self.bakta_table_list["data_tables"][self.data_table_name] = [data_info] |
98 return self.bakta_table_list | 91 return self.bakta_table_list |
99 | 92 |
106 """ | 99 """ |
107 | 100 |
108 def __init__(self, | 101 def __init__(self, |
109 db_dir=Path.cwd(), | 102 db_dir=Path.cwd(), |
110 db_name="bakta", | 103 db_name="bakta", |
111 tarball_name="db.tar.gz"): | 104 tarball_name="db.tar.gz", |
105 test_mode=False): | |
112 super().__init__() | 106 super().__init__() |
113 self.md5 = None | 107 self.md5 = None |
114 self.db_dir = db_dir | 108 self.db_dir = db_dir |
115 self.db_name = db_name | 109 self.db_name = db_name |
116 self.tarball_name = tarball_name | 110 self.tarball_name = tarball_name |
117 self.tarball_path = None | 111 self.tarball_path = None |
112 self.test_mode = test_mode | |
118 | 113 |
119 def download(self): | 114 def download(self): |
120 self.db_name = f'{self.db_name}_{self.db_version}' | 115 self.db_name = f'{self.db_name}_{self.db_version}' |
121 bakta_path = Path(self.db_dir).joinpath(self.tarball_name) | 116 bakta_path = Path(self.db_dir).joinpath(self.tarball_name) |
122 try: | 117 try: |
210 return arg_parser.parse_args() | 205 return arg_parser.parse_args() |
211 | 206 |
212 | 207 |
213 def main(): | 208 def main(): |
214 all_args = parse_arguments() | 209 all_args = parse_arguments() |
215 | |
216 with open(all_args.data_manager_json) as fh: | 210 with open(all_args.data_manager_json) as fh: |
217 params = json.load(fh) | 211 params = json.load(fh) |
218 target_dir = params['output_data'][0]['extra_files_path'] | 212 target_dir = params['output_data'][0]['extra_files_path'] |
219 os.makedirs(target_dir) | 213 os.makedirs(target_dir) |
220 # init the class to download bakta db | 214 # init the class to download bakta db |
221 bakta_upload = InstallBaktaDatabase() | 215 bakta_upload = InstallBaktaDatabase(test_mode=all_args.test) |
222 # extract the version | 216 bakta_db = bakta_upload.fetch_db_versions(db_version=all_args.database_version) |
223 if all_args.test is True: | |
224 bakta_db = bakta_upload.fetch_db_versions( | |
225 db_version="test") | |
226 else: | |
227 bakta_db = bakta_upload.fetch_db_versions( | |
228 db_version=all_args.database_version) | |
229 # update the path for galaxy | 217 # update the path for galaxy |
230 bakta_upload.db_dir = target_dir | 218 bakta_upload.db_dir = target_dir |
231 # download the database | 219 # download the database |
232 bakta_upload.download() | 220 bakta_upload.download() |
233 # check md5 sum | 221 # check md5 sum |
234 bakta_upload.calc_md5_sum() | 222 bakta_upload.calc_md5_sum() |
235 # untar db | 223 # untar db |
236 bakta_upload.untar() | 224 bakta_upload.untar() |
237 # make the data_manager metadata | 225 # make the data_manager metadata |
238 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=target_dir) | 226 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db) |
239 with open(all_args.data_manager_json, 'w') as fh: | 227 with open(all_args.data_manager_json, 'w') as fh: |
240 json.dump(bakta_data_manager, fh, sort_keys=True) | 228 json.dump(bakta_data_manager, fh, sort_keys=True) |
241 | 229 |
242 | 230 |
243 if __name__ == '__main__': | 231 if __name__ == '__main__': |