comparison data_manager/bakta_build_database.py @ 19:c90380f8bbbc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit 8eb28a93e6f2688bb2f3f85aea0389e1b1148816-dirty
author pimarin
date Fri, 13 Jan 2023 14:11:30 +0000
parents 04bee0f935a2
children ddeedb302cf1
comparison
equal deleted inserted replaced
18:04bee0f935a2 19:c90380f8bbbc
1 import argparse 1 import argparse
2 import hashlib 2 import hashlib
3 import json 3 import json
4 import os 4 import os
5 import sys 5 import sys
6 # import subprocess
7 import tarfile 6 import tarfile
8 from datetime import datetime 7 from datetime import datetime
9 from pathlib import Path 8 from pathlib import Path
10 # implement pip as a subprocess: 9
11 # subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'requests'])
12 10
13 import requests 11 import requests
14 12
15 13
16 class GetBaktaDatabaseInfo: 14 class GetBaktaDatabaseInfo:
19 """ 17 """
20 18
21 def __init__(self, 19 def __init__(self,
22 data_table_name="bakta_database", 20 data_table_name="bakta_database",
23 db_name=Path.cwd().joinpath("db"), 21 db_name=Path.cwd().joinpath("db"),
24 db_version="latest"): 22 db_version="latest",
23 test_mode=False):
25 self.bakta_table_list = None 24 self.bakta_table_list = None
26 self.db_url = None 25 self.db_url = None
27 self.data_table_entry = None 26 self.data_table_entry = None
28 self.data_table_name = data_table_name 27 self.data_table_name = data_table_name
29 self.db_name = db_name 28 self.db_name = db_name
30 self.db_version = db_version 29 self.db_version = db_version
31 self.DB_VERSIONS_URL = 'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json' 30 self.DB_VERSIONS_URL = 'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json'
31 self.DB_TEST_URL = 'https://zenodo.org/record/7360542/files/db-versions.json'
32 self.test_mode = test_mode
32 33
33 def get_data_table_format(self): 34 def get_data_table_format(self):
34 """ 35 """
35 Skeleton of a data_table format 36 Skeleton of a data_table format
36 return: a data table formated for json output 37 return: a data table formated for json output
42 } 43 }
43 return self.data_table_entry 44 return self.data_table_entry
44 45
45 def fetch_db_versions(self, db_version="latest"): 46 def fetch_db_versions(self, db_version="latest"):
46 """ 47 """
47 List bakta database info depending of the db_version selected 48 List bakta database info related to the db_version selected
48 """ 49 """
50 if self.test_mode is True:
51 self.DB_VERSIONS_URL = self.DB_TEST_URL
49 try: 52 try:
50 with requests.get(self.DB_VERSIONS_URL) as resp: 53 with requests.get(self.DB_VERSIONS_URL) as resp:
51 versions = json.loads(resp.content) 54 versions = json.loads(resp.content)
52 except IOError as e: 55 except IOError as e:
53 print(e, file=sys.stderr) 56 print(e, file=sys.stderr)
57 db_date_list = [] 60 db_date_list = []
58 for db_dic in versions: 61 for db_dic in versions:
59 db_date_list.append(datetime.strptime(db_dic["date"], 62 db_date_list.append(datetime.strptime(db_dic["date"],
60 '%Y-%m-%d').date()) 63 '%Y-%m-%d').date())
61 filtered_version = max(versions, key=lambda x: x['date']) 64 filtered_version = max(versions, key=lambda x: x['date'])
62 elif db_version == "test":
63 filtered_version = {"date": "date_test",
64 "major": "0",
65 "minor": "0",
66 "doi": "10.5281/zenodo.7197299",
67 "record": "7197299",
68 "md5": "8b0250c17078742fc12207d4efb0fc1a",
69 "software-min": {"major": "0",
70 "minor": "0"}
71 }
72 else: 65 else:
73 filtered_version = None 66 filtered_version = None
74 for item in versions: 67 for item in versions:
75 if '{0}.{1}'.format(item["major"], item["minor"]) == db_version: 68 if '{0}.{1}'.format(item["major"], item["minor"]) == db_version:
76 filtered_version = item 69 filtered_version = item
81 self.db_url = f"https://zenodo.org/record/" \ 74 self.db_url = f"https://zenodo.org/record/" \
82 f"{filtered_version['record']}/files/db.tar.gz" 75 f"{filtered_version['record']}/files/db.tar.gz"
83 self.db_version = db_version 76 self.db_version = db_version
84 return filtered_version 77 return filtered_version
85 78
86 def get_data_manager(self, bakta_database_info, output_path): 79 def get_data_manager(self, bakta_database_info):
87 self.bakta_table_list = self.get_data_table_format() 80 self.bakta_table_list = self.get_data_table_format()
88 bakta_value = f"V{bakta_database_info['major']}." \ 81 bakta_value = f"V{bakta_database_info['major']}." \
89 f"{bakta_database_info['minor']}_" \ 82 f"{bakta_database_info['minor']}_" \
90 f"{bakta_database_info['date']}" 83 f"{bakta_database_info['date']}"
91 tool_version = str(f"{bakta_database_info['software-min']['major']}." 84 tool_version = str(f"{bakta_database_info['software-min']['major']}."
92 f"{bakta_database_info['software-min']['minor']}") 85 f"{bakta_database_info['software-min']['minor']}")
93 data_info = dict(value=bakta_database_info['record'], 86 data_info = dict(value=bakta_value,
94 dbkey=bakta_value, 87 dbkey=bakta_database_info['record'],
95 bakta_version=tool_version, 88 bakta_version=tool_version,
96 path="db") 89 path="db")
97 self.bakta_table_list["data_tables"][self.data_table_name] = [data_info] 90 self.bakta_table_list["data_tables"][self.data_table_name] = [data_info]
98 return self.bakta_table_list 91 return self.bakta_table_list
99 92
106 """ 99 """
107 100
108 def __init__(self, 101 def __init__(self,
109 db_dir=Path.cwd(), 102 db_dir=Path.cwd(),
110 db_name="bakta", 103 db_name="bakta",
111 tarball_name="db.tar.gz"): 104 tarball_name="db.tar.gz",
105 test_mode=False):
112 super().__init__() 106 super().__init__()
113 self.md5 = None 107 self.md5 = None
114 self.db_dir = db_dir 108 self.db_dir = db_dir
115 self.db_name = db_name 109 self.db_name = db_name
116 self.tarball_name = tarball_name 110 self.tarball_name = tarball_name
117 self.tarball_path = None 111 self.tarball_path = None
112 self.test_mode = test_mode
118 113
119 def download(self): 114 def download(self):
120 self.db_name = f'{self.db_name}_{self.db_version}' 115 self.db_name = f'{self.db_name}_{self.db_version}'
121 bakta_path = Path(self.db_dir).joinpath(self.tarball_name) 116 bakta_path = Path(self.db_dir).joinpath(self.tarball_name)
122 try: 117 try:
210 return arg_parser.parse_args() 205 return arg_parser.parse_args()
211 206
212 207
213 def main(): 208 def main():
214 all_args = parse_arguments() 209 all_args = parse_arguments()
215
216 with open(all_args.data_manager_json) as fh: 210 with open(all_args.data_manager_json) as fh:
217 params = json.load(fh) 211 params = json.load(fh)
218 target_dir = params['output_data'][0]['extra_files_path'] 212 target_dir = params['output_data'][0]['extra_files_path']
219 os.makedirs(target_dir) 213 os.makedirs(target_dir)
220 # init the class to download bakta db 214 # init the class to download bakta db
221 bakta_upload = InstallBaktaDatabase() 215 bakta_upload = InstallBaktaDatabase(test_mode=all_args.test)
222 # extract the version 216 bakta_db = bakta_upload.fetch_db_versions(db_version=all_args.database_version)
223 if all_args.test is True:
224 bakta_db = bakta_upload.fetch_db_versions(
225 db_version="test")
226 else:
227 bakta_db = bakta_upload.fetch_db_versions(
228 db_version=all_args.database_version)
229 # update the path for galaxy 217 # update the path for galaxy
230 bakta_upload.db_dir = target_dir 218 bakta_upload.db_dir = target_dir
231 # download the database 219 # download the database
232 bakta_upload.download() 220 bakta_upload.download()
233 # check md5 sum 221 # check md5 sum
234 bakta_upload.calc_md5_sum() 222 bakta_upload.calc_md5_sum()
235 # untar db 223 # untar db
236 bakta_upload.untar() 224 bakta_upload.untar()
237 # make the data_manager metadata 225 # make the data_manager metadata
238 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=target_dir) 226 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db)
239 with open(all_args.data_manager_json, 'w') as fh: 227 with open(all_args.data_manager_json, 'w') as fh:
240 json.dump(bakta_data_manager, fh, sort_keys=True) 228 json.dump(bakta_data_manager, fh, sort_keys=True)
241 229
242 230
243 if __name__ == '__main__': 231 if __name__ == '__main__':