Mercurial > repos > pimarin > data_manager_bakta
comparison data_manager/bakta_build_database.py @ 11:c50e8b4f74d7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bakta commit 572bbceeba9a22cdb591d526abda362595f8f0c4
| author | pimarin |
|---|---|
| date | Wed, 02 Nov 2022 11:06:36 +0000 |
| parents | 9c65e5da43e2 |
| children | bcac3aa1f494 |
comparison
Legend: equal | deleted | inserted | replaced
| 10:9c65e5da43e2 | 11:c50e8b4f74d7 |
|---|---|
| 6 import sys | 6 import sys |
| 7 import tarfile | 7 import tarfile |
| 8 from datetime import datetime | 8 from datetime import datetime |
| 9 from pathlib import Path | 9 from pathlib import Path |
| 10 | 10 |
| 11 import bakta.constants as bc | |
| 12 import bakta.utils as bu | |
| 13 import requests | 11 import requests |
| 14 from alive_progress import alive_bar | 12 from alive_progress import alive_bar |
| 15 | 13 |
| 16 | 14 |
| 17 class GetBaktaDatabaseInfo: | 15 class GetBaktaDatabaseInfo: |
| 27 self.db_url = None | 25 self.db_url = None |
| 28 self.data_table_entry = None | 26 self.data_table_entry = None |
| 29 self.data_table_name = data_table_name | 27 self.data_table_name = data_table_name |
| 30 self.db_name = db_name | 28 self.db_name = db_name |
| 31 self.db_version = db_version | 29 self.db_version = db_version |
| 30 self.DB_VERSIONS_URL = 'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json' | |
| 32 | 31 |
| 33 def get_data_table_format(self): | 32 def get_data_table_format(self): |
| 34 """ | 33 """ |
| 35 Build a data table format for galaxy | 34 Build a data table format for galaxy |
| 36 using the bakta database information | 35 using the bakta database information |
| 55 db_version: a string of the version number | 54 db_version: a string of the version number |
| 56 in the galaxy wrapper list or just latest | 55 in the galaxy wrapper list or just latest |
| 57 return: info for the select or the latest bakta db version | 56 return: info for the select or the latest bakta db version |
| 58 """ | 57 """ |
| 59 try: | 58 try: |
| 60 with requests.get(bc.DB_VERSIONS_URL) as resp: | 59 with requests.get(self.DB_VERSIONS_URL) as resp: |
| 61 versions = json.loads(resp.content) | 60 versions = json.loads(resp.content) |
| 62 except IOError as e: | 61 except IOError as e: |
| 63 print(e, file=sys.stderr) | 62 print(e, file=sys.stderr) |
| 64 raise e | 63 raise e |
| 65 else: | 64 else: |
| 106 dbkey=bakta_database_info['date'], | 105 dbkey=bakta_database_info['date'], |
| 107 database_record=bakta_database_info['record'], | 106 database_record=bakta_database_info['record'], |
| 108 bakta_version=str( | 107 bakta_version=str( |
| 109 f"{bakta_database_info['software-min']['major']}." | 108 f"{bakta_database_info['software-min']['major']}." |
| 110 f"{bakta_database_info['software-min']['minor']}" | 109 f"{bakta_database_info['software-min']['minor']}" |
| 111 ), path=output_path) | 110 ), |
| 111 path=output_path) | |
| 112 self.bakta_table_list["data_tables"][self.data_table_name] = data_info | 112 self.bakta_table_list["data_tables"][self.data_table_name] = data_info |
| 113 return self.bakta_table_list | 113 return self.bakta_table_list |
| 114 | 114 |
| 115 | 115 |
| 116 class InstallBaktaDatabase(GetBaktaDatabaseInfo): | 116 class InstallBaktaDatabase(GetBaktaDatabaseInfo): |
| 128 self.md5 = None | 128 self.md5 = None |
| 129 self.db_dir = db_dir | 129 self.db_dir = db_dir |
| 130 self.db_name = db_name | 130 self.db_name = db_name |
| 131 self.tarball_name = tarball_name | 131 self.tarball_name = tarball_name |
| 132 self.tarball_path = None | 132 self.tarball_path = None |
| 133 bu.test_dependency(bu.DEPENDENCY_AMRFINDERPLUS) | |
| 134 | 133 |
| 135 def download(self): | 134 def download(self): |
| 136 self.db_name = f'{self.db_name}_{self.db_version}' | 135 self.db_name = f'{self.db_name}_{self.db_version}' |
| 137 bakta_path = Path(self.db_dir).joinpath(self.tarball_name) | 136 bakta_path = Path(self.db_dir).joinpath(self.tarball_name) |
| 138 try: | 137 try: |
| 219 help='Select the database version ' | 218 help='Select the database version ' |
| 220 '(major and minor eg. 4.0),' | 219 '(major and minor eg. 4.0),' |
| 221 'default is the latest version', | 220 'default is the latest version', |
| 222 default="latest", | 221 default="latest", |
| 223 required=True) | 222 required=True) |
| 223 arg_parser.add_argument("-t", "--test", action='store_true', | |
| 224 help="option to test the script with an empty database") | |
| 224 return arg_parser.parse_args() | 225 return arg_parser.parse_args() |
| 225 | 226 |
| 226 | 227 |
| 227 def main(): | 228 def main(): |
| 228 all_args = parse_arguments() | 229 all_args = parse_arguments() |
| 234 os.makedirs(target_dir) | 235 os.makedirs(target_dir) |
| 235 | 236 |
| 236 # init the class to download bakta db | 237 # init the class to download bakta db |
| 237 bakta_upload = InstallBaktaDatabase() | 238 bakta_upload = InstallBaktaDatabase() |
| 238 # extract the version | 239 # extract the version |
| 239 bakta_db = bakta_upload.fetch_db_versions( | 240 if all_args.test is True: |
| 240 db_version=all_args.database_version) | 241 bakta_db = bakta_upload.fetch_db_versions( |
| 242 db_version="test") | |
| 243 else: | |
| 244 bakta_db = bakta_upload.fetch_db_versions( | |
| 245 db_version=all_args.database_version) | |
| 241 # update the path for galaxy | 246 # update the path for galaxy |
| 242 bakta_upload.db_dir = target_dir | 247 bakta_upload.db_dir = target_dir |
| 243 # download the database | 248 # download the database |
| 244 print(bakta_db) | |
| 245 bakta_upload.download() | 249 bakta_upload.download() |
| 246 # check md5 sum | 250 # check md5 sum |
| 247 bakta_upload.calc_md5_sum() | 251 bakta_upload.calc_md5_sum() |
| 248 # untar db | 252 # untar db |
| 249 bakta_extracted_path = bakta_upload.untar() | 253 bakta_extracted_path = bakta_upload.untar() |
| 250 # update for amrfinderplus | 254 # update for amrfinderplus |
| 251 bakta_upload.update_amrfinderplus_db() | 255 bakta_upload.update_amrfinderplus_db() |
| 252 # make the data_manager metadata | 256 # make the data_manager metadata |
| 253 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=bakta_extracted_path) | 257 bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db, output_path=bakta_extracted_path) |
| 254 with open(all_args.data_manager_json, 'w') as fh: | 258 with open(all_args.data_manager_json, 'w') as fh: |
| 255 json.dump(bakta_data_manager, fh, indent=2, sort_keys=True) | 259 json.dump(bakta_data_manager, fh, sort_keys=True) |
| 256 | 260 |
| 257 | 261 |
| 258 if __name__ == '__main__': | 262 if __name__ == '__main__': |
| 259 main() | 263 main() |
