Mercurial > repos > greg > data_manager_gtdbtk_database_installer
comparison data_manager/gtdbtk_database_installer.py @ 1:7093598fa300 draft
Uploaded
| author | greg |
|---|---|
| date | Tue, 15 Mar 2022 19:31:23 +0000 |
| parents | 3ab83cb7e2d2 |
| children | e4fe7259c4e9 |
comparison
equal
deleted
inserted
replaced
| 0:3ab83cb7e2d2 | 1:7093598fa300 |
|---|---|
| 7 import tarfile | 7 import tarfile |
| 8 from urllib.request import Request, urlopen | 8 from urllib.request import Request, urlopen |
| 9 from urllib.parse import urlparse | 9 from urllib.parse import urlparse |
| 10 | 10 |
| 11 | 11 |
| 12 def url_download(url, work_dir): | 12 def url_download(url, target_directory): |
| 13 url_parts = urlparse(url) | 13 url_parts = urlparse(url) |
| 14 file_path = os.path.abspath(os.path.join(work_dir, os.path.basename(url_parts.path))) | 14 tarball = os.path.abspath(os.path.join(target_directory, os.path.basename(url_parts.path))) |
| 15 src = None | 15 src = None |
| 16 dst = None | 16 dst = None |
| 17 try: | 17 try: |
| 18 req = Request(url) | 18 req = Request(url) |
| 19 src = urlopen(req) | 19 src = urlopen(req) |
| 20 with open(file_path, 'wb') as dst: | 20 with open(tarball, 'wb') as dst: |
| 21 while True: | 21 while True: |
| 22 chunk = src.read(2**10) | 22 chunk = src.read(2**10) |
| 23 if chunk: | 23 if chunk: |
| 24 dst.write(chunk) | 24 dst.write(chunk) |
| 25 else: | 25 else: |
| 26 break | 26 break |
| 27 except Exception as e: | 27 except Exception as e: |
| 28 sys.exit(str(e)) | 28 sys.exit(str(e)) |
| 29 finally: | 29 finally: |
| 30 if src: | 30 if src is not None: |
| 31 src.close() | 31 src.close() |
| 32 if tarfile.is_tarfile(file_path): | 32 if tarfile.is_tarfile(tarball): |
| 33 fh = tarfile.open(file_path, 'r:*') | 33 fh = tarfile.open(tarball, 'r:*') |
| 34 else: | 34 else: |
| 35 return file_path | 35 return tarball |
| 36 fh.extractall(work_dir) | 36 fh.extractall(target_directory) |
| 37 os.remove(file_path) | 37 fh.close() |
| 38 return work_dir | 38 os.remove(tarball) |
| | 39 return target_directory |
| 39 | 40 |
| 40 | 41 |
| 41 def download(database_id, database_name, url, out_file): | 42 def download(database_id, database_name, url, out_file): |
| 42 | 43 |
| 43 with open(out_file) as fh: | 44 with open(out_file) as fh: |
| 44 params = json.load(fh) | 45 params = json.load(fh) |
| 45 | 46 |
| 46 work_dir = params['output_data'][0]['extra_files_path'] | 47 target_directory = params['output_data'][0]['extra_files_path'] |
| 47 os.makedirs(work_dir) | 48 os.makedirs(target_directory) |
| 48 file_path = url_download(url, work_dir) | 49 file_path = url_download(url, target_directory) |
| 49 | 50 |
| 50 data_manager_json = {"data_tables": {}} | 51 data_manager_json = {"data_tables": {}} |
| 51 data_manager_entry = {} | 52 data_manager_entry = {} |
| 52 data_manager_entry['value'] = database_id | 53 data_manager_entry['value'] = database_id |
| 53 data_manager_entry['name'] = database_name | 54 data_manager_entry['name'] = database_name |
| 54 data_manager_entry['path'] = file_path | 55 data_manager_entry['db_path'] = file_path |
| 55 data_manager_json["data_tables"]["gtdbtk_database"] = data_manager_entry | 56 data_manager_json["data_tables"]["gtdbtk_database"] = data_manager_entry |
| 56 | 57 |
| 57 with open(out_file, 'w') as fh: | 58 with open(out_file, 'w') as fh: |
| 58 json.dump(data_manager_json, fh, sort_keys=True) | 59 json.dump(data_manager_json, fh, sort_keys=True) |
| 59 | 60 |
