Mercurial > repos > sh477 > data_manager_vep_cache_downloader
comparison data_manager/data_manager_vep_cache_download.py @ 7:7890790d2afd draft
Fully working now and improved several things
| author | sh477 |
|---|---|
| date | Tue, 01 Mar 2022 18:12:26 +0000 |
| parents | a3dba0440f08 |
| children |
comparison
equal
deleted
inserted
replaced
| 6:3bd006fa2be2 | 7:7890790d2afd |
|---|---|
| 16 target_directory = params['output_data'][0]['extra_files_path'] | 16 target_directory = params['output_data'][0]['extra_files_path'] |
| 17 os.mkdir(target_directory) | 17 os.mkdir(target_directory) |
| 18 | 18 |
| 19 # Process parameters for metadata and file download | 19 # Process parameters for metadata and file download |
| 20 url = params['param_dict']['url'].rstrip("/") + "/" + params['param_dict']['file_name'].lstrip("/") | 20 url = params['param_dict']['url'].rstrip("/") + "/" + params['param_dict']['file_name'].lstrip("/") |
| 21 m = re.search(r"_([^_]*?)_vep_(\d+?)_", params['param_dict']['file_name']) | 21 m = re.search(r"(.*?)(merged|refseq)?_vep_(\d+?)_", params['param_dict']['file_name']) |
| 22 version = str(m.group(2)) | 22 version = str(m.group(3)) |
| 23 cache_type = m.group(1) if m.group(1) == "merged" or m.group(1) == "refseq" else "default" | 23 cache_type = m.group(2) if m.group(2) else "default" |
| | 24 species = m.group(1).rstrip("_") |
| | 25 display_name = f"{species.capitalize().replace('_', ' ')} {params['param_dict']['dbkey']} (V{version}{'' if cache_type == 'default' else ', ' + cache_type.capitalize()})" |
| 24 | 26 |
| 25 # Download and extract given cache archive, remove archive afterwards | 27 # Download and extract given cache archive, remove archive afterwards |
| 26 final_file, headers = urlretrieve(url, os.path.join(target_directory, params['param_dict']['file_name'])) | 28 final_file, headers = urlretrieve(url, os.path.join(target_directory, params['param_dict']['file_name'])) |
| 27 tar = tarfile.open(final_file, "r:gz") | 29 tar = tarfile.open(final_file, "r:gz") |
| 28 tar.extractall(target_directory) | 30 tar.extractall(target_directory) |
| 30 os.remove(final_file) | 32 os.remove(final_file) |
| 31 | 33 |
| 32 # Construct metadata for the new data table entry | 34 # Construct metadata for the new data table entry |
| 33 data_manager_dict = { | 35 data_manager_dict = { |
| 34 'data_tables': { | 36 'data_tables': { |
| 35 'vep_versioned_caches': [ | 37 'vep_versioned_annotation_cache': [ |
| 36 { | 38 { |
| 37 'value': params['param_dict']['file_name'].strip(".tar.gz"), | 39 'value': params['param_dict']['file_name'].strip(".tar.gz"), |
| 38 'dbkey': params['param_dict']['dbkey'], | 40 'dbkey': params['param_dict']['dbkey'], |
| 39 'version': version, | 41 'version': version, |
| 40 'cachetype': cache_type, | 42 'cachetype': cache_type, |
| 41 'name': params['param_dict']['display_name'], | 43 'name': display_name, |
| | 44 'species': species, |
| 42 'path': './%s' % params['param_dict']['file_name'].strip(".tar.gz") | 45 'path': './%s' % params['param_dict']['file_name'].strip(".tar.gz") |
| 43 } | 46 } |
| 44 ] | 47 ] |
| 45 } | 48 } |
| 46 } | 49 } |
| 47 | |
| 48 #assert 42 == 0, str(data_manager_dict) | |
| 49 | 50 |
| 50 # Save metadata to out_file | 51 # Save metadata to out_file |
| 51 with open(sys.argv[1], 'w') as fh: | 52 with open(sys.argv[1], 'w') as fh: |
| 52 json.dump(data_manager_dict, fh, sort_keys=True) | 53 json.dump(data_manager_dict, fh, sort_keys=True) |
| 53 | 54 |
