Mercurial > repos > sh477 > data_manager_vep_cache_downloader
diff data_manager/data_manager_vep_cache_download.py @ 7:7890790d2afd draft
Fully working now and improved several things
author | sh477 |
---|---|
date | Tue, 01 Mar 2022 18:12:26 +0000 |
parents | a3dba0440f08 |
children |
line wrap: on
line diff
--- a/data_manager/data_manager_vep_cache_download.py Mon Feb 28 14:42:50 2022 +0000
+++ b/data_manager/data_manager_vep_cache_download.py Tue Mar 01 18:12:26 2022 +0000
@@ -18,9 +18,11 @@
 
     # Process parameters for metadata and file download
     url = params['param_dict']['url'].rstrip("/") + "/" + params['param_dict']['file_name'].lstrip("/")
-    m = re.search(r"_([^_]*?)_vep_(\d+?)_", params['param_dict']['file_name'])
-    version = str(m.group(2))
-    cache_type = m.group(1) if m.group(1) == "merged" or m.group(1) == "refseq" else "default"
+    m = re.search(r"(.*?)(merged|refseq)?_vep_(\d+?)_", params['param_dict']['file_name'])
+    version = str(m.group(3))
+    cache_type = m.group(2) if m.group(2) else "default"
+    species = m.group(1).rstrip("_")
+    display_name = f"{species.capitalize().replace('_', ' ')} {params['param_dict']['dbkey']} (V{version}{'' if cache_type == 'default' else ', ' + cache_type.capitalize()})"
 
     # Download and extract given cache archive, remove archive afterwards
     final_file, headers = urlretrieve(url, os.path.join(target_directory, params['param_dict']['file_name']))
@@ -32,20 +34,19 @@
     # Construct metadata for the new data table entry
     data_manager_dict = {
         'data_tables': {
-            'vep_versioned_caches': [
+            'vep_versioned_annotation_cache': [
                 {
                     'value': params['param_dict']['file_name'].strip(".tar.gz"),
                     'dbkey': params['param_dict']['dbkey'],
                     'version': version,
                     'cachetype': cache_type,
-                    'name': params['param_dict']['display_name'],
+                    'name': display_name,
+                    'species': species,
                     'path': './%s' % params['param_dict']['file_name'].strip(".tar.gz")
                 }
             ]
         }
     }
-
-    #assert 42 == 0, str(data_manager_dict)
 
     # Save metadata to out_file
     with open(sys.argv[1], 'w') as fh: