comparison data_manager/data_manager_vep_cache_download.py @ 7:7890790d2afd draft

Fully working now; improved several things
author sh477
date Tue, 01 Mar 2022 18:12:26 +0000
parents a3dba0440f08
children
comparison
equal deleted inserted replaced
6:3bd006fa2be2 7:7890790d2afd
16 target_directory = params['output_data'][0]['extra_files_path'] 16 target_directory = params['output_data'][0]['extra_files_path']
17 os.mkdir(target_directory) 17 os.mkdir(target_directory)
18 18
19 # Process parameters for metadata and file download 19 # Process parameters for metadata and file download
20 url = params['param_dict']['url'].rstrip("/") + "/" + params['param_dict']['file_name'].lstrip("/") 20 url = params['param_dict']['url'].rstrip("/") + "/" + params['param_dict']['file_name'].lstrip("/")
21 m = re.search(r"_([^_]*?)_vep_(\d+?)_", params['param_dict']['file_name']) 21 m = re.search(r"(.*?)(merged|refseq)?_vep_(\d+?)_", params['param_dict']['file_name'])
22 version = str(m.group(2)) 22 version = str(m.group(3))
23 cache_type = m.group(1) if m.group(1) == "merged" or m.group(1) == "refseq" else "default" 23 cache_type = m.group(2) if m.group(2) else "default"
24 species = m.group(1).rstrip("_")
25 display_name = f"{species.capitalize().replace('_', ' ')} {params['param_dict']['dbkey']} (V{version}{'' if cache_type == 'default' else ', ' + cache_type.capitalize()})"
24 26
25 # Download and extract given cache archive, remove archive afterwards 27 # Download and extract given cache archive, remove archive afterwards
26 final_file, headers = urlretrieve(url, os.path.join(target_directory, params['param_dict']['file_name'])) 28 final_file, headers = urlretrieve(url, os.path.join(target_directory, params['param_dict']['file_name']))
27 tar = tarfile.open(final_file, "r:gz") 29 tar = tarfile.open(final_file, "r:gz")
28 tar.extractall(target_directory) 30 tar.extractall(target_directory)
30 os.remove(final_file) 32 os.remove(final_file)
31 33
32 # Construct metadata for the new data table entry 34 # Construct metadata for the new data table entry
33 data_manager_dict = { 35 data_manager_dict = {
34 'data_tables': { 36 'data_tables': {
35 'vep_versioned_caches': [ 37 'vep_versioned_annotation_cache': [
36 { 38 {
37 'value': params['param_dict']['file_name'].strip(".tar.gz"), 39 'value': params['param_dict']['file_name'].strip(".tar.gz"),
38 'dbkey': params['param_dict']['dbkey'], 40 'dbkey': params['param_dict']['dbkey'],
39 'version': version, 41 'version': version,
40 'cachetype': cache_type, 42 'cachetype': cache_type,
41 'name': params['param_dict']['display_name'], 43 'name': display_name,
44 'species': species,
42 'path': './%s' % params['param_dict']['file_name'].strip(".tar.gz") 45 'path': './%s' % params['param_dict']['file_name'].strip(".tar.gz")
43 } 46 }
44 ] 47 ]
45 } 48 }
46 } 49 }
47
48 #assert 42 == 0, str(data_manager_dict)
49 50
50 # Save metadata to out_file 51 # Save metadata to out_file
51 with open(sys.argv[1], 'w') as fh: 52 with open(sys.argv[1], 'w') as fh:
52 json.dump(data_manager_dict, fh, sort_keys=True) 53 json.dump(data_manager_dict, fh, sort_keys=True)
53 54