Mercurial > repos > sh477 > data_manager_vep_cache_downloader
comparison data_manager/data_manager_vep_cache_download.py @ 7:7890790d2afd draft
Fully working now and improved several things
author | sh477 |
---|---|
date | Tue, 01 Mar 2022 18:12:26 +0000 |
parents | a3dba0440f08 |
children |
comparison
equal
deleted
inserted
replaced
6:3bd006fa2be2 | 7:7890790d2afd |
---|---|
16 target_directory = params['output_data'][0]['extra_files_path'] | 16 target_directory = params['output_data'][0]['extra_files_path'] |
17 os.mkdir(target_directory) | 17 os.mkdir(target_directory) |
18 | 18 |
19 # Process parameters for metadata and file download | 19 # Process parameters for metadata and file download |
20 url = params['param_dict']['url'].rstrip("/") + "/" + params['param_dict']['file_name'].lstrip("/") | 20 url = params['param_dict']['url'].rstrip("/") + "/" + params['param_dict']['file_name'].lstrip("/") |
21 m = re.search(r"_([^_]*?)_vep_(\d+?)_", params['param_dict']['file_name']) | 21 m = re.search(r"(.*?)(merged|refseq)?_vep_(\d+?)_", params['param_dict']['file_name']) |
22 version = str(m.group(2)) | 22 version = str(m.group(3)) |
23 cache_type = m.group(1) if m.group(1) == "merged" or m.group(1) == "refseq" else "default" | 23 cache_type = m.group(2) if m.group(2) else "default" |
| | 24 species = m.group(1).rstrip("_") |
| | 25 display_name = f"{species.capitalize().replace('_', ' ')} {params['param_dict']['dbkey']} (V{version}{'' if cache_type == 'default' else ', ' + cache_type.capitalize()})" |
24 | 26 |
25 # Download and extract given cache archive, remove archive afterwards | 27 # Download and extract given cache archive, remove archive afterwards |
26 final_file, headers = urlretrieve(url, os.path.join(target_directory, params['param_dict']['file_name'])) | 28 final_file, headers = urlretrieve(url, os.path.join(target_directory, params['param_dict']['file_name'])) |
27 tar = tarfile.open(final_file, "r:gz") | 29 tar = tarfile.open(final_file, "r:gz") |
28 tar.extractall(target_directory) | 30 tar.extractall(target_directory) |
30 os.remove(final_file) | 32 os.remove(final_file) |
31 | 33 |
32 # Construct metadata for the new data table entry | 34 # Construct metadata for the new data table entry |
33 data_manager_dict = { | 35 data_manager_dict = { |
34 'data_tables': { | 36 'data_tables': { |
35 'vep_versioned_caches': [ | 37 'vep_versioned_annotation_cache': [ |
36 { | 38 { |
37 'value': params['param_dict']['file_name'].strip(".tar.gz"), | 39 'value': params['param_dict']['file_name'].strip(".tar.gz"), |
38 'dbkey': params['param_dict']['dbkey'], | 40 'dbkey': params['param_dict']['dbkey'], |
39 'version': version, | 41 'version': version, |
40 'cachetype': cache_type, | 42 'cachetype': cache_type, |
41 'name': params['param_dict']['display_name'], | 43 'name': display_name, |
| | 44 'species': species, |
42 'path': './%s' % params['param_dict']['file_name'].strip(".tar.gz") | 45 'path': './%s' % params['param_dict']['file_name'].strip(".tar.gz") |
43 } | 46 } |
44 ] | 47 ] |
45 } | 48 } |
46 } | 49 } |
47 | |
48 #assert 42 == 0, str(data_manager_dict) | |
49 | 50 |
50 # Save metadata to out_file | 51 # Save metadata to out_file |
51 with open(sys.argv[1], 'w') as fh: | 52 with open(sys.argv[1], 'w') as fh: |
52 json.dump(data_manager_dict, fh, sort_keys=True) | 53 json.dump(data_manager_dict, fh, sort_keys=True) |
53 | 54 |