5
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import datetime
|
|
4 import json
|
|
5 import os
|
|
6 import re
|
|
7 from urllib.request import urlretrieve
|
|
8 import sys
|
|
9 import tarfile
|
|
10
|
|
11
|
|
def _strip_archive_suffix(file_name, suffix=".tar.gz"):
    """Return ``file_name`` with one trailing ``suffix`` removed, if present.

    ``str.strip(".tar.gz")`` must not be used for this: it removes any of the
    characters ``. t a r g z`` from *both* ends of the string, so a name such
    as ``tetraodon_..._vep_106_X.tar.gz`` would also lose its leading ``t``.
    """
    if suffix and file_name.endswith(suffix):
        return file_name[:-len(suffix)]
    return file_name


def _parse_cache_file_name(file_name):
    """Split a cache archive name into ``(species, cache_type, version)``.

    The name is expected to look like ``<species>[_merged|_refseq]_vep_<N>_...``.
    ``cache_type`` is ``"merged"``, ``"refseq"`` or ``"default"`` when neither
    marker is present; ``version`` is returned as a string of digits.

    Raises:
        ValueError: if ``file_name`` does not follow the expected pattern.
    """
    match = re.search(r"(.*?)(merged|refseq)?_vep_(\d+?)_", file_name)
    if match is None:
        raise ValueError(
            "Cannot parse species/cache type/version from file name %r; "
            "expected '<species>[_merged|_refseq]_vep_<version>_...'" % file_name
        )
    species = match.group(1).rstrip("_")
    cache_type = match.group(2) or "default"
    version = match.group(3)
    return species, cache_type, version


def main():
    """Download a VEP cache archive and write its data table entry.

    Reads the JSON parameter file given as ``sys.argv[1]``, downloads
    ``param_dict['url'] + '/' + param_dict['file_name']`` into the
    ``extra_files_path`` of the first output dataset, extracts the ``.tar.gz``
    archive there and deletes the archive.  Finally the parameter file is
    overwritten with a JSON document describing one new row of the
    ``vep_versioned_annotation_cache`` data table.

    Raises:
        ValueError: if the file name does not match the expected cache naming
            scheme (raised before anything is created or downloaded).
    """
    # Read in given out_file
    with open(sys.argv[1]) as fh:
        params = json.load(fh)
    param_dict = params['param_dict']
    file_name = param_dict['file_name']
    dbkey = param_dict['dbkey']
    target_directory = params['output_data'][0]['extra_files_path']

    # Process parameters for metadata first, so that a malformed file name
    # fails before any directory is created or any data is downloaded.
    species, cache_type, version = _parse_cache_file_name(file_name)
    type_suffix = '' if cache_type == 'default' else ', ' + cache_type.capitalize()
    display_name = f"{species.capitalize().replace('_', ' ')} {dbkey} (V{version}{type_suffix})"
    cache_name = _strip_archive_suffix(file_name)

    # Create target directory for file download
    os.mkdir(target_directory)
    url = param_dict['url'].rstrip("/") + "/" + file_name.lstrip("/")

    # Download and extract given cache archive, remove archive afterwards
    archive_path, _ = urlretrieve(url, os.path.join(target_directory, file_name))
    try:
        with tarfile.open(archive_path, "r:gz") as tar:
            if hasattr(tarfile, "data_filter"):
                # The archive comes from a user-supplied URL: refuse members
                # that would escape target_directory (absolute paths, "..",
                # outside-pointing links) where the interpreter supports it.
                tar.extractall(target_directory, filter="data")
            else:
                tar.extractall(target_directory)
    finally:
        # Never leave the (potentially large) archive behind, even when
        # extraction fails.
        os.remove(archive_path)

    # Construct metadata for the new data table entry
    data_manager_dict = {
        'data_tables': {
            'vep_versioned_annotation_cache': [
                {
                    'value': cache_name,
                    'dbkey': dbkey,
                    'version': version,
                    'cachetype': cache_type,
                    'name': display_name,
                    'species': species,
                    'path': './%s' % cache_name,
                }
            ]
        }
    }

    # Save metadata to out_file
    with open(sys.argv[1], 'w') as fh:
        json.dump(data_manager_dict, fh, sort_keys=True)
|
|
54
|
|
55
|
|
# Script entry point: run the download only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|