diff data_manager/data_manager_vep_cache_download.py @ 7:7890790d2afd draft

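Fully working now, with several improvements: the species and display name are now derived from the cache file name, the data table is renamed to vep_versioned_annotation_cache, a species field is added to each entry, and a leftover commented-out debug assert is removed.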
author sh477
date Tue, 01 Mar 2022 18:12:26 +0000
parents a3dba0440f08
children
line wrap: on
line diff
--- a/data_manager/data_manager_vep_cache_download.py	Mon Feb 28 14:42:50 2022 +0000
+++ b/data_manager/data_manager_vep_cache_download.py	Tue Mar 01 18:12:26 2022 +0000
@@ -18,9 +18,11 @@
 
     # Process parameters for metadata and file download
     url = params['param_dict']['url'].rstrip("/") + "/" + params['param_dict']['file_name'].lstrip("/")
-    m = re.search(r"_([^_]*?)_vep_(\d+?)_", params['param_dict']['file_name'])
-    version = str(m.group(2))
-    cache_type = m.group(1) if m.group(1) == "merged" or m.group(1) == "refseq" else "default"
+    m = re.search(r"(.*?)(merged|refseq)?_vep_(\d+?)_", params['param_dict']['file_name'])
+    version = str(m.group(3))
+    cache_type = m.group(2) if m.group(2) else "default"
+    species = m.group(1).rstrip("_")
+    display_name = f"{species.capitalize().replace('_', ' ')} {params['param_dict']['dbkey']} (V{version}{'' if cache_type == 'default' else ', ' + cache_type.capitalize()})"
 
     # Download and extract given cache archive, remove archive afterwards
     final_file, headers = urlretrieve(url, os.path.join(target_directory, params['param_dict']['file_name']))
@@ -32,20 +34,19 @@
     # Construct metadata for the new data table entry
     data_manager_dict = {
         'data_tables': {
-            'vep_versioned_caches': [
+            'vep_versioned_annotation_cache': [
                 {
                     'value': params['param_dict']['file_name'].strip(".tar.gz"),
                     'dbkey': params['param_dict']['dbkey'],
                     'version': version,
                     'cachetype': cache_type,
-                    'name': params['param_dict']['display_name'],
+                    'name': display_name,
+                    'species': species,
                     'path': './%s' % params['param_dict']['file_name'].strip(".tar.gz")
                 }
             ]
         }
     }
-    
-    #assert 42 == 0, str(data_manager_dict)
 
     # Save metadata to out_file
     with open(sys.argv[1], 'w') as fh: