# HG changeset patch
# User sh477
# Date 1646158346 0
# Node ID 7890790d2afdef8fa5ef1ab21db069d27ec3f075
# Parent 3bd006fa2be204ab37cf9a1feb3ec4a2de32fc9b
Fully working now and improved several things
diff -r 3bd006fa2be2 -r 7890790d2afd data_manager/data_manager_vep_cache_download.py
--- a/data_manager/data_manager_vep_cache_download.py Mon Feb 28 14:42:50 2022 +0000
+++ b/data_manager/data_manager_vep_cache_download.py Tue Mar 01 18:12:26 2022 +0000
@@ -18,9 +18,11 @@
# Process parameters for metadata and file download
url = params['param_dict']['url'].rstrip("/") + "/" + params['param_dict']['file_name'].lstrip("/")
- m = re.search(r"_([^_]*?)_vep_(\d+?)_", params['param_dict']['file_name'])
- version = str(m.group(2))
- cache_type = m.group(1) if m.group(1) == "merged" or m.group(1) == "refseq" else "default"
+ m = re.search(r"(.*?)(merged|refseq)?_vep_(\d+?)_", params['param_dict']['file_name'])
+ version = str(m.group(3))
+ cache_type = m.group(2) if m.group(2) else "default"
+ species = m.group(1).rstrip("_")
+ display_name = f"{species.capitalize().replace('_', ' ')} {params['param_dict']['dbkey']} (V{version}{'' if cache_type == 'default' else ', ' + cache_type.capitalize()})"
# Download and extract given cache archive, remove archive afterwards
final_file, headers = urlretrieve(url, os.path.join(target_directory, params['param_dict']['file_name']))
@@ -32,20 +34,19 @@
# Construct metadata for the new data table entry
data_manager_dict = {
'data_tables': {
- 'vep_versioned_caches': [
+ 'vep_versioned_annotation_cache': [
{
'value': params['param_dict']['file_name'].strip(".tar.gz"),
'dbkey': params['param_dict']['dbkey'],
'version': version,
'cachetype': cache_type,
- 'name': params['param_dict']['display_name'],
+ 'name': display_name,
+ 'species': species,
'path': './%s' % params['param_dict']['file_name'].strip(".tar.gz")
}
]
}
}
-
- #assert 42 == 0, str(data_manager_dict)
# Save metadata to out_file
with open(sys.argv[1], 'w') as fh:
diff -r 3bd006fa2be2 -r 7890790d2afd data_manager/data_manager_vep_cache_download.xml
--- a/data_manager/data_manager_vep_cache_download.xml Mon Feb 28 14:42:50 2022 +0000
+++ b/data_manager/data_manager_vep_cache_download.xml Tue Mar 01 18:12:26 2022 +0000
@@ -11,27 +11,26 @@
label="DBKEY of genome that the VEP cache data is for"
help="" />
-
-
+ label="FTP root url for VEP cache files" help="Release number should be equal to desired VEP version"/>
+
-
+
-
-
+
This tool downloads given versions of VEP cache annotation files and makes them available to Ensembl VEP in Galaxy via the
-"vep_versioned_caches" data table. You should use the indexed version of the cache files and it is strongly recommended to
-use the cache files which version number matches the VEP version number. Note that for most genomes there are three versions
-of cache data available: default, refseq and merged (combining the former two). Choose the one suitable for your usage.
+"vep_versioned_annotation_cache" data table. You should use the indexed version of the cache files and it is strongly
+recommended to use the cache files whose version number matches the VEP version number. Note that for most genomes there
+are three versions of cache data available: default, refseq and merged (combining the former two). Choose the one suitable
+for your usage.
A general introduction to the VEP cache and download links can be found on the official website:
https://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html
diff -r 3bd006fa2be2 -r 7890790d2afd data_manager_conf.xml
--- a/data_manager_conf.xml Mon Feb 28 14:42:50 2022 +0000
+++ b/data_manager_conf.xml Tue Mar 01 18:12:26 2022 +0000
@@ -1,13 +1,14 @@
-
+