Mercurial > repos > sh477 > data_manager_vep_cache_downloader

--- a/.shed.yml	Wed Feb 23 12:26:11 2022 +0000
+++ b/.shed.yml	Mon Feb 28 09:17:29 2022 +0000
@@ -1,11 +1,11 @@
-categories:
-- Data Managers
-description: Download and install annotation cache files for Ensembl VEP
-long_description: |
-    This tool downloads given versions of VEP cache annotation files and makes
-    them available to Ensembl VEP in Galaxy via the "vep_versioned_caches" data
-    table.
-name: data_manager_vep_cache_downloader
-owner: sh477
-remote_repository_url:
+categories:
+- Data Managers
+description: Download and install annotation cache files for Ensembl VEP
+long_description: |
+    This tool downloads given versions of VEP cache annotation files and makes
+    them available to Ensembl VEP in Galaxy via the "vep_versioned_caches" data
+    table.
+name: data_manager_vep_cache_downloader
+owner: sh477
+remote_repository_url:
 type: unrestricted
\ No newline at end of file
--- a/data_manager/data_manager_vep_cache_download.py	Wed Feb 23 12:26:11 2022 +0000
+++ b/data_manager/data_manager_vep_cache_download.py	Mon Feb 28 09:17:29 2022 +0000
@@ -1,56 +1,56 @@
-#!/usr/bin/env python
-
-import datetime
-import json
-import os
-import re
-from urllib.request import urlretrieve
-import sys
-import tarfile
-
-
-def main():
-	# Read in given out_file and create target directory for file download
-    with open(sys.argv[1]) as fh:
-        params = json.load(fh)
-    target_directory = params['output_data'][0]['extra_files_path']
-    os.mkdir(target_directory)
-
-	# Process parameters for metadata and file download
-    url = params['param_dict']['url'].rstrip("/") + "/" + params['param_dict']['file_name'].lstrip("/")
-    m = re.search(r"_([^_]*?)_vep_(\d+?)_", params['param_dict']['file_name'])
-    version = str(m.group(2))
-    cache_type = m.group(1) if m.group(1) == "merged" or m.group(1) == "refseq" else "default"
-
-	# Download and extract given cache archive, remove archive afterwards
-    final_file, headers = urlretrieve(url, os.path.join(target_directory, params['param_dict']['file_name']))
-    tar = tarfile.open(final_file, "r:gz")
-    tar.extractall(target_directory)
-    tar.close()
-    os.remove(final_file)
-
-    # Construct metadata for the new data table entry
-    data_manager_dict = {
-        'data_tables': {
-            'vep_versioned_caches': [
-                {
-                    'value': params['param_dict']['file_name'].strip(".tar.gz"),
-                    'dbkey': params['param_dict']['dbkey'],
-                    'version': version,
-                    'cachetype': cache_type,
-                    'name': params['param_dict']['display_name'],
-                    'path': './%s' % params['param_dict']['file_name'].strip(".tar.gz")
-                }
-            ]
-        }
-    }
-
-    #assert 42 == 0, str(data_manager_dict)
-
-    # Save metadata to out_file
-    with open(sys.argv[1], 'w') as fh:
-        json.dump(data_manager_dict, fh, sort_keys=True)
-
-
-if __name__ == "__main__":
-    main()
+#!/usr/bin/env python
+
+import datetime
+import json
+import os
+import re
+from urllib.request import urlretrieve
+import sys
+import tarfile
+
+
+def main():
+    """Download a VEP cache archive and write its data table entry.
+
+    Reads the job parameters from the JSON file given as ``sys.argv[1]``,
+    extracts the archive into the output's ``extra_files_path`` and then
+    overwrites that JSON file with the ``vep_versioned_caches`` entry.
+
+    Raises:
+        ValueError: if the file name has no ``_<x>_vep_<version>_`` part.
+    """
+    with open(sys.argv[1]) as fh:
+        params = json.load(fh)
+    target_directory = params['output_data'][0]['extra_files_path']
+    os.mkdir(target_directory)
+    param_dict = params['param_dict']
+    file_name = param_dict['file_name'].lstrip("/")
+    url = param_dict['url'].rstrip("/") + "/" + file_name
+    m = re.search(r"_([^_]*?)_vep_(\d+?)_", file_name)
+    if m is None:
+        raise ValueError("Cannot parse VEP version from %r" % file_name)
+    cache_type = m.group(1) if m.group(1) in ("merged", "refseq") else "default"
+    # str.strip(".tar.gz") removes *characters*, mangling names that start
+    # or end with any of ".targz" (e.g. "gallus_..."); cut the suffix instead.
+    suffix = ".tar.gz"
+    base_name = file_name[:-len(suffix)] if file_name.endswith(suffix) else file_name
+    # Download and extract the given cache archive, remove it afterwards.
+    final_file, _ = urlretrieve(url, os.path.join(target_directory, file_name))
+    with tarfile.open(final_file, "r:gz") as tar:
+        tar.extractall(target_directory)
+    os.remove(final_file)
+    entry = {
+        'value': base_name,
+        'dbkey': param_dict['dbkey'],
+        'version': m.group(2),
+        'cachetype': cache_type,
+        'name': param_dict['display_name'],
+        'path': './%s' % base_name,
+    }
+    data_manager_dict = {'data_tables': {'vep_versioned_caches': [entry]}}
+    with open(sys.argv[1], 'w') as fh:
+        json.dump(data_manager_dict, fh, sort_keys=True)
+
+
+if __name__ == "__main__":
+    main()
--- a/data_manager/data_manager_vep_cache_download.xml	Wed Feb 23 12:26:11 2022 +0000
+++ b/data_manager/data_manager_vep_cache_download.xml	Mon Feb 28 09:17:29 2022 +0000
@@ -1,42 +1,42 @@
-<tool id="data_manager_vep_cache_download" name="Download and install VEP cache" version="0.1" tool_type="manage_data">
-    <description>ToDo:the cache files required by VEP</description>
-    <requirements>
-        <requirement type="package" version="3.9">python</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-        python '$__tool_directory__/data_manager_vep_cache_download.py' '$out_file'
-    </command>
-    <inputs>
-        <param name="dbkey" type="genomebuild"
-			label="DBKEY of genome that the VEP cache data is for"
-			help="" />
-        <param name="url" type="text" value="http://ftp.ensembl.org/pub/release-105/variation/indexed_vep_cache/"
-            label="FTP root url for VEP cache files" help=""/>
-		<param name="file_name" type="text" label="File name of cache file to be downloaded from root url." help="E.g. homo_sapiens_vep_105_GRCh38.tar.gz"/>
-		<param name="display_name" type="text" label="Display name used in data-selection dropdowns." help="E.g. Homo sapiens hg38 (V105)"/>
-    </inputs>
-    <outputs>
-        <data name="out_file" format="data_manager_json"/>
-    </outputs>
-    <tests>
-	<test>
-		<param name="dbkey" value="ce11"/>
-		<param name="url" value="http://ftp.ensembl.org/pub/release-105/variation/indexed_vep_cache/"/>
-		<param name="file_name" value="caenorhabditis_elegans_vep_105_WBcel235.tar.gz"/>
-		<param name="display_name" value="C. elegans ce11 (V105)"/>
-		<output name="out_file" file="from_test-meta.data_manager.json"/>
-        </test>
-    </tests>
-    <help>
-This tool downloads given versions of VEP cache annotation files and makes them available to Ensembl VEP in Galaxy via the
-"vep_versioned_caches" data table. You should use the indexed version of the cache files and it is strongly recommended to
-use the cache files which version number matches the VEP version number. Note that for most genomes there are three versions
-of cache data available: default, refseq and merged (combining the former two). Choose the one suitable for your usage.
-
-A general introduction to the VEP cache and download links can be found on the official website:
-https://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html
-    </help>
-    <citations>
-        <citation type="doi">10.1186/s13059-016-0974-4</citation>
-    </citations>
-</tool>
+<tool id="data_manager_vep_cache_download" name="Download and install VEP cache" version="0.1" tool_type="manage_data">
+    <description>versioned annotation files for VEP</description>
+    <requirements>
+        <requirement type="package" version="3.9">python</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+        python '$__tool_directory__/data_manager_vep_cache_download.py' '$out_file'
+    </command>
+    <inputs>
+        <param name="dbkey" type="genomebuild"
+            label="DBKEY of genome that the VEP cache data is for"
+            help="" />
+        <param name="url" type="text" value="http://ftp.ensembl.org/pub/release-105/variation/indexed_vep_cache/"
+            label="FTP root url for VEP cache files" help=""/>
+        <param name="file_name" type="text" label="File name of cache file to be downloaded from root url." help="E.g. homo_sapiens_vep_105_GRCh38.tar.gz"/>
+        <param name="display_name" type="text" label="Display name used in data-selection dropdowns." help="E.g. Homo sapiens hg38 (V105)"/>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+    <test>
+        <param name="dbkey" value="ce11"/>
+        <param name="url" value="http://ftp.ensembl.org/pub/release-105/variation/indexed_vep_cache/"/>
+        <param name="file_name" value="caenorhabditis_elegans_vep_105_WBcel235.tar.gz"/>
+        <param name="display_name" value="C. elegans ce11 (V105)"/>
+        <output name="out_file" file="from_test-meta.data_manager.json"/>
+        </test>
+    </tests>
+    <help>
+This tool downloads given versions of VEP cache annotation files and makes them available to Ensembl VEP in Galaxy via the
+"vep_versioned_caches" data table. You should use the indexed version of the cache files and it is strongly recommended to
+use the cache files whose version number matches the VEP version number. Note that for most genomes there are three versions
+of cache data available: default, refseq and merged (combining the former two). Choose the one suitable for your usage.
+
+A general introduction to the VEP cache and download links can be found on the official website:
+https://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html
+    </help>
+    <citations>
+        <citation type="doi">10.1186/s13059-016-0974-4</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- a/data_manager_conf.xml	Wed Feb 23 12:26:11 2022 +0000
+++ b/data_manager_conf.xml	Mon Feb 28 09:17:29 2022 +0000
@@ -1,21 +1,21 @@
-<?xml version="1.0"?>
-<data_managers>
-    <data_manager tool_file="data_manager/data_manager_vep_cache_download.xml" id="data_manager_vep_cache_download" >
-        <data_table name="vep_versioned_caches">  <!-- Defines a Data Table to be modified. -->
-            <output> <!-- Handle the output of the Data Manager Tool -->
-                <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool -->
-                <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool -->
-                <column name="version" /> <!-- columns that are going to be specified by the Data Manager Tool -->
-                <column name="cachetype" />  <!-- columns that are going to be specified by the Data Manager Tool -->
-				<column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
-                <column name="path" output_ref="out_file" >
-                    <move type="directory" relativize_symlinks="True">
-                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">vep/${version}/${dbkey}/${cachetype}</target>
-                    </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/vep/${version}/${dbkey}/${cachetype}/</value_translation>
-                    <value_translation type="function">abspath</value_translation>
-                </column>
-            </output>
-        </data_table>
-    </data_manager>
-</data_managers>
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/data_manager_vep_cache_download.xml" id="data_manager_vep_cache_download" >
+        <data_table name="vep_versioned_caches">  <!-- Defines a Data Table to be modified. -->
+            <output> <!-- Handle the output of the Data Manager Tool -->
+                <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="version" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="cachetype" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="path" output_ref="out_file" >
+                    <move type="directory" relativize_symlinks="True">
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">vep/${version}/${dbkey}/${cachetype}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/vep/${version}/${dbkey}/${cachetype}/</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
\ No newline at end of file
--- a/data_manager_conf.xml.sample	Wed Feb 23 12:26:11 2022 +0000
+++ b/data_manager_conf.xml.sample	Mon Feb 28 09:17:29 2022 +0000
@@ -1,12 +1,12 @@
-<tables>
-    <!-- Table of installed versioned vep cache data -->
-    <table name="vep_versioned_caches" comment_char="#">
-        <columns>value, dbkey, version, cachetype, name, path</columns>
-        <file path="tool-data/vep_versioned_caches.loc" />
-    </table>
-    <!-- Locations of dbkeys and len files under genome directory -->
-    <table name="__dbkeys__" comment_char="#">
-        <columns>value, name, len_path</columns>
-        <file path="tool-data/dbkeys.loc" />
-    </table>
-</tables>
+<tables>
+    <!-- Table of installed versioned vep cache data -->
+    <table name="vep_versioned_caches" comment_char="#">
+        <columns>value, dbkey, version, cachetype, name, path</columns>
+        <file path="tool-data/vep_versioned_caches.loc" />
+    </table>
+    <!-- Locations of dbkeys and len files under genome directory -->
+    <table name="__dbkeys__" comment_char="#">
+        <columns>value, name, len_path</columns>
+        <file path="tool-data/dbkeys.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- a/data_manager_conf.xml.test	Wed Feb 23 12:26:11 2022 +0000
+++ b/data_manager_conf.xml.test	Mon Feb 28 09:17:29 2022 +0000
@@ -1,12 +1,12 @@
-<tables>
-    <!-- Table of installed versioned vep cache data -->
-    <table name="vep_versioned_caches" comment_char="#">
-        <columns>value, dbkey, version, cachetype, name, path</columns>
-        <file path="${__HERE__}/test-data/vep_versioned_caches.loc" />
-    </table>
-    <!-- Locations of dbkeys and len files under genome directory -->
-    <table name="__dbkeys__" comment_char="#">
-        <columns>value, name, len_path</columns>
-        <file path="${__HERE__}/test-data/dbkeys.loc" />
-    </table>
-</tables>
+<tables>
+    <!-- Table of installed versioned vep cache data -->
+    <table name="vep_versioned_caches" comment_char="#">
+        <columns>value, dbkey, version, cachetype, name, path</columns>
+        <file path="${__HERE__}/test-data/vep_versioned_caches.loc" />
+    </table>
+    <!-- Locations of dbkeys and len files under genome directory -->
+    <table name="__dbkeys__" comment_char="#">
+        <columns>value, name, len_path</columns>
+        <file path="${__HERE__}/test-data/dbkeys.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- a/test-data/dbkeys.loc	Wed Feb 23 12:26:11 2022 +0000
+++ b/test-data/dbkeys.loc	Mon Feb 28 09:17:29 2022 +0000
@@ -1,3 +1,3 @@
 #<dbkey>		<display_name>	<len_file_path>
 hg38			Human hg38		a_path
-ce11			C. elegans ce11	a_path
+ce11			C. elegans ce11	a_path
\ No newline at end of file
--- a/test-data/vep_versioned_caches.loc	Wed Feb 23 12:26:11 2022 +0000
+++ b/test-data/vep_versioned_caches.loc	Mon Feb 28 09:17:29 2022 +0000
@@ -1,2 +1,2 @@
-#<value>									<dbkey>			<version>		<cachetype>		<name>								<path>
-#
+#<value>									<dbkey>			<version>		<cachetype>		<name>								<path>
+#
--- a/tool-data/dbkeys.loc.sample	Wed Feb 23 12:26:11 2022 +0000
+++ b/tool-data/dbkeys.loc.sample	Mon Feb 28 09:17:29 2022 +0000
@@ -1,1 +1,1 @@
-#<dbkey>		<display_name>	<len_file_path>
+#<dbkey>		<display_name>	<len_file_path>
\ No newline at end of file