Mercurial > repos > sanbi-uwc > data_manager_fetch_artic_primers

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/fetch_artic_primers.py	Thu Apr 16 14:06:59 2020 +0000
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+from __future__ import print_function, division
+
+import argparse
+import json
+import os
+import os.path
+import sys
+
+import requests
+
+DATA_TABLE_NAME = 'artic_primers'
+def fetch_artic_primers(output_filename, output_directory, primers):
+    primer_sets = {
+        'ARTICv1': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V1/nCoV-2019.bed',
+        'ARTICv2': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V2/nCoV-2019.bed',
+        'ARTICv3': 'https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed'
+        }
+
+    if not os.path.isdir(output_directory):
+        os.makedirs(output_directory)
+    data_manager_dict = json.load(open(output_filename))
+    data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
+    data_manager_dict['data_tables'][DATA_TABLE_NAME] = data_manager_dict['data_tables'].get(DATA_TABLE_NAME, [])
+
+    data = []
+    for name, url in primer_sets.items():
+        response = requests.get(url)
+        if response.status_code != 200:
+            print('Error: download of', url, 'failed with code', response.status_code, file=sys.stderr)
+            exit(response.status_code)
+        bed_output_filename = os.path.join(output_directory, name + '.bed')
+        open(bed_output_filename, 'w').write(response.text)
+        description = name[:-2] + ' ' + name[-2:] + ' primer set'
+        data.append(dict(value=name, path=bed_output_filename, description=description))
+    data_manager_dict['data_tables'][DATA_TABLE_NAME].extend(data)
+    print(data_manager_dict)
+    json.dump(data_manager_dict, open(output_filename, 'w'))
+
+class SplitArgs(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        setattr(namespace, self.dest, values.split(','))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Fetch ARTIC primer files for Galaxy use')
+    parser.add_argument('--output_directory', default='tmp', help='Directory to write output to')
+    parser.add_argument('--galaxy_datamanager_filename', help='Galaxy JSON format file describing data manager inputs')
+    parser.add_argument('--primers', default='ARTCIv1,ARTICv2,ARTICv3', action=SplitArgs, help='Comma separated list of primers to fetch')
+    args = parser.parse_args()
+    fetch_artic_primers(args.galaxy_datamanager_filename, args.output_directory, args.primers)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/fetch_artic_primers.xml	Thu Apr 16 14:06:59 2020 +0000
@@ -0,0 +1,31 @@
+<tool id="fetch_artic_primers" name="ARTIC primer data manager" version="0.0.1" tool_type="manage_data" profile="19.05">
+    <requirements>
+        <requirement type="package">python</requirement>
+        <requirement type="package" version="2.22.0">requests</requirement>
+    </requirements>
+    <!-- fetch all the primers in one go -->
+    <command detect_errors="exit_code">
+    python '$__tool_directory__/fetch_artic_primers.py'
+        --galaxy_datamanager_filename '${output_file}'
+    </command>
+    <inputs>
+        <param name="primers" type="select" multiple="true" label="SARS-CoV-2 Primers to fetch">
+            <option value="ARTICv1" selected="true">ARTIC v1</option>
+            <option value="ARTICv2" selected="true">ARTIC v2</option>
+            <option value="ARTICv3" selected="true">ARTIC v3</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="primers" value="ARTICv1,ARTICv2,ARTICv3"/>
+            <output name="output_file">
+                <assert_contents>
+                    <has_text text="ARTIC"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Thu Apr 16 14:06:59 2020 +0000
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/fetch_artic_primers.xml" id="fetch_artic_primers">
+        <data_table name="artic_primers">
+            <output>
+                <column name="value" />
+                <column name="description" />
+                <column name="path" output_ref="output_file" >
+                    <move type="file">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">artic_primers/#echo str($name).bed#</target>
+                    </move>
+                    <value_translation>artic_primers/#echo str($name).bed#</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/artic.json	Thu Apr 16 14:06:59 2020 +0000
@@ -0,0 +1,21 @@
+{
+    "data_tables": {
+        "artic_primers": [
+            {
+                "value": "ARTICv1",
+                "description": "ARTIC v1 primer set",
+                "path": "tmp/ARTICv1.bed"
+            },
+            {
+                "value": "ARTICv2",
+                "description": "ARTIC v2 primer set",
+                "path": "tmp/ARTICv2.bed"
+            },
+            {
+                "value": "ARTICv3",
+                "description": "ARTIC v3 primer set",
+                "path": "tmp/ARTICv3.bed"
+            }
+        ]
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Thu Apr 16 14:06:59 2020 +0000
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>		<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3		/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19		Human (Homo sapiens): hg19 Canonical		/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19		Human (Homo sapiens): hg19 Full			/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/artic_primers.loc.sample	Thu Apr 16 14:06:59 2020 +0000
@@ -0,0 +1,7 @@
+# this is a tab separated file describing the location of ARTIC primers for use in SARS-CoV-2 sequencing
+#
+# the columns are:
+# value  description path
+#
+# for example
+# ARTICv1   ARTIC v1 primers    /data/galaxy/tool_data/artic_primers/ARTICv1.bed
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Apr 16 14:06:59 2020 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="artic_primers" comment_char="#">
+        <columns>value, description, path</columns>
+        <file path="tool-data/artic_primers.loc" />
+    </table>
+</tables>
\ No newline at end of file