Mercurial > repos > sanbi-uwc > vcf2neo

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcf2neo.xml	Mon Jun 19 00:08:18 2017 -0400
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!-- Galaxy tool wrapper: copies a reference Neo4j database and loads a
+     collection of VCF files into the copy with vcf2neo. -->
+<tool id="vcf2neo"
+    name="Import SnpEff produced VCF files to a Neo4j Graph database." version="1.0.0">
+    <description>Parses VCF files and SnpEff annotation and builds a
+        Neo4j Graph database.</description>
+    <requirements>
+      <requirement type="package" version="4.0.0b2">py2neo</requirement>
+      <requirement type="package" version="0.0.6">vcf2neo</requirement>
+    </requirements>
+    <!-- The command copies the reference database into the output dataset's
+         files directory, runs the wrapper on that copy, then writes an HTML
+         file listing as the primary output file. Script paths are quoted so
+         that a tool directory containing spaces does not split the command. -->
+    <command detect_errors="aggressive"><![CDATA[
+        cp -r '${inputDb.extra_files_path}' '${outputDb.files_path}' &&
+        python '$__tool_directory__/vcf2neo_wrapper.py'
+          --vcf_dataset_names
+          #for $vcf_file in $vcf_inputs
+            '${vcf_file.element_identifier}'
+          #end for
+          --neo4j_db_path '${outputDb.files_path}'
+          --user '${__user_email__}'
+          --variantset_name '${vcf_inputs.name}'
+          --vcf_files
+          #for $vcf_file in $vcf_inputs
+            '${vcf_file}'
+          #end for
+          && python '$__tool_directory__/write_db_summary.py'
+            '${outputDb.files_path}' '${outputDb.name}' >'${outputDb}'
+        ]]>
+    </command>
+    <inputs>
+        <param name="vcf_inputs" type="data_collection"
+          format="vcf" label="VCF files"
+          help="H37Rv TB variants in VCF format" />
+        <param name="inputDb" type="data" format="neostore"
+          label="Reference TB database (in Neo4j format)"
+          help="Reference TB database previously generated by tb2neo" />
+    </inputs>
+    <outputs>
+        <data format="neostore" name="outputDb"/>
+    </outputs>
+    <!-- TODO: no test cases are defined yet; the test element is empty. -->
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help><![CDATA[
+      The vcf2neo_ tool adds variants (in VCF format) to a TB annotation
+      database (previously built using tb2neo_). The output is a Neo4j database
+      stored as a Galaxy neostore datatype that can be explored with the Neo4j
+      Interactive Environment or saved for use outside Galaxy.
+
+      .. _vcf2neo: https://github.com/sanbi-sa/vcf2neo
+      .. _tb2neo: https://github.com/sanbi-sa/tb2neo
+      ]]></help>
+    <citations>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcf2neo_wrapper.py	Mon Jun 19 00:08:18 2017 -0400
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+"""Stage Galaxy VCF datasets in a directory and run ``vcf2neo init`` on it.
+
+Each VCF file is symlinked into a directory named after the variant set as
+``<dataset name>.vcf``. That directory, the user's email, a freshly
+generated UUID and the Neo4j database path are then passed to
+``vcf2neo init -d``.
+"""
+
+from __future__ import print_function
+import argparse
+import os
+import shlex
+import subprocess
+import uuid
+import sys
+
+
+def build_parser():
+    """Return the argument parser for the wrapper's command line."""
+    parser = argparse.ArgumentParser(
+        description="Call vcf2neo on VCF inputs from Galaxy")
+    # Every option feeds the vcf2neo command line, so a missing one is
+    # rejected by argparse rather than surfacing later as a TypeError.
+    parser.add_argument('--vcf_dataset_names', nargs='+', required=True,
+                        help='Names of VCF datasets')
+    parser.add_argument('--neo4j_db_path', required=True,
+                        help='Neo4j database directory')
+    parser.add_argument('--user', required=True,
+                        help='Email of Galaxy user running this tool')
+    parser.add_argument('--variantset_name', required=True,
+                        help='Name for the VariantSet containing all the '
+                             'variants')
+    parser.add_argument('--vcf_files', nargs='+', required=True,
+                        help='VCF format variant file')
+    return parser
+
+
+def link_vcf_files(vcf_dir, dataset_names, vcf_files):
+    """Symlink every VCF file into ``vcf_dir`` as ``<dataset name>.vcf``.
+
+    ``dataset_names`` and ``vcf_files`` are paired by position; the caller
+    is expected to have checked that they have the same length.
+    """
+    for i, (callset_name, vcf_file) in enumerate(
+            zip(dataset_names, vcf_files)):
+        print("XXXX I:", i, vcf_file, file=sys.stderr)
+        # A symlink target is resolved relative to the link's directory, so
+        # make it absolute to keep a relative input path valid.
+        os.symlink(os.path.abspath(vcf_file),
+                   os.path.join(vcf_dir, callset_name) + '.vcf')
+
+
+def main():
+    """Parse arguments, stage the VCF files and invoke vcf2neo."""
+    parser = build_parser()
+    args = parser.parse_args()
+
+    print("VCF names:", len(args.vcf_dataset_names), args.vcf_dataset_names,
+          file=sys.stderr)
+    print("VCF files:", len(args.vcf_files), args.vcf_files, file=sys.stderr)
+    if len(args.vcf_dataset_names) != len(args.vcf_files):
+        parser.error('got {} dataset names for {} VCF files'.format(
+            len(args.vcf_dataset_names), len(args.vcf_files)))
+
+    os.mkdir(args.variantset_name)
+    link_vcf_files(args.variantset_name, args.vcf_dataset_names,
+                   args.vcf_files)
+
+    # Usage: vcf2neo init [OPTIONS] VCF_DIR OWNER [HISTORY_ID] [OUTPUT_DIR]
+    #
+    #   Copy reference database and load VCF to Neo4j Graph database. :param
+    #   vcf_dir: :param refdb_dir: :param d: :return:
+    #
+    # Options:
+    #   -d / -D  Run Neo4j docker container.
+    #   --help   Show this message and exit.
+    history_id = str(uuid.uuid4())
+    # Pass the arguments as a list: formatting them into a string and
+    # splitting it again breaks values containing spaces or quotes and
+    # drops an empty email, which shifts the positional arguments.
+    cmd = ['vcf2neo', 'init', '-d', args.variantset_name, args.user,
+           history_id, args.neo4j_db_path]
+    subprocess.check_call(cmd)
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/write_db_summary.py	Mon Jun 19 00:08:18 2017 -0400
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+"""Print an HTML page listing the files of a Neo4j database directory.
+
+The non-hidden, non-directory entries of
+``<basepath>/neo4jdb/databases/graph.db`` are written to standard output as
+an HTML list titled with the given label.
+"""
+from __future__ import print_function
+
+import argparse
+import os
+import os.path
+from xml.sax.saxutils import escape
+
+
+def list_db_files(db_path):
+    """Return the sorted names of non-hidden, non-directory entries."""
+    # os.listdir returns entries in arbitrary order; sorting keeps the
+    # summary stable between runs.
+    return sorted(
+        filename for filename in os.listdir(db_path)
+        if not filename.startswith('.') and
+        not os.path.isdir(os.path.join(db_path, filename)))
+
+
+def render_summary(label, filenames):
+    """Return the HTML summary page for ``label`` listing ``filenames``."""
+    # The label and file names are escaped so that &, < and > in them
+    # cannot break the markup.
+    header = """<html><head><title>Files for Composite Dataset ({})</title></head>
+    <p/>This composite dataset is composed of
+     the following files:<p/><ul>\n""".format(escape(label))
+    items = ''.join(
+        "<li>{}</li>\n".format(escape(filename)) for filename in filenames)
+    # No trailing control character or newline is appended here: print()
+    # supplies the final newline.
+    return header + items + '</ul></html>'
+
+
+def main():
+    """Parse the command line and print the summary page."""
+    parser = argparse.ArgumentParser(
+        description="Write HTML summary from neostore datatype")
+    parser.add_argument('basepath')
+    parser.add_argument('label')
+    args = parser.parse_args()
+
+    db_path = os.path.join(args.basepath, 'neo4jdb', 'databases', 'graph.db')
+    print(render_summary(args.label, list_db_files(db_path)))
+
+
+if __name__ == '__main__':
+    main()