Mercurial > repos > sanbi-uwc > vcf2neo
changeset 0:3e14eda348d3 draft default tip
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 0245b4efed8ced6d06d76f1249a47d2178285385
author | sanbi-uwc |
---|---|
date | Mon, 19 Jun 2017 00:08:18 -0400 |
parents | |
children | |
files | vcf2neo.xml vcf2neo_wrapper.py write_db_summary.py |
diffstat | 3 files changed, 132 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<?xml version="1.0" encoding="utf-8" ?>
<!-- vcf2neo.xml: new file in this changeset (diff was /dev/null -> b/vcf2neo.xml).
     Galaxy tool definition that copies a reference neostore database, loads a
     collection of SnpEff-annotated VCF files into it via vcf2neo_wrapper.py and
     then writes an HTML summary of the resulting database files. -->
<tool id="vcf2neo"
      name="Import SnpEff produced VCF files to a Neo4j Graph database." version="1.0.0">
    <description>Parses VCF files and SnpEff annotation and builds a
    Neo4j Graph database.</description>
    <requirements>
        <requirement type="package" version="4.0.0b2">py2neo</requirement>
        <requirement type="package" version="0.0.6">vcf2neo</requirement>
    </requirements>
    <!-- Three chained steps: copy the reference database into the output's
         extra-files directory, run the wrapper on the copy, then write the
         primary HTML file of the composite output. The script paths are
         single-quoted so that a tool directory containing whitespace does not
         split into several shell words. -->
    <command detect_errors="aggressive"><![CDATA[
        cp -r '${inputDb.extra_files_path}' '${outputDb.files_path}' &&
        python '$__tool_directory__/vcf2neo_wrapper.py'
            --vcf_dataset_names
            #for $vcf_file in $vcf_inputs
                '${vcf_file.element_identifier}'
            #end for
            --neo4j_db_path '${outputDb.files_path}'
            --user '${__user_email__}'
            --variantset_name '${vcf_inputs.name}'
            --vcf_files
            #for $vcf_file in $vcf_inputs
                '${vcf_file}'
            #end for
        && python '$__tool_directory__/write_db_summary.py'
            '${outputDb.files_path}' '${outputDb.name}' >'${outputDb}'
    ]]>
    </command>
    <inputs>
        <param name="vcf_inputs" type="data_collection"
               format="vcf" label="VCF files"
               help="H37Rv TB variants in VCF format" />
        <param name="inputDb" type="data" format="neostore"
               label="Reference TB database (in Neo4j format)"
               help="Reference TB database previously generated by tb2neo" />
    </inputs>
    <outputs>
        <data format="neostore" name="outputDb"/>
    </outputs>
    <tests>
        <test>
        </test>
    </tests>
    <help><![CDATA[
The vcf2neo_ tool adds variants (in VCF format) to a TB annotation
database (previously built using tb2neo_). The output is a Neo4j database
stored as a Galaxy neostore datatype that can be explored with the Neo4j
Interactive Environment or saved for use outside Galaxy.

.. _vcf2neo: https://github.com/sanbi-sa/vcf2neo
.. _tb2neo: https://github.com/sanbi-sa/tb2neo
    ]]></help>
    <citations>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vcf2neo_wrapper.py Mon Jun 19 00:08:18 2017 -0400 @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +from __future__ import print_function +import argparse +import os +import shlex +import subprocess +import uuid +import sys + +parser = argparse.ArgumentParser( + description="Call vcf2neo on VCF inputs from Galaxy") + +parser.add_argument('--vcf_dataset_names', nargs='+', + help='Names of VCF datasets') +parser.add_argument('--neo4j_db_path', help='Neo4j database directory') +parser.add_argument('--user', help='Email of Galaxy user running this tool') +parser.add_argument('--variantset_name', + help='Name for the VariantSet containing all the variants') +parser.add_argument('--vcf_files', + help='VCF format variant file', nargs='+') + +args = parser.parse_args() + +os.mkdir(args.variantset_name) + +print("VCF names:", len(args.vcf_dataset_names), args.vcf_dataset_names, file=sys.stderr) +print("VCF files:", len(args.vcf_files), args.vcf_files, file=sys.stderr) +for i, vcf_file in enumerate(args.vcf_files): + print("XXXX I:", i, vcf_file, file=sys.stderr) + callset_name = args.vcf_dataset_names[i] + os.symlink(vcf_file, os.path.join(args.variantset_name, + callset_name) + '.vcf') +# Usage: vcf2neo init [OPTIONS] VCF_DIR OWNER [HISTORY_ID] [OUTPUT_DIR] +# +# Copy reference database and load VCF to Neo4j Graph database. :param +# vcf_dir: :param refdb_dir: :param d: :return: +# +# Options: +# -d / -D Run Neo4j docker container. +# --help Show this message and exit. + +history_id = str(uuid.uuid4()) +cmd_str = ('vcf2neo init -d ' + + '{input_vcf_dir} {email} {history_id} {neo4j_db_path}'.format( + input_vcf_dir=args.variantset_name, + email=args.user, + history_id=history_id, + neo4j_db_path=args.neo4j_db_path)) +cmd = shlex.split(cmd_str) +subprocess.check_call(cmd)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/write_db_summary.py Mon Jun 19 00:08:18 2017 -0400 @@ -0,0 +1,27 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import os +import os.path + +parser = argparse.ArgumentParser( + description="Write HTML summary from neostore datatype") +parser.add_argument('basepath') +parser.add_argument('label') + +args = parser.parse_args() + +output = """<html><head><title>Files for Composite Dataset ({})</title></head> + <p/>This composite dataset is composed of + the following files:<p/><ul>\n""".format(args.label) +db_path = args.basepath + '/neo4jdb/databases/graph.db' +for filename in os.listdir(db_path): + if filename.startswith('.'): + continue + path = db_path + '/' + filename + if os.path.isdir(path): + continue + output += "<li>{}</li>\n".format(filename) +output += '</ul></html>\b' +print(output)