Mercurial > repos > sanbi-uwc > data_manager_novocraft_index_builder

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Fri Mar 04 08:23:09 2016 -0500
@@ -0,0 +1,2 @@
+# Data Manager Novocraft Index Builder
+Galaxy data manager that builds a Novocraft index for a reference genome.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/novocraft_index_builder.py	Fri Mar 04 08:23:09 2016 -0500
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+# Z. Mashologu (SANBI-UWC)
+# import dict as dict
+from __future__ import print_function
+import os
+import sys
+import urllib2
+import logging
+import argparse
+import shlex
+from subprocess import check_call, CalledProcessError
+
+log = logging.getLogger(__name__)
+
+from json import loads, dumps
+
+def get_dbkey_id_name(params, dbkey_description=None):
+    """Resolve the dbkey, sequence id and display name for the new entry.
+
+    Blank values fall back in order: the sequence id defaults to the dbkey,
+    and the sequence name defaults to ``dbkey_description`` and then to the
+    dbkey.
+
+    :param params: parsed data manager JSON; ``params['param_dict']`` must
+        hold the ``dbkey``, ``sequence_id`` and ``sequence_name`` keys.
+    :param dbkey_description: optional human-readable label for the dbkey.
+    :returns: ``(dbkey, sequence_id, sequence_name)`` tuple.
+    """
+    tool_params = params['param_dict']
+    dbkey = tool_params['dbkey']
+
+    # TODO: ensure sequence_id is unique and does not already appear in location file
+    # (a generated uuid.uuid4() could replace the dbkey fallback)
+    sequence_id = tool_params['sequence_id'] or dbkey
+    sequence_name = tool_params['sequence_name'] or dbkey_description or dbkey
+    return dbkey, sequence_id, sequence_name
+
+
+def _make_novocraft_index(fasta_filename, target_directory):
+    """Create the index directory and write the Novocraft index into it.
+
+    NOTE: the index build is still a placeholder -- it only touches
+    ``foo.nix`` inside ``target_directory`` and does not read
+    ``fasta_filename`` yet.
+
+    :param fasta_filename: path of the FASTA file to index (currently unused).
+    :param target_directory: directory that receives the index; created if
+        it does not exist.
+    :returns: ``target_directory``.
+    :raises SystemExit: with status 1 when ``target_directory`` exists but
+        is not a directory, or when the index command fails.
+    """
+    if os.path.exists(target_directory):
+        if not os.path.isdir(target_directory):
+            print("Output directory path already exists but is not a directory: {}".format(target_directory),
+                  file=sys.stderr)
+            # Nothing can be written below a non-directory; stop with a
+            # non-zero status instead of carrying on with a doomed build.
+            sys.exit(1)
+    else:
+        os.mkdir(target_directory)
+
+    # TODO: replace this placeholder with the real index command, giving it
+    # fasta_filename and the thread count from GALAXY_SLOTS (default 1).
+    cmdline = ('touch', os.path.join(target_directory, 'foo.nix'))
+    try:
+        check_call(cmdline)
+    except CalledProcessError as err:
+        print("Error building Novocraft index (exit code {})".format(err.returncode), file=sys.stderr)
+        # Exit non-zero so the failure is not hidden behind a data table
+        # entry that points at an incomplete index.
+        sys.exit(1)
+    return target_directory
+
+
+def download_from_url(params, target_directory):
+    """Download the reference FASTA from the user-supplied URLs and index it.
+
+    The URLs (one per line) are fetched in order and concatenated into a
+    single temporary FASTA file, which is handed to the index builder and
+    removed afterwards.
+
+    :param params: parsed data manager JSON; the URLs are read from
+        ``params['param_dict']['reference_source']['user_url']``.
+    :param target_directory: directory that receives the index.
+    :raises ValueError: when no URL was supplied.
+    """
+    # Local imports: only this code path needs them.
+    import shutil
+    import tempfile
+
+    # TODO: we should automatically do decompression here
+    url_text = params['param_dict']['reference_source']['user_url']
+    urls = [line.strip() for line in url_text.split('\n') if line.strip()]
+    if not urls:
+        raise ValueError('No URL was provided for the reference genome.')
+
+    # delete=False: the file must survive close() so the index builder can
+    # read it by name; it is removed explicitly below.
+    fasta_file = tempfile.NamedTemporaryFile(suffix='.fasta', delete=False)
+    try:
+        try:
+            for url in urls:
+                response = urllib2.urlopen(url)
+                try:
+                    shutil.copyfileobj(response, fasta_file)
+                finally:
+                    response.close()
+        finally:
+            fasta_file.close()
+        _make_novocraft_index(fasta_file.name, target_directory)
+    finally:
+        os.remove(fasta_file.name)
+
+
+def download_from_history(params, target_directory):
+    """Index the FASTA dataset selected from the Galaxy history.
+
+    :param params: parsed data manager JSON; the dataset path is read from
+        ``params['param_dict']['reference_source']['input_fasta']``.
+    :param target_directory: directory that receives the index.
+    """
+    # TODO: allow multiple FASTA input files
+    reference_source = params['param_dict']['reference_source']
+    _make_novocraft_index(reference_source['input_fasta'], target_directory)
+
+# Maps the ``reference_source_selector`` value to the function that fetches and indexes the FASTA.
+REFERENCE_SOURCE_TO_DOWNLOAD = {'url': download_from_url, 'history': download_from_history}
+
+def main():
+    """Build the index described by the data manager JSON and report it.
+
+    Reads the JSON parameter file named on the command line, builds the
+    index under the output dataset's ``extra_files_path`` and overwrites the
+    same file with the JSON describing the new data table entry.
+
+    :raises Exception: when no valid dbkey was supplied.
+    """
+    parser = argparse.ArgumentParser(description="Generate Novo-align genome index and JSON describing this")
+    parser.add_argument('output_filename')
+    parser.add_argument('--dbkey_description')
+    parser.add_argument('--data_table_name', default='novocraft_index')
+    args = parser.parse_args()
+
+    filename = args.output_filename
+
+    with open(filename) as json_in:
+        params = loads(json_in.read())
+    target_directory = params['output_data'][0]['extra_files_path']
+
+    # Validate before touching the file system so a bad dbkey leaves nothing behind.
+    dbkey, sequence_id, sequence_name = get_dbkey_id_name(params, dbkey_description=args.dbkey_description)
+    if dbkey in [None, '', '?']:
+        raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' % (dbkey))
+
+    # os.makedirs() raises if the directory is already there, so only create it when missing.
+    if not os.path.isdir(target_directory):
+        os.makedirs(target_directory)
+
+    # Fetch the FASTA
+    source = params['param_dict']['reference_source']['reference_source_selector']
+    REFERENCE_SOURCE_TO_DOWNLOAD[source](params, target_directory)
+
+    data_table_entry = dict(value=sequence_id, dbkey=dbkey, name=sequence_name, path=target_directory)
+
+    output_datatable_dict = dict(data_tables={args.data_table_name: [data_table_entry]})
+    # dumps() returns text, so the file is opened in text mode.
+    with open(filename, 'w') as json_out:
+        json_out.write(dumps(output_datatable_dict))
+
+# Run the data manager only when executed as a script.
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/novocraft_index_builder.xml	Fri Mar 04 08:23:09 2016 -0500
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!-- Galaxy data manager tool: collects the dbkey, naming fields and the FASTA source, then runs novocraft_index_builder.py. -->
+<tool id="novoalign_index_builder" name="NOVO ALIGN index" tool_type="manage_data" version="0.0.1">
+    <description>Build an index for use by the Novo Align mapping tool</description>
+    <stdio>
+        <!-- Both ranges together cover every non-zero exit code. -->
+        <exit_code range=":-1" />
+        <exit_code range="1:" />
+    </stdio>
+    <!-- The script name must match the file shipped next to this XML; the description is quoted because display text may contain spaces. -->
+    <command interpreter="python">
+        novocraft_index_builder.py "${out_file}" --dbkey_description "${ dbkey.get_display_text() }" --data_table_name "novocraft_index"
+    </command>
+    <inputs>
+        <param name="dbkey" type="genomebuild" label="DBKEY to assign to data" />
+        <param type="text" name="sequence_name" value="" label="Name of sequence" />
+        <param type="text" name="sequence_desc" value="" label="Description of sequence" />
+        <param type="text" name="sequence_id" value="" label="ID for sequence" />
+        <!-- The selector value picks the download function used by the script. -->
+        <conditional name="reference_source">
+          <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
+            <option value="url">URL</option>
+            <option value="history">History</option>
+          </param>
+          <when value="url">
+            <param type="text" area="True" name="user_url" value="http://" label="URLs" optional="False" />
+          </when>
+          <when value="history">
+            <param name="input_fasta" type="data" format="fasta" label="FASTA File" multiple="False" optional="False" />
+          </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="reference_source_selector" value="history"/>
+            <output name="out_file" file="phiX174_as_anoGam1.data_manager_json"/>
+        </test>
+    </tests>
+    <help>Help!</help>
+    <citations>
+        <!-- TODO: fill in the citation or remove the empty element. -->
+        <citation></citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Fri Mar 04 08:23:09 2016 -0500
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!-- Registers the index builder tool and describes how its output fills the novocraft_index data table. -->
+<data_managers>
+    <data_manager tool_file="data_manager/novocraft_index_builder.xml" id="novocraft_index_builder" version="0.0.1">
+        <data_table name="novocraft_index">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" output_ref="out_file">
+                    <move type="directory" relative_symlinks="True">
+                        <!-- no need to set source, gets taken from out_file.extra_files_path -->
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/novocraft_index/${value}</target>
+                    </move>
+                    <!-- Uses the same ${dbkey}/novocraft_index/${value} layout as the move target above. -->
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/novocraft_index/${value}/${path}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/novocraft_index.nix.sample	Fri Mar 04 08:23:09 2016 -0500
@@ -0,0 +1,2 @@
+#TODO: Document
+#<unique_build_id>  <dbkey> <display_name>  <directory_path>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Mar 04 08:23:09 2016 -0500
@@ -0,0 +1,6 @@
+<tables>
+    <!-- Location table for the Novocraft indexes: one row per index with the columns value, dbkey, name and path. -->
+    <table name="novocraft_index" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/novocraft_index.nix" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri Mar 04 08:23:09 2016 -0500
@@ -0,0 +1,3 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <!-- No tool dependencies are declared yet. -->
+</tool_dependency>