Mercurial > repos > sanbi-uwc > data_manager_novocraft_index_builder
changeset 0:6cddc1a6e282 draft
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit 3a0b36e3ee66f35f7e2d5e1220cd883a9d528fac
author | sanbi-uwc |
---|---|
date | Fri, 04 Mar 2016 08:23:09 -0500 |
parents | |
children | d9f0b710e097 |
files | README.md data_manager/novocraft_index_builder.py data_manager/novocraft_index_builder.xml data_manager_conf.xml tool-data/novocraft_index.nix.sample tool_data_table_conf.xml.sample tool_dependencies.xml |
diffstat | 7 files changed, 174 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Fri Mar 04 08:23:09 2016 -0500 @@ -0,0 +1,2 @@ +# Data Manager Novocraft Index Builder +Galaxy Data Manager that builds Novocraft reference indices and registers them in the `novocraft_index` data table.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/novocraft_index_builder.py Fri Mar 04 08:23:09 2016 -0500 @@ -0,0 +1,99 @@ +#!/usr/bin/env python +# Z. Mashologu (SANBI-UWC) +# import dict as dict +from __future__ import print_function +import os +import sys +import urllib2 +import logging +import argparse +import shlex +from subprocess import check_call, CalledProcessError + +log = logging.getLogger(__name__) + +from json import loads, dumps + +def get_dbkey_id_name(params, dbkey_description=None): + dbkey = params['param_dict']['dbkey'] + # TODO: ensure sequence_id is unique and does not already appear in location file + sequence_id = params['param_dict']['sequence_id'] + if not sequence_id: + sequence_id = dbkey # uuid.uuid4() generate and use an uuid instead? + + sequence_name = params['param_dict']['sequence_name'] + if not sequence_name: + sequence_name = dbkey_description + if not sequence_name: + sequence_name = dbkey + return dbkey, sequence_id, sequence_name + + +def _make_novocraft_index(fasta_filename, target_directory): + if os.path.exists(target_directory) and not os.path.isdir(target_directory): + print("Output directory path already exists but is not a directory: {}".format(target_directory), + file=sys.stderr) + elif not os.path.exists(target_directory): + os.mkdir(target_directory) + + if 'GALAXY_SLOTS' in os.environ: + nslots = os.environ['GALAXY_SLOTS'] + else: + nslots = 1 + + #cmdline_str = 'STAR --runMode genomeGenerate --genomeDir {} --genomeFastaFiles {} --runThreadN {}'.format( + # target_directory, + # fasta_filename, + # nslots) + #cmdline = shlex.split(cmdline_str) + cmdline = ('touch', '{}/foo.nix'.format(target_directory)) + try: + check_call(cmdline) + except CalledProcessError: + print("Error building RNA STAR index", file=sys.stderr) + return (target_directory) + + +def download_from_url(params, target_directory): + # TODO: we should automatically do decompression here + urls = filter(bool, map(lambda x: x.strip(), 
params['param_dict']['reference_source']['user_url'].split('\n'))) + fasta_reader = [urllib2.urlopen(url) for url in urls] + + _make_novocraft_index(fasta_reader, target_directory) + + +def download_from_history( params, target_directory): + # TODO: allow multiple FASTA input files + input_filename = params['param_dict']['reference_source']['input_fasta'] + + _make_novocraft_index(input_filename, target_directory) + +REFERENCE_SOURCE_TO_DOWNLOAD = dict(url=download_from_url, history=download_from_history) + +def main(): + parser = argparse.ArgumentParser(description="Generate Novo-align genome index and JSON describing this") + parser.add_argument('output_filename') + parser.add_argument('--dbkey_description') + parser.add_argument('--data_table_name', default='novocraft_index') + args = parser.parse_args() + + filename = args.output_filename + + params = loads(open(filename).read()) + target_directory = params['output_data'][0]['extra_files_path'] + os.makedirs(target_directory) + + dbkey, sequence_id, sequence_name = get_dbkey_id_name(params, dbkey_description=args.dbkey_description) + if dbkey in [None, '', '?']: + raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' % (dbkey)) + + # Fetch the FASTA + REFERENCE_SOURCE_TO_DOWNLOAD[params['param_dict']['reference_source']['reference_source_selector']]\ + (params, target_directory) + + data_table_entry = dict(value=sequence_id, dbkey=dbkey, name=sequence_name, path=target_directory) + + output_datatable_dict = dict(data_tables={args.data_table_name: [data_table_entry]}) + open(filename, 'wb').write(dumps(output_datatable_dict)) + +if __name__ == "__main__": main()
<?xml version="1.0" encoding="utf-8" ?>
<!-- data_manager/novocraft_index_builder.xml
     Galaxy data manager tool definition. It runs novocraft_index_builder.py,
     which builds the index and rewrites ${out_file} with the JSON describing
     the new "novocraft_index" data table entry. -->
<tool id="novoalign_index_builder" name="NOVO ALIGN index" tool_type="manage_data" version="0.0.1">
    <description>Build an index for use by the Novo Align mapping tool</description>
    <stdio>
        <!-- Any non-zero exit code marks the job as failed. -->
        <exit_code range=":-1" />
        <exit_code range="1:" />
    </stdio>
    <!-- The script name must match the file shipped next to this XML
         (data_manager/novocraft_index_builder.py). -->
    <command interpreter="python">
        novocraft_index_builder.py "${out_file}" --dbkey_description ${ dbkey.get_display_text() } --data_table_name "novocraft_index"
    </command>
    <inputs>
        <param name="dbkey" type="genomebuild" label="DBKEY to assign to data" />
        <param type="text" name="sequence_name" value="" label="Name of sequence" />
        <param type="text" name="sequence_desc" value="" label="Description of sequence" />
        <param type="text" name="sequence_id" value="" label="ID for sequence" />
        <!-- The selected value picks the download function in the script. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
                <option value="url">URL</option>
                <option value="history">History</option>
            </param>
            <when value="url">
                <!-- One URL per line. -->
                <param type="text" area="True" name="user_url" value="http://" label="URLs" optional="False" />
            </when>
            <when value="history">
                <param name="input_fasta" type="data" format="fasta" label="FASTA File" multiple="False" optional="False" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="out_file" format="data_manager_json" />
    </outputs>
    <tests>
        <test>
            <param name="reference_source_selector" value="history"/>
            <output name="out_file" file="phiX174_as_anoGam1.data_manager_json"/>
        </test>
    </tests>
    <help>Help!</help>
    <citations>
        <citation></citation>
    </citations>
</tool>
<?xml version="1.0" encoding="utf-8" ?>
<!-- data_manager_conf.xml
     Registers the Novocraft index builder tool as a data manager and
     describes how its output populates the "novocraft_index" data table. -->
<data_managers>
    <data_manager tool_file="data_manager/novocraft_index_builder.xml" id="novocraft_index_builder" version="0.0.1">
        <data_table name="novocraft_index">
            <output>
                <column name="value" />
                <column name="dbkey" />
                <column name="name" />
                <column name="path" output_ref="out_file">
                    <move type="directory" relative_symlinks="True">
                        <!-- no need to set source, gets taken from out_file.extra_files_path -->
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/novocraft_index/${value}</target>
                    </move>
                    <!-- Every placeholder uses the ${name} form; the path built here
                         must match the move target above. -->
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/novocraft_index/${value}/${path}</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
            </output>
        </data_table>
    </data_manager>
</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/novocraft_index.nix.sample Fri Mar 04 08:23:09 2016 -0500 @@ -0,0 +1,2 @@ +#Sample location file for the novocraft_index data table: one index per line, with the four columns shown below. +#<unique_build_id> <dbkey> <display_name> <directory_path>
<!-- tool_data_table_conf.xml.sample
     Declares the "novocraft_index" data table and the location file that
     backs it. Lines of that file starting with "#" are comments. -->
<tables>
    <table name="novocraft_index" comment_char="#">
        <columns>value, dbkey, name, path</columns>
        <file path="tool-data/novocraft_index.nix" />
    </table>
</tables>