Mercurial > repos > dfornika > data_manager_mash_sketch_builder
changeset 0:647eedfd1556 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mash_sketch_builder/ commit 20558d9de7b6dd375a8835698f231cbc0c83446d-dirty"
author | dfornika |
---|---|
date | Sat, 22 Feb 2020 00:30:35 +0000 |
parents | |
children | af86557c1b87 |
files | data_manager/mash_sketch_builder.py data_manager/mash_sketch_builder.xml data_manager_conf.xml test-data/mash_sketch_data_manager.json test-data/test_assembly.fasta tool-data/mash_sketches.loc.sample tool_data_table_conf.xml.sample |
diffstat | 6 files changed, 210 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# File: data_manager/mash_sketch_builder.py
"""Galaxy data manager script that builds a Mash sketch.

The script reads the data-manager JSON file that Galaxy passes to the tool,
runs ``mash sketch`` on the given input files inside the job's
``extra_files_path`` and then overwrites that JSON file with a
``mash_sketches`` data table entry describing the new sketch.
"""

import argparse
import errno
import json
import os
import subprocess
import sys
import uuid


DATA_TABLE_NAME = "mash_sketches"
DEFAULT_OUTPUT_PREFIX = "sketch"


def mash_sketch(mash_sketch_args, sketch_name, target_directory, data_table_name=DATA_TABLE_NAME):
    """Run ``mash sketch`` and return the data table entry for the result.

    A fresh UUID-named sub-directory is created in ``target_directory`` and
    the sketch is written into it.

    Parameters:
        mash_sketch_args: dict of sketch settings. Required keys are
            ``"kmer_size"`` and ``"sketch_size"``. ``"inputs"`` must be a
            non-empty list of sequence files. Optional keys: ``"threads"``
            (default 1), ``"min_copies"`` (passed as ``-m``), ``"reads"``
            (passed as ``-r``), ``"individual_sequences"`` (passed as ``-i``)
            and ``"output_prefix"`` (default ``"sketch"``).
        sketch_name: display name stored in the ``name`` column.
        target_directory: existing directory in which the sketch is built.
        data_table_name: name of the data table the entry belongs to.

    Returns:
        A ``{"data_tables": {data_table_name: [entry]}}`` dict whose entry has
        the keys ``value``, ``name`` and ``path``.

    Raises:
        ValueError: if no input files were supplied.
        subprocess.CalledProcessError: if ``mash sketch`` exits non-zero.
    """
    # mash runs with cwd=target_directory, so relative input paths would no
    # longer resolve; make them absolute first.
    inputs = [os.path.abspath(path) for path in (mash_sketch_args.get("inputs") or [])]
    if not inputs:
        raise ValueError("mash sketch needs at least one input file")

    UUID = str(uuid.uuid4())

    os.mkdir(os.path.join(target_directory, UUID))

    # Only the base name of the prefix is used so that the sketch always ends
    # up inside the UUID directory.
    prefix_name = os.path.basename(
        mash_sketch_args.get("output_prefix") or DEFAULT_OUTPUT_PREFIX
    ) or DEFAULT_OUTPUT_PREFIX
    sketch_prefix = os.path.join(UUID, prefix_name)

    args = [
        '-p', str(mash_sketch_args.get("threads", 1)),
        '-k', str(mash_sketch_args["kmer_size"]),
        '-s', str(mash_sketch_args["sketch_size"]),
        '-o', sketch_prefix,
    ]

    min_copies = mash_sketch_args.get("min_copies")
    if min_copies is not None:
        args.extend(['-m', str(min_copies)])
    if mash_sketch_args.get("reads"):
        args.append('-r')
    if mash_sketch_args.get("individual_sequences"):
        args.append('-i')

    subprocess.check_call(['mash', 'sketch'] + args + inputs, cwd=target_directory)

    # data_manager_conf.xml moves ${path} as a directory, so the path column
    # holds the per-sketch directory (relative to target_directory); the
    # sketch file written by mash lives inside it.
    data_table_entry = {
        'data_tables': {
            data_table_name: [
                {
                    "value": UUID,
                    "name": sketch_name,
                    "path": UUID,
                }
            ]
        }
    }

    return data_table_entry


def main(argv=None):
    """Parse the command line, build the sketch and write the result JSON.

    ``argv`` defaults to ``sys.argv[1:]``. The short options mirror the flags
    that the tool wrapper passes (``-s``, ``-k``, ``-m``, ``-r``, ``-p``,
    ``-i``, ``-o``); the input sequence files are given positionally after the
    data manager JSON file.
    """
    parser = argparse.ArgumentParser(description='Build a Mash sketch for a Galaxy data table')
    parser.add_argument('data_manager_json', help='data manager JSON file (read, then overwritten)')
    parser.add_argument('inputs', nargs='+', help='sequence files to sketch')
    # Defaults follow the tool wrapper (k=21, s=1000); the wrapper limits k to 32.
    parser.add_argument('-k', '--kmer-size', dest='kmer_size', type=int, default=21, help='kmer size')
    parser.add_argument('-s', '--sketch-size', dest='sketch_size', type=int, default=1000, help='sketch size')
    parser.add_argument('-p', '--threads', dest='threads', type=int, default=1, help='threads')
    parser.add_argument('-m', '--min-copies', dest='min_copies', type=int, default=None,
                        help='minimum copies of each k-mer required to pass the noise filter')
    parser.add_argument('-r', '--reads', dest='reads', action='store_true', help='inputs are read sets')
    parser.add_argument('-i', '--individual-sequences', dest='individual_sequences', action='store_true',
                        help='sketch individual sequences')
    parser.add_argument('-o', '--output-prefix', dest='output_prefix', default=DEFAULT_OUTPUT_PREFIX,
                        help='base name of the sketch file')
    parser.add_argument('--sketch-name', dest='sketch_name', default=None, help='Name for sketch')
    args = parser.parse_args(argv)

    with open(args.data_manager_json) as handle:
        data_manager_input = json.load(handle)

    target_directory = data_manager_input['output_data'][0]['extra_files_path']

    try:
        os.makedirs(target_directory)
    except OSError as exc:
        # An already existing directory is fine; anything else is an error.
        if exc.errno != errno.EEXIST or not os.path.isdir(target_directory):
            raise

    mash_sketch_args = {
        "kmer_size": args.kmer_size,
        "sketch_size": args.sketch_size,
        "threads": args.threads,
        "min_copies": args.min_copies,
        "reads": args.reads,
        "individual_sequences": args.individual_sequences,
        "output_prefix": args.output_prefix,
        "inputs": args.inputs,
    }

    # Fall back to the output prefix when no display name was given.
    sketch_name = args.sketch_name or args.output_prefix

    data_manager_output = mash_sketch(mash_sketch_args, sketch_name, target_directory)

    with open(args.data_manager_json, 'w') as handle:
        json.dump(data_manager_output, handle, sort_keys=True)


if __name__ == "__main__":
    sys.exit(main())
<?xml version="1.0"?>
<!-- File: data_manager/mash_sketch_builder.xml -->
<!-- Galaxy data manager tool that runs mash_sketch_builder.py to create a Mash sketch
     from reads or an assembly and report it through the data manager JSON output. -->
<tool id="mash_sketch_builder" name="Mash Sketch" tool_type="manage_data" version="2.1+galaxy0" profile="18.09">
    <macros>
        <!-- Datatypes accepted for read inputs; referenced as @INTYPES@ below.
             NOTE(review): list chosen to cover plain and gzipped FASTA/FASTQ - confirm. -->
        <token name="@INTYPES@">fastq,fastq.gz,fasta,fasta.gz</token>
    </macros>
    <description>sketch builder</description>
    <requirements>
        <requirement type="package" version="2.1">mash</requirement>
        <requirement type="package" version="3.7">python</requirement>
    </requirements>
    <version_command>mash --version</version_command>
    <command detect_errors="exit_code">
    <![CDATA[
        python '$__tool_directory__/mash_sketch_builder.py'
          '${out_file}'
          #if str( $sketch_name ).strip()
            --sketch-name '${sketch_name}'
          #end if
          -s '${sketch_size}'
          -k '${kmer_size}'
          ## Threads apply to both the reads and the assembly branch.
          -p \${GALAXY_SLOTS:-1}
          #if str( $reads_assembly.reads_assembly_selector ) == "reads"
            -m '${reads_assembly.minimum_kmer_copies}'
            -r
            #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired"
              '$reads_assembly.reads_input.reads_1' '$reads_assembly.reads_input.reads_2'
            #elif str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection"
              '$reads_assembly.reads_input.reads.forward' '$reads_assembly.reads_input.reads.reverse'
            #elif str( $reads_assembly.reads_input.reads_input_selector ) == "single"
              '$reads_assembly.reads_input.reads'
            #end if
          #elif str( $reads_assembly.reads_assembly_selector ) == "assembly"
            ${reads_assembly.individual_sequences}
            ## The assembly parameter lives inside the reads_assembly conditional.
            '${reads_assembly.assembly}'
          #end if
          -o 'sketch'
    ]]>
    </command>
    <inputs>
        <param name="sketch_name" type="text" value="" label="Name for sketch" help="Display name stored in the data table" />
        <conditional name="reads_assembly">
            <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies">
                <option selected="True" value="reads">Reads</option>
                <option value="assembly">Assembly</option>
            </param>
            <when value="reads">
                <conditional name="reads_input">
                    <param name="reads_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                        <option value="paired">Paired</option>
                        <option value="single">Single</option>
                        <option value="paired_collection">Paired Collection</option>
                    </param>
                    <when value="paired">
                        <param name="reads_1" type="data" format="@INTYPES@" label="Select first set of reads" help="Specify dataset with forward reads"/>
                        <param name="reads_2" type="data" format="@INTYPES@" label="Select second set of reads" help="Specify dataset with reverse reads"/>
                    </when>
                    <when value="single">
                        <param name="reads" type="data" format="@INTYPES@" label="Select fastq dataset" help="Specify dataset with single reads"/>
                    </when>
                    <when value="paired_collection">
                        <param name="reads" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
                    </when>
                </conditional>
                <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000" label="Minimum copies of each k-mer required to pass noise filter"/>
            </when>
            <when value="assembly">
                <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/>
                <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences"/>
            </when>
        </conditional>
        <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" />
        <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32" label="kmer size" />
    </inputs>
    <outputs>
        <data name="out_file" format="data_manager_json" />
    </outputs>
    <tests>
        <test>
            <conditional name="reads_assembly">
                <param name="reads_assembly_selector" value="assembly" />
                <param name="assembly" value="test_assembly.fasta"/>
            </conditional>
            <param name="sketch_name" value="Test Sketch" />
            <!-- The builder script names each entry with uuid.uuid4(), so the JSON cannot be
                 compared byte-for-byte; check the stable parts instead. -->
            <output name="out_file">
                <assert_contents>
                    <has_text text="mash_sketches" />
                    <has_text text="Test Sketch" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
<![CDATA[
Builds a Mash sketch from reads or from an assembly and registers it in the
``mash_sketches`` data table.
]]>
    </help>
    <citations>
        <citation type="doi">10.1186/s13059-016-0997-x</citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Sat Feb 22 00:30:35 2020 +0000 @@ -0,0 +1,18 @@ +<data_managers> + <!-- Registers data_manager/mash_sketch_builder.xml as the data manager for the "mash_sketches" data table. --> + <data_manager tool_file="data_manager/mash_sketch_builder.xml" id="mash_sketch_builder" version="2.1+galaxy0"> + <data_table name="mash_sketches"> + <output> + <column name="value"/> + <column name="name"/> + <!-- The path column is taken from the out_file output: ${path} is moved as a directory to mash_sketches/${path} under ${GALAXY_DATA_MANAGER_DATA_PATH}, and the stored value is rewritten to that location and passed through abspath. --> + <column name="path" output_ref="out_file"> + <move type="directory"> + <source>${path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">mash_sketches/${path}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/mash_sketches/${path}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + </data_manager> +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mash_sketch_data_manager.json Sat Feb 22 00:30:35 2020 +0000 @@ -0,0 +1,1 @@ +{"data_tables": {"mash_sketches": [{"name": "sketch", "path": "sketch", "value": "sketch"}]}}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_assembly.fasta Sat Feb 22 00:30:35 2020 +0000 @@ -0,0 +1,3 @@ +>test +GCATGTCGATCTGTGTGCTAGTCGTAGTCGATCGATCTGATCGATCTGTCAGTCAGTAGT +CTCAGCGATGCATTATTATATTATATTATCGATCGATGCTGATCGATTATATTCGATCTG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Sat Feb 22 00:30:35 2020 +0000 @@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of Mash sketches in the required format --> + <!-- The columns listed here (value, name, path) are the same three columns that data_manager_conf.xml declares for the mash_sketches table. --> + <table name="mash_sketches" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/mash_sketches.loc" /> + </table> +</tables>