changeset 0:647eedfd1556 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mash_sketch_builder/ commit 20558d9de7b6dd375a8835698f231cbc0c83446d-dirty"
author dfornika
date Sat, 22 Feb 2020 00:30:35 +0000
parents
children af86557c1b87
files data_manager/mash_sketch_builder.py data_manager/mash_sketch_builder.xml data_manager_conf.xml test-data/mash_sketch_data_manager.json test-data/test_assembly.fasta tool-data/mash_sketches.loc.sample tool_data_table_conf.xml.sample
diffstat 6 files changed, 210 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/mash_sketch_builder.py	Sat Feb 22 00:30:35 2020 +0000
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+
+import argparse
+import errno
+import json
+import os
+import subprocess
+import sys
+
+
+DATA_TABLE_NAME = "mash_sketches"
+
+
+def mash_sketch(mash_sketch_args, sketch_name, target_directory, data_table_name=DATA_TABLE_NAME):
+    """Run ``mash sketch`` and describe the result as a Galaxy data table entry.
+
+    ``mash_sketch_args`` must provide ``threads``, ``kmer_size`` and
+    ``sketch_size``; an optional ``inputs`` list names the sequence files to
+    sketch.  Output goes to a new UUID-named directory in ``target_directory``.
+
+    Returns a ``{'data_tables': {data_table_name: [entry]}}`` dict.
+    Raises ``subprocess.CalledProcessError`` when mash exits with an error.
+    """
+    import uuid  # function-scope import: uuid is not imported at module level
+
+    sketch_id = str(uuid.uuid4())
+    os.mkdir(os.path.join(target_directory, sketch_id))
+
+    # Output prefix relative to target_directory, which is mash's working
+    # directory below.  NOTE(review): mash appends ".msh" to this prefix --
+    # confirm that consumers of the "path" column expect the bare prefix.
+    sketch_path = os.path.join(sketch_id, "sketch")
+
+    # Inputs are made absolute because mash does not run in our own cwd.
+    inputs = [os.path.abspath(path) for path in mash_sketch_args.get("inputs", [])]
+    command = [
+        'mash', 'sketch',
+        '-p', str(mash_sketch_args["threads"]),
+        '-k', str(mash_sketch_args["kmer_size"]),
+        '-s', str(mash_sketch_args["sketch_size"]),
+        '-o', sketch_path,
+    ] + inputs
+    subprocess.check_call(command, cwd=target_directory)
+
+    return {
+        'data_tables': {
+            data_table_name: [
+                {"value": sketch_id, "name": sketch_name, "path": sketch_path}
+            ]
+        }
+    }
+
+
+def main():
+    """Parse the command line, build the sketch and write the data manager JSON."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('data_manager_json')
+    # Defaults follow the tool XML, which caps the k-mer size at 32.
+    parser.add_argument('--kmer-size', dest='kmer_size', type=int, default=21, help='kmer size')
+    parser.add_argument('--sketch-size', dest='sketch_size', type=int, default=1000, help='sketch size')
+    parser.add_argument('--threads', dest='threads', type=int, default=1, help='threads')
+    parser.add_argument('--sketch-name', dest='sketch_name', help='Name for sketch')
+    parser.add_argument('--input', dest='inputs', action='append', default=[], help='file to sketch (repeatable)')
+    args = parser.parse_args()
+
+    with open(args.data_manager_json) as handle:
+        data_manager_input = json.load(handle)
+    target_directory = data_manager_input['output_data'][0]['extra_files_path']
+
+    try:
+        os.mkdir(target_directory)
+    except OSError as exc:
+        # An existing directory is fine; any other failure is a real error.
+        if exc.errno != errno.EEXIST or not os.path.isdir(target_directory):
+            raise
+
+    mash_sketch_args = {
+        "kmer_size": args.kmer_size,
+        "sketch_size": args.sketch_size,
+        "threads": args.threads,
+        "inputs": args.inputs,
+    }
+    data_manager_output = mash_sketch(mash_sketch_args, args.sketch_name, target_directory)
+    with open(args.data_manager_json, 'w') as handle:
+        json.dump(data_manager_output, handle, sort_keys=True)
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/mash_sketch_builder.xml	Sat Feb 22 00:30:35 2020 +0000
@@ -0,0 +1,93 @@
+<?xml version="1.0"?>
+<tool id="mash_sketch_builder" name="Mash Sketch" tool_type="manage_data" version="2.1+galaxy0" profile="18.09">
+    <macros>
+        <xml name="common_params">
+            <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" />
+            <param name="minimizer_len" type="integer" value="31" label="Minimizer length" />
+            <param name="minimizer_spaces" type="integer" value="6" label="Minimizer spaces" />
+            <param name="clean" type="boolean" truevalue="--clean" falsevalue="" checked="true" label="Clean up extra files" />
+        </xml>
+    </macros>
+    <description>database builder</description>
+    <requirements>
+        <requirement type="package" version="2.1">mash</requirement>
+        <requirement type="package" version="3.7">python</requirement>
+    </requirements>
+    <version_command>mash --version</version_command>
+    <command detect_errors="exit_code">
+    <![CDATA[
+        python '$__tool_directory__/mash_sketch_builder.py'
+          '${out_file}'
+            -s '${sketch_size}'
+            -k '${kmer_size}'
+            #if str ( $reads_assembly.reads_assembly_selector ) == "reads"
+              -m '${reads_assembly.minimum_kmer_copies}'
+              -r
+              #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired"
+                '$reads_assembly.reads_input.reads_1' '$reads_assembly.reads_input.reads_2'
+              #end if
+              #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection"
+                '$reads_assembly.reads_input.reads.forward' '$reads_assembly.reads_input.reads.reverse'
+              #end if
+              #if str( $reads_assembly.reads_input.reads_input_selector ) == "single"
+                '$reads_assembly.reads_input.reads'
+              #end if
+            #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly"
+              -p \${GALAXY_SLOTS:-1}
+              '${assembly}'
+              ${reads_assembly.individual_sequences}
+            #end if
+            -o 'sketch'
+    ]]>
+    </command>
+    <inputs>
+        <conditional name="reads_assembly">
+            <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies">
+                <option selected="True" value="reads">Reads</option>
+                <option value="assembly">Assembly</option>
+            </param>
+            <when value="reads">
+                <conditional name="reads_input">
+                    <param name="reads_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
+                        <option value="paired">Paired</option>
+                        <option value="single">Single</option>
+                        <option value="paired_collection">Paired Collection</option>
+                    </param>
+                    <when value="paired">
+                        <param name="reads_1" type="data" format="@INTYPES@" label="Select first set of reads" help="Specify dataset with forward reads"/>
+                        <param name="reads_2" type="data" format="@INTYPES@" label="Select second set of reads" help="Specify dataset with reverse reads"/>
+                    </when>
+                    <when value="single">
+                        <param name="reads" type="data" format="@INTYPES@" label="Select fastq dataset" help="Specify dataset with single reads"/>
+                    </when>
+                    <when value="paired_collection">
+                        <param name="reads" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
+                    </when>
+                </conditional>
+                <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000" label="Minimum copies of each k-mer required to pass noise filter"/>
+            </when>
+            <when value="assembly">
+                <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/>
+                <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences"/>
+            </when>
+        </conditional>
+        <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" />
+        <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32" label="kmer size" />
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="reads_assembly_selector" value="assembly" />
+            <param name="assembly" value="test_assembly.fasta"/>
+            <param name="sketch_name" value="Test Sketch" />
+            <output name="out_file" value="mash_sketch_data_manager.json" />
+        </test>
+    </tests>
+    <help>
+    </help>
+    <citations>
+        <citation type="doi">10.1186/s13059-016-0997-x</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Sat Feb 22 00:30:35 2020 +0000
@@ -0,0 +1,18 @@
+<data_managers>
+    <data_manager tool_file="data_manager/mash_sketch_builder.xml" id="mash_sketch_builder" version="2.1+galaxy0">
+        <data_table name="mash_sketches">
+            <output>
+                <column name="value"/>
+                <column name="name"/>
+                <column name="path" output_ref="out_file">
+                    <move type="directory">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">mash_sketches/${path}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/mash_sketches/${path}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mash_sketch_data_manager.json	Sat Feb 22 00:30:35 2020 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"mash_sketches": [{"name": "sketch", "path": "sketch", "value": "sketch"}]}}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_assembly.fasta	Sat Feb 22 00:30:35 2020 +0000
@@ -0,0 +1,3 @@
+>test
+GCATGTCGATCTGTGTGCTAGTCGTAGTCGATCGATCTGATCGATCTGTCAGTCAGTAGT
+CTCAGCGATGCATTATTATATTATATTATCGATCGATGCTGATCGATTATATTCGATCTG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Sat Feb 22 00:30:35 2020 +0000
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of Mash sketches in the required format -->
+    <table name="mash_sketches" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/mash_sketches.loc" />
+    </table>
+</tables>