changeset 1:af86557c1b87 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mash_sketch_builder/ commit 20558d9de7b6dd375a8835698f231cbc0c83446d-dirty"
author dfornika
date Sat, 22 Feb 2020 01:01:25 +0000
parents 647eedfd1556
children d892b30d5492
files data_manager/mash_sketch_builder.py data_manager/mash_sketch_builder.xml macros.xml
diffstat 2 files changed, 22 insertions(+), 74 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/mash_sketch_builder.py	Sat Feb 22 00:30:35 2020 +0000
+++ b/data_manager/mash_sketch_builder.py	Sat Feb 22 01:01:25 2020 +0000
@@ -19,22 +19,15 @@
     sketch_path = os.path.join(UUID, "sketch"),
 
     args = [
-        '--threads', str(kraken2_args["threads"]),
         '-k', str(mash_sketch_args["kmer_size"]),
         '-s', str(mash_sketch_args["sketch_size"]),
-        '-o', sketch_path
+        '-o', sketch_path,
+        '-p', str(mash_sketch_args["threads"]),
+        str(mash_sketch_args["fasta"]),
     ]
 
     subprocess.check_call(['mash', 'sketch'] + args, cwd=target_directory)
 
-    if kraken2_args["clean"]:
-        args = [
-            '--threads', str(kraken2_args["threads"]),
-            '--clean',
-            '--db', database_path
-        ]
-
-        subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
 
     data_table_entry = {
         'data_tables': {
@@ -56,6 +49,7 @@
     parser.add_argument('data_manager_json')
     parser.add_argument('--kmer-size', dest='kmer_size', type=int, default=35, help='kmer length')
     parser.add_argument('--sketch-size', dest='sketch_size', type=int, default=31, help='minimizer length')
+    parser.add_argument('--fasta', dest='fasta', type='string', help='Fasta file to sketch')
     parser.add_argument('--threads', dest='threads', default=1, help='threads')
     parser.add_argument('--sketch-name', dest='sketch_name', help='Name for sketch')
     args = parser.parse_args()
@@ -75,11 +69,18 @@
     data_manager_output = {}
 
     mash_sketch_args = {
-        "kmer_size": args.kmer_len,
-        "sketch_size": args.minimizer_len,
+        "kmer_size": args.kmer_size,
+        "sketch_size": args.sketch_size,
+        "fasta": args.fasta,
         "threads": args.threads,
     }
 
+    data_manager_output = mash_sketch(
+        mash_sketch_args,
+        sketch_name,
+        target_directory,
+    )
+
     open(args.data_manager_json, 'w').write(json.dumps(data_manager_output, sort_keys=True))
 
 
--- a/data_manager/mash_sketch_builder.xml	Sat Feb 22 00:30:35 2020 +0000
+++ b/data_manager/mash_sketch_builder.xml	Sat Feb 22 01:01:25 2020 +0000
@@ -1,14 +1,6 @@
 <?xml version="1.0"?>
 <tool id="mash_sketch_builder" name="Mash Sketch" tool_type="manage_data" version="2.1+galaxy0" profile="18.09">
-    <macros>
-        <xml name="common_params">
-            <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" />
-            <param name="minimizer_len" type="integer" value="31" label="Minimizer length" />
-            <param name="minimizer_spaces" type="integer" value="6" label="Minimizer spaces" />
-            <param name="clean" type="boolean" truevalue="--clean" falsevalue="" checked="true" label="Clean up extra files" />
-        </xml>
-    </macros>
-    <description>database builder</description>
+    <description>builder</description>
     <requirements>
         <requirement type="package" version="2.1">mash</requirement>
         <requirement type="package" version="3.7">python</requirement>
@@ -18,68 +10,23 @@
     <![CDATA[
         python '$__tool_directory__/mash_sketch_builder.py'
           '${out_file}'
-            -s '${sketch_size}'
-            -k '${kmer_size}'
-            #if str ( $reads_assembly.reads_assembly_selector ) == "reads"
-              -m '${reads_assembly.minimum_kmer_copies}'
-              -r
-              #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired"
-                '$reads_assembly.reads_input.reads_1' '$reads_assembly.reads_input.reads_2'
-              #end if
-              #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection"
-                '$reads_assembly.reads_input.reads.forward' '$reads_assembly.reads_input.reads.reverse'
-              #end if
-              #if str( $reads_assembly.reads_input.reads_input_selector ) == "single"
-                '$reads_assembly.reads_input.reads'
-              #end if
-            #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly"
-              -p \${GALAXY_SLOTS:-1}
-              '${assembly}'
-              ${reads_assembly.individual_sequences}
-            #end if
-            -o 'sketch'
+          --fasta '${all_fasta_source.fields.path}'
+          --sketch-size '${sketch_size}'
+          --kmer-size '${kmer_size}'
     ]]>
     </command>
     <inputs>
-        <conditional name="reads_assembly">
-            <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies">
-                <option selected="True" value="reads">Reads</option>
-                <option value="assembly">Assembly</option>
-            </param>
-            <when value="reads">
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
-                        <option value="paired">Paired</option>
-                        <option value="single">Single</option>
-                        <option value="paired_collection">Paired Collection</option>
-                    </param>
-                    <when value="paired">
-                        <param name="reads_1" type="data" format="@INTYPES@" label="Select first set of reads" help="Specify dataset with forward reads"/>
-                        <param name="reads_2" type="data" format="@INTYPES@" label="Select second set of reads" help="Specify dataset with reverse reads"/>
-                    </when>
-                    <when value="single">
-                        <param name="reads" type="data" format="@INTYPES@" label="Select fastq dataset" help="Specify dataset with single reads"/>
-                    </when>
-                    <when value="paired_collection">
-                        <param name="reads" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
-                    </when>
-                </conditional>
-                <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000" label="Minimum copies of each k-mer required to pass noise filter"/>
-            </when>
-            <when value="assembly">
-                <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/>
-                <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences"/>
-            </when>
-        </conditional>
-        <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" />
-        <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32" label="kmer size" />
+        <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
+            <options from_data_table="all_fasta"/>
+        </param>
+        <param type="integer" name="sketch_size" value="1000" min="10" max="1000000" label="Sketch size" />
+        <param type="integer" name="kmer_size" value="21" min="1" max="32" label="kmer size" />
     </inputs>
     <outputs>
         <data name="out_file" format="data_manager_json" />
     </outputs>
     <tests>
         <test>
-            <param name="reads_assembly_selector" value="assembly" />
             <param name="assembly" value="test_assembly.fasta"/>
             <param name="sketch_name" value="Test Sketch" />
             <output name="out_file" value="mash_sketch_data_manager.json" />