changeset 17:9e119de8edf4 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mash_sketch_builder/ commit 3a2663e8a2d8b056e041c3c9e550d8e522f44583-dirty"
author dfornika
date Mon, 24 Feb 2020 21:11:08 +0000
parents c842c12e81d9
children e081c3731725
files data_manager/mash_sketch_builder.py data_manager/mash_sketch_builder.xml
diffstat 2 files changed, 16 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/mash_sketch_builder.py	Mon Feb 24 19:17:36 2020 +0000
+++ b/data_manager/mash_sketch_builder.py	Mon Feb 24 21:11:08 2020 +0000
@@ -27,6 +27,9 @@
         '-p', str(mash_sketch_args["threads"]),
         str(mash_sketch_args["fasta"]),
     ]
+
+    if mash_sketch_args["individual_sequences"]:
+        args = args + ["-i"]
     
     subprocess.check_call(['mash', 'sketch'] + args, cwd=target_directory)
 
@@ -50,6 +53,7 @@
     parser.add_argument('data_manager_json')
     parser.add_argument('--kmer-size', dest='kmer_size', type=int, default=35, help='kmer length')
     parser.add_argument('--sketch-size', dest='sketch_size', type=int, default=31, help='minimizer length')
+    parser.add_argument('--individual-sequences', dest='individual_sequences', action='store_true', help='Sketch individual sequences (for multi-fasta files)')
     parser.add_argument('--fasta', dest='fasta', help='Fasta file to sketch')
     parser.add_argument('--threads', dest='threads', default=1, help='threads')
     parser.add_argument('--sketch-name', dest='sketch_name', help='Name for sketch')
@@ -59,8 +63,6 @@
     
     target_directory = data_manager_input['output_data'][0]['extra_files_path']
     
-    pprint("***" + target_directory + "***" )
-    
     try:
         os.mkdir( target_directory )
     except OSError as exc:
@@ -75,6 +77,7 @@
         "kmer_size": args.kmer_size,
         "sketch_size": args.sketch_size,
         "fasta": args.fasta,
+        "individual_sequences": args.individual_sequences,
         "threads": args.threads,
     }
 
--- a/data_manager/mash_sketch_builder.xml	Mon Feb 24 19:17:36 2020 +0000
+++ b/data_manager/mash_sketch_builder.xml	Mon Feb 24 21:11:08 2020 +0000
@@ -18,6 +18,7 @@
           --sketch-name '${sketch_name}'
           --sketch-size '${sketch_size}'
           --kmer-size '${kmer_size}'
+          ${individual_sequences}
 
     ]]>
     </command>
@@ -40,6 +41,7 @@
         <param type="text" name="sketch_name" label="Sketch name" />
         <param type="integer" name="sketch_size" value="1000" min="10" max="1000000" label="Sketch size" />
         <param type="integer" name="kmer_size" value="21" min="1" max="32" label="kmer size" />
+        <param type="boolean" name="individual_sequences" truevalue="--individual-sequences" falsevalue="" label="Sketch individual sequences"/>
     </inputs>
     <outputs>
         <data name="out_file" format="data_manager_json" />
@@ -51,8 +53,15 @@
             <output name="out_file" value="mash_sketch_data_manager.json" />
         </test>
     </tests>
-    <help>
-    </help>
+    <help><![CDATA[
+
+**What it does**
+
+  Create a sketch file, which is a reduced representation of a sequence or set
+  of sequences (based on min-hashes) that can be used for fast distance
+  estimations. For output, one sketch file will be generated, but it can have
+  multiple sketches within it, divided by sequences or files.
+    ]]></help>
     <citations>
         <citation type="doi">10.1186/s13059-016-0997-x</citation>
     </citations>