<!--
    Galaxy wrapper for `mash sketch`.

    Builds a single sketch file from either sequencing reads (single, paired, or a
    paired collection) or an assembly. The @TOOL_VERSION@ / @INTYPES@ tokens and the
    "requirements", "version_command" and "citations" macros are not defined in this
    file; they are expected to come from the imported macros.xml.
-->
<tool id="mash_sketch" name="mash sketch" version="@TOOL_VERSION@+galaxy0" profile="19.01">
    <description>
        Create a reduced representation of a sequence or set of sequences, based on min-hashes
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="version_command" />
    <!--
        Command template (Cheetah). Every parameter is referenced through its
        enclosing conditional(s) so that resolution never depends on Galaxy's
        fallback lookup of unqualified nested names.
    -->
    <command detect_errors="exit_code"><![CDATA[
        mash sketch
            -s '${sketch_size}'
            -k '${kmer_size}'
        #if str( $reads_assembly.reads_assembly_selector ) == "reads"
            ## Reads mode: apply the k-mer copy-number filter and pass -r.
            -m '${reads_assembly.minimum_kmer_copies}'
            -r
            ## The three read layouts are mutually exclusive, hence a single chain.
            #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired"
                '$reads_assembly.reads_input.reads_1' '$reads_assembly.reads_input.reads_2'
            #elif str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection"
                '$reads_assembly.reads_input.reads.forward' '$reads_assembly.reads_input.reads.reverse'
            #elif str( $reads_assembly.reads_input.reads_input_selector ) == "single"
                '$reads_assembly.reads_input.reads'
            #end if
        #elif str( $reads_assembly.reads_assembly_selector ) == "assembly"
            ## Assembly mode: use the job's allotted slots (1 when GALAXY_SLOTS is unset).
            -p \${GALAXY_SLOTS:-1}
            '${reads_assembly.assembly}'
            ${reads_assembly.individual_sequences}
        #end if
            -o 'sketch'
    ]]></command>
    <inputs>
        <!-- Top-level switch between read input and assembly input. -->
        <conditional name="reads_assembly">
            <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies">
                <option selected="True" value="reads">Reads</option>
                <option value="assembly">Assembly</option>
            </param>
            <when value="reads">
                <!-- Layout of the read data: two datasets, one dataset, or a paired collection. -->
                <conditional name="reads_input">
                    <param name="reads_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                        <option value="paired">Paired</option>
                        <option value="single">Single</option>
                        <option value="paired_collection">Paired Collection</option>
                    </param>
                    <when value="paired">
                        <param name="reads_1" type="data" format="@INTYPES@" label="Select first set of reads" help="Specify dataset with forward reads"/>
                        <param name="reads_2" type="data" format="@INTYPES@" label="Select second set of reads" help="Specify dataset with reverse reads"/>
                    </when>
                    <when value="single">
                        <param name="reads" type="data" format="@INTYPES@" label="Select fastq dataset" help="Specify dataset with single reads"/>
                    </when>
                    <when value="paired_collection">
                        <param name="reads" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
                    </when>
                </conditional>
                <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000" label="Minimum copies of each k-mer required to pass noise filter"/>
            </when>
            <when value="assembly">
                <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/>
                <!-- Emits "-i" when checked and nothing otherwise. -->
                <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences"/>
            </when>
        </conditional>
        <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" label="Sketch size" />
        <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32" label="kmer size" />
    </inputs>
    <outputs>
        <!-- The command writes with "-o 'sketch'"; the result is collected from sketch.msh in the working directory. -->
        <data name="sketch" format="msh" from_work_dir="sketch.msh"/>
    </outputs>
    <tests>
        <!-- Single-end reads. Parameters are nested to mirror <inputs> so "reads" binds unambiguously. -->
        <test>
            <conditional name="reads_assembly">
                <param name="reads_assembly_selector" value="reads" />
                <conditional name="reads_input">
                    <param name="reads_input_selector" value="single"/>
                    <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
                </conditional>
            </conditional>
            <output name="sketch" file="ERR024951_seqtk_sample_1000_1.sketch.msh" compare="sim_size" />
        </test>
        <!-- Assembly input. -->
        <test>
            <conditional name="reads_assembly">
                <param name="reads_assembly_selector" value="assembly" />
                <param name="assembly" value="test_assembly.fasta"/>
            </conditional>
            <output name="sketch" file="test_assembly.sketch.msh" compare="sim_size" />
        </test>
    </tests>
    <help><![CDATA[

**What it does**

Create a sketch file, which is a reduced representation of a sequence or set
of sequences (based on min-hashes) that can be used for fast distance
estimations. Inputs can be fasta or fastq files (gzipped or not), and "-" can
be given to read from standard input. Input files can also be files of file
names (see -l). For output, one sketch file will be generated, but it can have
multiple sketches within it, divided by sequences or files (see -i). By
default, the output file name will be the first input file with a '.msh'
extension, or 'stdin.msh' if standard input is used (see -o).
    ]]></help>
    <expand macro="citations"/>
</tool>