comparison sistr_cmd.xml @ 0:1f6159dc3598 draft default tip

planemo upload for repository https://github.com/phac-nml/sistr_cmd commit 4a9d0e766264aab4b92d8971b70112f84e28e8cd
author nml
date Fri, 20 Dec 2024 18:33:49 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6159dc3598
1 <tool id="sistr_cmd" name="sistr_cmd" version="@VERSION@+galaxy0">
2 <description>
3 Salmonella In Silico Typing Resource command-line tool for serovar prediction
4 </description>
5 <macros> <!-- Holds the VERSION token that other parts of this file reference. -->
6 <token name="@VERSION@">1.1.3</token> <!-- Referenced by the tool version attribute and by the package requirement version. -->
7 </macros>
8 <xrefs> <!-- Cross-reference to the tool's bio.tools entry. -->
9 <xref type="bio.tools">SISTR</xref>
10 </xrefs>
11 <requirements> <!-- The sistr_cmd package, pinned to the same version as the VERSION token. -->
12 <requirement type="package" version="@VERSION@">sistr_cmd</requirement>
13 </requirements>
14 <stdio> <!-- Rule matching any exit code of 1 or greater. NOTE(review): the severity is not set here, so it falls back to Galaxy's default; confirm that is the intended level. -->
15 <exit_code range="1:" />
16 </stdio>
17 <!-- Builds the sistr command line: one -i path/name pair per input genome, the report and cgMLST output paths, the boolean flags, the optional serovar list check, then thread, temp dir and verbosity options. Substituted values are single-quoted for the shell. --> <command><![CDATA[
18 ## The genome name is the dataset name minus its extension; apostrophes become underscores so the shell quoting cannot be broken. The serovar list path is passed only when a custom file was actually selected, otherwise the bare flag makes sistr use its default list.
19 sistr
20 #for $fasta in $input_fastas
21 -i '$fasta' '${$fasta.element_identifier.replace("." + $fasta.ext, "").replace("'", "_")}'
22 #end for
23 -f '$output_format'
24 -o 'sistr-report.$output_format'
25 -p '$cgmlst_profiles'
26 -n '$novel_alleles'
27 -a '$alleles_output'
28 $use_full_cgmlst_db
29 $no_cgmlst
30 $run_mash
31 $qc
32 #if $serovar_list.selection_mode == "custom" and $serovar_list.custom_list_of_serovars_file
33 --list-of-serovars '$serovar_list.custom_list_of_serovars_file'
34 #elif $serovar_list.selection_mode != "none"
35 --list-of-serovars
36 #end if
37 --threads "\${GALAXY_SLOTS:-1}"
38 -T "\${TMPDIR:-/tmp}"
39 $keep_tmp
40 $more_output
41 $verbosity
42 ]]></command>
43 <inputs>
44 <param
45 type="data"
46 name="input_fastas"
47 format="fasta"
48 multiple="true"
49 optional="false"
50 label="Input Genome(s)"
51 /> <!-- Genome assemblies to type; the command emits one -i path/name pair per selected dataset. -->
52 <param
53 type="select"
54 name="output_format"
55 multiple="false"
56 label="Results output format">
57 <option selected="true" value="tab">
58 Tabular (tab-delimited values)
59 </option>
60 <option value="csv">
61 CSV (Comma Separated Values)
62 </option>
63 <option value="json">
64 JSON (JavaScript Object Notation)
65 </option>
66 </param> <!-- The chosen value is passed to -f and is also the extension of the sistr-report file. -->
67 <param
68 type="boolean"
69 name="use_full_cgmlst_db"
70 label="Use full cgMLST database for serovar prediction. About 10X slower with equivalent results to reduced centroid allele database."
71 truevalue="--use-full-cgmlst-db"
72 falsevalue=""
73 checked="false"
74 />
75 <param
76 type="boolean"
77 name="run_mash"
78 label="Run Mash MinHash-based serovar prediction"
79 truevalue="--run-mash"
80 falsevalue=""
81 checked="true"
82 />
83 <param
84 type="boolean"
85 name="no_cgmlst"
86 label="Skip running cgMLST-based serovar prediction"
87 truevalue="--no-cgmlst"
88 falsevalue=""
89 checked="false"
90 />
91 <param
92 type="boolean"
93 name="qc"
94 label="Basic QC of results"
95 truevalue="--qc"
96 falsevalue=""
97 checked="true"
98 /> <!-- Each boolean above contributes its flag to the command when checked and an empty string otherwise. -->
99 <conditional name="serovar_list"> <!-- Chooses whether, and against which list, the predicted serovar is checked. -->
100 <param label="Check predicted SISTR serovar against the list of serovars?" name="selection_mode" type="select">
101 <option value="none">Not selected</option>
102 <option selected="true" value="default">Use the default list</option>
103 <option value="custom">Use a custom list</option>
104 </param>
105 <when value="none"/>
106 <when value="default"/>
107 <when value="custom"> <!-- Only this branch has an extra input: an optional plain-text list of serovars. -->
108 <param name="custom_list_of_serovars_file" type="data" format="txt" optional="true" label="Custom list of serovars (optional)"
109 help="if no file selected, the default one is used available at https://raw.githubusercontent.com/phac-nml/sistr_cmd/v@VERSION@/sistr/data/serovar-list.txt"
110 /> <!-- The format is txt, the Galaxy extension for plain text; the version in the help URL comes from the VERSION token so it follows the packaged release. -->
111 </when>
112 </conditional>
113 <param
114 type="select"
115 name="more_output"
116 label="Results verbosity">
117 <option selected="true" value="">
118 Basic results only
119 </option>
120 <option value="-M">
121 Report top antigen BLAST results
122 </option>
123 <option value="-MM">
124 Report all antigen BLAST results
125 </option>
126 </param> <!-- The selected value (empty, -M or -MM) is placed on the command line as is. -->
127 <param
128 type="boolean"
129 name="keep_tmp"
130 label="Keep temporary analysis directory"
131 truevalue="--keep-tmp"
132 falsevalue=""
133 checked="false"
134 />
135 <param
136 type="select"
137 name="verbosity"
138 label="Logging verbosity">
139 <option value="">
140 Error messages only
141 </option>
142 <option value="-v">
143 Show warning messages
144 </option>
145 <option selected="true" value="-vv">
146 Show info messages
147 </option>
148 <option value="-vvv">
149 Show debug messages
150 </option>
151 </param> <!-- The selected value (empty or -v through -vvv) is placed on the command line as is; -vv is preselected. -->
152 </inputs>
153 <outputs> <!-- Each main report is filtered on output_format, so only the one matching the selection is created; the three cgMLST outputs carry no filter. -->
154 <data
155 name="output_prediction_csv"
156 from_work_dir="sistr-report.csv"
157 format="csv"
158 label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (csv)">
159 <filter>output_format == "csv"</filter>
160 </data>
161 <data
162 name="output_prediction_json"
163 from_work_dir="sistr-report.json"
164 format="json"
165 label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (json)">
166 <filter>output_format == "json"</filter>
167 </data>
168 <data
169 name="output_prediction_tab"
170 from_work_dir="sistr-report.tab"
171 format="tabular"
172 label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (tab)">
173 <filter>output_format == "tab"</filter>
174 </data>
175 <data
176 format="csv"
177 name="cgmlst_profiles"
178 label="${tool.name} on ${input_fastas[0].element_identifier}: cgMLST allele match results (csv)" />
179 <data
180 format="fasta"
181 name="novel_alleles"
182 label="${tool.name} on ${input_fastas[0].element_identifier}: Novel cgMLST alleles (fasta)" />
183 <data
184 format="json"
185 name="alleles_output"
186 label="${tool.name} on ${input_fastas[0].element_identifier}: cgMLST allele match results (json)" />
187 </outputs>
188 <tests>
189 <test expect_num_outputs="4"> <!-- Runs on AE014613-699860.fasta with tabular output and selection_mode set to none; the report is expected to have 23 columns. -->
190 <param ftype="fasta" name="input_fastas" value="AE014613-699860.fasta"/>
191 <param value="tab" name="output_format"/>
192 <param value="none" name="selection_mode"/>
193 <output
194 name="cgmlst_profiles"
195 ftype="csv"
196 value="cgmlst-profiles-AE014613.csv"
197 compare="diff">
198 </output>
199 <output name="output_prediction_tab"
200 value="sistr-results-AE014613-699860.tabular"
201 ftype="tabular"
202 compare="sim_size">
203 <assert_contents>
204 <has_text text="AE014613-699860"/>
205 <has_text text="Typhi"/>
206 <has_text text="enterica"/>
207 <has_text_matching expression="FAIL\t-\t-:-:-\t-:-:-"/>
208 <has_text text="2014-LET-0419"/>
209 <has_n_columns n="23"/>
210 </assert_contents>
211 </output>
212 <output
213 name="alleles_output"
214 ftype="json"
215 value="alleles-output-AE014613.json"
216 compare="sim_size">
217 <assert_contents>
218 <has_text text="NZ_AOXE01000059.1"/>
219 <has_text text="Salmonella enterica subsp. enterica serovar Typhi Ty2"/>
220 <has_text text="NZ_AOXE01000033.1"/>
221 <has_text text="NZ_AOXE01000052.1"/>
222 </assert_contents>
223 </output>
224 </test>
225 <test expect_num_outputs="4"> <!-- Runs on 13-1101-Paratyphi_B_varJava.fasta with tabular output, leaving selection_mode at its preselected value; the report is expected to have 24 columns. -->
226 <param ftype="fasta" name="input_fastas" value="13-1101-Paratyphi_B_varJava.fasta"/>
227 <param value="tab" name="output_format"/>
228 <output
229 name="novel_alleles"
230 ftype="fasta"
231 value="novel-alleles-13-1101.fasta"
232 compare="sim_size"/>
233 <output
234 name="cgmlst_profiles"
235 ftype="csv"
236 value="cgmlst-profiles-13-1101.csv"
237 compare="diff">
238 </output>
239 <output
240 name="output_prediction_tab"
241 ftype="tabular"
242 value="sistr-results-13-1101.tabular"
243 compare="sim_size">
244 <assert_contents>
245 <has_text text="13-1101-Paratyphi_B"/>
246 <has_text text="Paratyphi B var. Java"/>
247 <has_text text="enterica"/>
248 <has_text text="1,4,[5],12"/>
249 <has_text text="1,4,[5],12:b:1,2"/>
250 <has_text text="FAIL"/>
251 <has_text text="Y"/>
252 <has_n_columns n="24"/>
253 </assert_contents>
254 </output>
255 <output
256 name="alleles_output"
257 ftype="json"
258 value="alleles-output-13-1101.json"
259 compare="sim_size">
260 <assert_contents>
261 <has_text text="13-1101-Paratyphi_B"/>
262 <has_text text="NZ_AOXE01000059.1"/>
263 </assert_contents>
264 </output>
265 </test>
266 </tests>
267 <help>
268 <![CDATA[
269
270 Usage::
271
272 usage: sistr_cmd [-h] [-i fasta_path genome_name] [-f OUTPUT_FORMAT]
273 [-o OUTPUT_PREDICTION] [-M] [-p CGMLST_PROFILES]
274 [-n NOVEL_ALLELES] [-a ALLELES_OUTPUT] [-T TMP_DIR] [-K]
275 [--use-full-cgmlst-db] [--no-cgmlst] [-m] [--qc] [-t THREADS]
276 [-v] [-V]
277 [F [F ...]]
278
279 SISTR (Salmonella In Silico Typing Resource) Command-line Tool
280 ==============================================================
281 Serovar predictions from whole-genome sequence assemblies by determination of antigen gene and cgMLST gene alleles using BLAST.
282
283 Note about using the "--use-full-cgmlst-db" flag:
284 The "centroid" allele database is ~10% the size of the full set so analysis is much quicker with the "centroid" vs "full" set of alleles.
285 Results between the two cgMLST allele sets should not differ.
286
287 If you find this program useful in your research, please cite as:
288 The Salmonella In Silico Typing Resource (SISTR): an open web-accessible tool for rapidly typing and subtyping draft Salmonella genome assemblies.
289 Catherine Yoshida, Peter Kruczkiewicz, Chad R. Laing, Erika J. Lingohr, Victor P.J. Gannon, John H.E. Nash, Eduardo N. Taboada.
290 PLoS ONE 11(1): e0147101. doi: 10.1371/journal.pone.0147101
291
292 positional arguments:
293 F Input genome FASTA file
294
295 optional arguments:
296 -h, --help show this help message and exit
297 -i fasta_path genome_name, --input-fasta-genome-name fasta_path genome_name
298 fasta file path to genome name pair
299 -f OUTPUT_FORMAT, --output-format OUTPUT_FORMAT
300 Output format (json, csv, pickle)
301 -o OUTPUT_PREDICTION, --output-prediction OUTPUT_PREDICTION
302 SISTR serovar prediction output path
303 -M, --more-results Output more detailed results (-M) and all antigen
304 search blastn results (-MM)
305 -p CGMLST_PROFILES, --cgmlst-profiles CGMLST_PROFILES
306 Output CSV file destination for cgMLST allelic
307 profiles
308 -n NOVEL_ALLELES, --novel-alleles NOVEL_ALLELES
309 Output FASTA file destination of novel cgMLST alleles
310 from input genomes
311 -a ALLELES_OUTPUT, --alleles-output ALLELES_OUTPUT
312 Output path of allele sequences and info to JSON
313 -T TMP_DIR, --tmp-dir TMP_DIR
314 Base temporary working directory for intermediate
315 analysis files.
316 -K, --keep-tmp Keep temporary analysis files.
317 --use-full-cgmlst-db Use the full set of cgMLST alleles which can include
318 highly similar alleles. By default the smaller
319 "centroid" alleles or representative alleles are used
320 for each marker.
321 --no-cgmlst Do not run cgMLST serovar prediction
322 -m, --run-mash Determine Mash MinHash genomic distances to Salmonella
323 genomes with trusted serovar designations. Mash binary
324 must be accessible via $PATH (e.g. /usr/bin).
325 --qc Perform basic QC to provide level of confidence in
326 serovar prediction results.
327 -t THREADS, --threads THREADS
328 Number of parallel threads to run sistr_cmd analysis.
329 -l [LIST_OF_SEROVARS], --list-of-serovars [LIST_OF_SEROVARS]
330 A path to a single column text file containing list of
331 serovars to check SISTR serovar prediction against.
332 Result reported in the "predicted_serovar_in_list"
333 field as Y (present) or N (absent) value.
334 -v, --verbose Logging verbosity level (-v == show warnings; -vvv ==
335 show debug info)
336 -V, --version show program's version number and exit
337 ]]>
338
339 </help>
340 <citations>
341 <!-- Citation for the SISTR PLOS ONE paper; the same DOI is quoted in the help text of this tool -->
342 <citation type="doi">10.1371/journal.pone.0147101</citation>
343 </citations>
344 </tool>