Mercurial > repos > nml > sistr_cmd
comparison sistr_cmd.xml @ 0:1f6159dc3598 draft default tip
planemo upload for repository https://github.com/phac-nml/sistr_cmd commit 4a9d0e766264aab4b92d8971b70112f84e28e8cd
| author | nml |
|---|---|
| date | Fri, 20 Dec 2024 18:33:49 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6159dc3598 |
|---|---|
| 1 <tool id="sistr_cmd" name="sistr_cmd" version="@VERSION@+galaxy0"> | |
| 2 <description> | |
| 3 Salmonella In Silico Typing Resource command-line tool for serovar prediction | |
| 4 </description> | |
| 5 <macros> | |
| 6 <token name="@VERSION@">1.1.3</token> | |
| 7 </macros> | |
| 8 <xrefs> | |
| 9 <xref type="bio.tools">SISTR</xref> | |
| 10 </xrefs> | |
| 11 <requirements> | |
| 12 <requirement type="package" version="@VERSION@">sistr_cmd</requirement> | |
| 13 </requirements> | |
| 14 <stdio> | |
| 15 <exit_code range="1:" /> | |
| 16 </stdio> | |
| 17 <command><![CDATA[ | |
| 18 | |
| 19 sistr | |
| 20 #for $fasta in $input_fastas | |
| 21 -i '$fasta' '${$fasta.element_identifier.replace("." + $fasta.ext, "")}' | |
| 22 #end for | |
| 23 -f $output_format | |
| 24 -o sistr-report.$output_format | |
| 25 -p $cgmlst_profiles | |
| 26 -n $novel_alleles | |
| 27 -a $alleles_output | |
| 28 $use_full_cgmlst_db | |
| 29 $no_cgmlst | |
| 30 $run_mash | |
| 31 $qc | |
| 32 #if $serovar_list.selection_mode == "default" | |
| 33 --list-of-serovars | |
| 34 #elif $serovar_list.selection_mode == "custom" | |
| 35 --list-of-serovars $serovar_list.custom_list_of_serovars_file | |
| 36 #end if | |
| 37 --threads "\${GALAXY_SLOTS:-1}" | |
| 38 -T "\${TMPDIR:-/tmp}" | |
| 39 $keep_tmp | |
| 40 $more_output | |
| 41 $verbosity | |
| 42 ]]></command> | |
| 43 <inputs> | |
| 44 <param | |
| 45 name="input_fastas" | |
| 46 type="data" | |
| 47 label="Input Genome(s)" | |
| 48 optional="false" | |
| 49 multiple="true" | |
| 50 format="fasta" | |
| 51 /> | |
| 52 <param | |
| 53 name="output_format" | |
| 54 type="select" | |
| 55 label="Results output format" | |
| 56 multiple="false"> | |
| 57 <option value="tab" selected="true"> | |
| 58 Tabular (tab-delimited values) | |
| 59 </option> | |
| 60 <option value="csv"> | |
| 61 CSV (Comma Separated Values) | |
| 62 </option> | |
| 63 <option value="json"> | |
| 64 JSON (JavaScript Object Notation) | |
| 65 </option> | |
| 66 </param> | |
| 67 <param | |
| 68 name="use_full_cgmlst_db" | |
| 69 type="boolean" | |
| 70 checked="false" | |
| 71 truevalue="--use-full-cgmlst-db" | |
| 72 falsevalue="" | |
| 73 label="Use full cgMLST database for serovar prediction. About 10X slower with equivalent results to reduced centroid allele database." | |
| 74 /> | |
| 75 <param | |
| 76 name="run_mash" | |
| 77 type="boolean" | |
| 78 checked="true" | |
| 79 truevalue="--run-mash" | |
| 80 falsevalue="" | |
| 81 label="Run Mash MinHash-based serovar prediction" | |
| 82 /> | |
| 83 <param | |
| 84 name="no_cgmlst" | |
| 85 type="boolean" | |
| 86 checked="false" | |
| 87 truevalue="--no-cgmlst" | |
| 88 falsevalue="" | |
| 89 label="Skip running cgMLST-based serovar prediction" | |
| 90 /> | |
| 91 <param | |
| 92 name="qc" | |
| 93 type="boolean" | |
| 94 checked="true" | |
| 95 truevalue="--qc" | |
| 96 falsevalue="" | |
| 97 label="Basic QC of results" | |
| 98 /> | |
| 99 <conditional name="serovar_list"> | |
| 100 <param label="Check predicted SISTR serovar against the list of serovars?" name="selection_mode" type="select"> | |
| 101 <option value="none">Not selected</option> | |
| 102 <option selected="true" value="default">Use the default list</option> | |
| 103 <option value="custom">Use a custom list</option> | |
| 104 </param> | |
| 105 <when value="none"/> | |
| 106 <when value="default"/> | |
| 107 <when value="custom"> | |
| 108 <param name="custom_list_of_serovars_file" type="data" format="text" optional="true" label="Custom list of serovars (optional)" | |
| 109 help="If no file is selected, the default list of serovars is used; it is available at https://raw.githubusercontent.com/phac-nml/sistr_cmd/v1.1.3/sistr/data/serovar-list.txt" | |
| 110 /> | |
| 111 </when> | |
| 112 </conditional> | |
| 113 <param | |
| 114 name="more_output" | |
| 115 type="select" | |
| 116 label="Results verbosity"> | |
| 117 <option value="" selected="true"> | |
| 118 Basic results only | |
| 119 </option> | |
| 120 <option value="-M"> | |
| 121 Report top antigen BLAST results | |
| 122 </option> | |
| 123 <option value="-MM" > | |
| 124 Report all antigen BLAST results | |
| 125 </option> | |
| 126 </param> | |
| 127 <param | |
| 128 name="keep_tmp" | |
| 129 type="boolean" | |
| 130 checked="false" | |
| 131 falsevalue="" | |
| 132 truevalue="--keep-tmp" | |
| 133 label="Keep temporary analysis directory" | |
| 134 /> | |
| 135 <param | |
| 136 name="verbosity" | |
| 137 type="select" | |
| 138 label="Logging verbosity"> | |
| 139 <option value=""> | |
| 140 Error messages only | |
| 141 </option> | |
| 142 <option value="-v"> | |
| 143 Show warning messages | |
| 144 </option> | |
| 145 <option value="-vv" selected="true"> | |
| 146 Show info messages | |
| 147 </option> | |
| 148 <option value="-vvv"> | |
| 149 Show debug messages | |
| 150 </option> | |
| 151 </param> | |
| 152 </inputs> | |
| 153 <outputs> | |
| 154 <data | |
| 155 name="output_prediction_csv" | |
| 156 format="csv" | |
| 157 label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (csv)" | |
| 158 from_work_dir="sistr-report.csv"> | |
| 159 <filter>output_format == "csv"</filter> | |
| 160 </data> | |
| 161 <data | |
| 162 name="output_prediction_json" | |
| 163 format="json" | |
| 164 label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (json)" | |
| 165 from_work_dir="sistr-report.json"> | |
| 166 <filter>output_format == "json"</filter> | |
| 167 </data> | |
| 168 <data | |
| 169 name="output_prediction_tab" | |
| 170 format="tabular" | |
| 171 label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (tab)" | |
| 172 from_work_dir="sistr-report.tab"> | |
| 173 <filter>output_format == "tab"</filter> | |
| 174 </data> | |
| 175 <data | |
| 176 name="cgmlst_profiles" | |
| 177 format="csv" | |
| 178 label="${tool.name} on ${input_fastas[0].element_identifier}: cgMLST allele match results (csv)" /> | |
| 179 <data | |
| 180 name="novel_alleles" | |
| 181 format="fasta" | |
| 182 label="${tool.name} on ${input_fastas[0].element_identifier}: Novel cgMLST alleles (fasta)" /> | |
| 183 <data | |
| 184 name="alleles_output" | |
| 185 format="json" | |
| 186 label="${tool.name} on ${input_fastas[0].element_identifier}: cgMLST allele match results (json)" /> | |
| 187 </outputs> | |
| 188 <tests> | |
| 189 <test expect_num_outputs="4"> | |
| 190 <param name="input_fastas" value="AE014613-699860.fasta" ftype="fasta"/> | |
| 191 <param name="output_format" value="tab"/> | |
| 192 <param name="selection_mode" value="none"/> | |
| 193 <output | |
| 194 name="cgmlst_profiles" | |
| 195 value="cgmlst-profiles-AE014613.csv" | |
| 196 ftype="csv" | |
| 197 compare="diff"> | |
| 198 </output> | |
| 199 <output name="output_prediction_tab" | |
| 200 ftype="tabular" | |
| 201 value="sistr-results-AE014613-699860.tabular" | |
| 202 compare="sim_size"> | |
| 203 <assert_contents> | |
| 204 <has_text text="AE014613-699860" /> | |
| 205 <has_text text="Typhi" /> | |
| 206 <has_text text="enterica" /> | |
| 207 <has_text_matching expression="FAIL\t-\t-:-:-\t-:-:-"/> | |
| 208 <has_text text="2014-LET-0419"/> | |
| 209 <has_n_columns n="23" /> | |
| 210 </assert_contents> | |
| 211 </output> | |
| 212 <output | |
| 213 name="alleles_output" | |
| 214 value="alleles-output-AE014613.json" | |
| 215 ftype="json" | |
| 216 compare="sim_size"> | |
| 217 <assert_contents> | |
| 218 <has_text text="NZ_AOXE01000059.1"/> | |
| 219 <has_text text="Salmonella enterica subsp. enterica serovar Typhi Ty2"/> | |
| 220 <has_text text="NZ_AOXE01000033.1"/> | |
| 221 <has_text text="NZ_AOXE01000052.1"/> | |
| 222 </assert_contents> | |
| 223 </output> | |
| 224 </test> | |
| 225 <test expect_num_outputs="4"> | |
| 226 <param name="input_fastas" value="13-1101-Paratyphi_B_varJava.fasta" ftype="fasta"/> | |
| 227 <param name="output_format" value="tab"/> | |
| 228 <output | |
| 229 name="novel_alleles" | |
| 230 value="novel-alleles-13-1101.fasta" | |
| 231 ftype="fasta" | |
| 232 compare="sim_size"/> | |
| 233 <output | |
| 234 name="cgmlst_profiles" | |
| 235 value="cgmlst-profiles-13-1101.csv" | |
| 236 ftype="csv" | |
| 237 compare="diff"> | |
| 238 </output> | |
| 239 <output | |
| 240 name="output_prediction_tab" | |
| 241 value="sistr-results-13-1101.tabular" | |
| 242 ftype="tabular" | |
| 243 compare="sim_size"> | |
| 244 <assert_contents> | |
| 245 <has_text text="13-1101-Paratyphi_B"/> | |
| 246 <has_text text="Paratyphi B var. Java"/> | |
| 247 <has_text text="enterica"/> | |
| 248 <has_text text="1,4,[5],12"/> | |
| 249 <has_text text="1,4,[5],12:b:1,2"/> | |
| 250 <has_text text="FAIL"/> | |
| 251 <has_text text="Y"/> | |
| 252 <has_n_columns n="24"/> | |
| 253 </assert_contents> | |
| 254 </output> | |
| 255 <output | |
| 256 name="alleles_output" | |
| 257 value="alleles-output-13-1101.json" | |
| 258 ftype="json" | |
| 259 compare="sim_size"> | |
| 260 <assert_contents> | |
| 261 <has_text text="13-1101-Paratyphi_B"/> | |
| 262 <has_text text="NZ_AOXE01000059.1"/> | |
| 263 </assert_contents> | |
| 264 </output> | |
| 265 </test> | |
| 266 </tests> | |
| 267 <help> | |
| 268 <![CDATA[ | |
| 269 | |
| 270 Usage:: | |
| 271 | |
| 272 usage: sistr_cmd [-h] [-i fasta_path genome_name] [-f OUTPUT_FORMAT] | |
| 273 [-o OUTPUT_PREDICTION] [-M] [-p CGMLST_PROFILES] | |
| 274 [-n NOVEL_ALLELES] [-a ALLELES_OUTPUT] [-T TMP_DIR] [-K] | |
| 275 [--use-full-cgmlst-db] [--no-cgmlst] [-m] [--qc] [-t THREADS] | |
| 276 [-l [LIST_OF_SEROVARS]] [-v] [-V] | |
| 277 [F [F ...]] | |
| 278 | |
| 279 SISTR (Salmonella In Silico Typing Resource) Command-line Tool | |
| 280 ============================================================== | |
| 281 Serovar predictions from whole-genome sequence assemblies by determination of antigen gene and cgMLST gene alleles using BLAST. | |
| 282 | |
| 283 Note about using the "--use-full-cgmlst-db" flag: | |
| 284 The "centroid" allele database is ~10% the size of the full set so analysis is much quicker with the "centroid" vs "full" set of alleles. | |
| 285 Results between 2 cgMLST allele sets should not differ. | |
| 286 | |
| 287 If you find this program useful in your research, please cite as: | |
| 288 The Salmonella In Silico Typing Resource (SISTR): an open web-accessible tool for rapidly typing and subtyping draft Salmonella genome assemblies. | |
| 289 Catherine Yoshida, Peter Kruczkiewicz, Chad R. Laing, Erika J. Lingohr, Victor P.J. Gannon, John H.E. Nash, Eduardo N. Taboada. | |
| 290 PLoS ONE 11(1): e0147101. doi: 10.1371/journal.pone.0147101 | |
| 291 | |
| 292 positional arguments: | |
| 293 F Input genome FASTA file | |
| 294 | |
| 295 optional arguments: | |
| 296 -h, --help show this help message and exit | |
| 297 -i fasta_path genome_name, --input-fasta-genome-name fasta_path genome_name | |
| 298 fasta file path to genome name pair | |
| 299 -f OUTPUT_FORMAT, --output-format OUTPUT_FORMAT | |
| 300 Output format (json, csv, tab, pickle) | |
| 301 -o OUTPUT_PREDICTION, --output-prediction OUTPUT_PREDICTION | |
| 302 SISTR serovar prediction output path | |
| 303 -M, --more-results Output more detailed results (-M) and all antigen | |
| 304 search blastn results (-MM) | |
| 305 -p CGMLST_PROFILES, --cgmlst-profiles CGMLST_PROFILES | |
| 306 Output CSV file destination for cgMLST allelic | |
| 307 profiles | |
| 308 -n NOVEL_ALLELES, --novel-alleles NOVEL_ALLELES | |
| 309 Output FASTA file destination of novel cgMLST alleles | |
| 310 from input genomes | |
| 311 -a ALLELES_OUTPUT, --alleles-output ALLELES_OUTPUT | |
| 312 Output path of allele sequences and info to JSON | |
| 313 -T TMP_DIR, --tmp-dir TMP_DIR | |
| 314 Base temporary working directory for intermediate | |
| 315 analysis files. | |
| 316 -K, --keep-tmp Keep temporary analysis files. | |
| 317 --use-full-cgmlst-db Use the full set of cgMLST alleles which can include | |
| 318 highly similar alleles. By default the smaller | |
| 319 "centroid" alleles or representative alleles are used | |
| 320 for each marker. | |
| 321 --no-cgmlst Do not run cgMLST serovar prediction | |
| 322 -m, --run-mash Determine Mash MinHash genomic distances to Salmonella | |
| 323 genomes with trusted serovar designations. Mash binary | |
| 324 must be accessible via $PATH (e.g. /usr/bin). | |
| 325 --qc Perform basic QC to provide level of confidence in | |
| 326 serovar prediction results. | |
| 327 -t THREADS, --threads THREADS | |
| 328 Number of parallel threads to run sistr_cmd analysis. | |
| 329 -l [LIST_OF_SEROVARS], --list-of-serovars [LIST_OF_SEROVARS] | |
| 330 A path to a single column text file containing list of | |
| 331 serovars to check SISTR serovar prediction against. | |
| 332 Result reported in the "predicted_serovar_in_list" | |
| 333 field as Y (present) or N (absent) value. | |
| 334 -v, --verbose Logging verbosity level (-v == show warnings; -vvv == | |
| 335 show debug info) | |
| 336 -V, --version show program's version number and exit | |
| 337 ]]> | |
| 338 | |
| 339 </help> | |
| 340 <citations> | |
| 341 <!-- Citation for SISTR PLOS ONE paper --> | |
| 342 <citation type="doi">10.1371/journal.pone.0147101</citation> | |
| 343 </citations> | |
| 344 </tool> |
