Mercurial > repos > nml > sistr_cmd
diff sistr_cmd.xml @ 0:1f6159dc3598 draft default tip
planemo upload for repository https://github.com/phac-nml/sistr_cmd commit 4a9d0e766264aab4b92d8971b70112f84e28e8cd
| author | nml |
|---|---|
| date | Fri, 20 Dec 2024 18:33:49 +0000 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sistr_cmd.xml Fri Dec 20 18:33:49 2024 +0000 @@ -0,0 +1,344 @@ +<tool id="sistr_cmd" name="sistr_cmd" version="@VERSION@+galaxy0"> + <description> + Salmonella In Silico Typing Resource command-line tool for serovar prediction + </description> + <macros> + <token name="@VERSION@">1.1.3</token> + </macros> + <xrefs> + <xref type="bio.tools">SISTR</xref> + </xrefs> + <requirements> + <requirement type="package" version="@VERSION@">sistr_cmd</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command><![CDATA[ + + sistr + #for $fasta in $input_fastas + -i '$fasta' '${$fasta.element_identifier.replace("." + $fasta.ext, "")}' + #end for + -f $output_format + -o sistr-report.$output_format + -p $cgmlst_profiles + -n $novel_alleles + -a $alleles_output + $use_full_cgmlst_db + $no_cgmlst + $run_mash + $qc + #if $serovar_list.selection_mode == "default" + --list-of-serovars + #elif $serovar_list.selection_mode == "custom" + --list-of-serovars $serovar_list.custom_list_of_serovars_file + #end if + --threads "\${GALAXY_SLOTS:-1}" + -T "\${TMPDIR:-/tmp}" + $keep_tmp + $more_output + $verbosity + ]]></command> + <inputs> + <param + name="input_fastas" + type="data" + label="Input Genome(s)" + optional="false" + multiple="true" + format="fasta" + /> + <param + name="output_format" + type="select" + label="Results output format" + multiple="false"> + <option value="tab" selected="true"> + Tabular (tab-delimited values) + </option> + <option value="csv"> + CSV (Comma Separated Values) + </option> + <option value="json"> + JSON (JavaScript Object Notation) + </option> + </param> + <param + name="use_full_cgmlst_db" + type="boolean" + checked="false" + truevalue="--use-full-cgmlst-db" + falsevalue="" + label="Use full cgMLST database for serovar prediction. About 10X slower with equivalent results to reduced centroid allele database." + /> + <param + name="run_mash" + type="boolean" + checked="true" + truevalue="--run-mash" + falsevalue="" + label="Run Mash MinHash-based serovar prediction" + /> + <param + name="no_cgmlst" + type="boolean" + checked="false" + truevalue="--no-cgmlst" + falsevalue="" + label="Skip running cgMLST-based serovar prediction" + /> + <param + name="qc" + type="boolean" + checked="true" + truevalue="--qc" + falsevalue="" + label="Basic QC of results" + /> + <conditional name="serovar_list"> + <param label="Check predicted SISTR serovar against the list of serovars?" name="selection_mode" type="select"> + <option value="none">Not selected</option> + <option selected="true" value="default">Use the default list</option> + <option value="custom">Use a custom list</option> + </param> + <when value="none"/> + <when value="default"/> + <when value="custom"> + <param name="custom_list_of_serovars_file" type="data" format="text" optional="true" label="Custom list of serovars (optional)" + help="if no file selected, the default one is used available at https://raw.githubusercontent.com/phac-nml/sistr_cmd/v1.1.3/sistr/data/serovar-list.txt" + /> + </when> + </conditional> + <param + name="more_output" + type="select" + label="Results verbosity"> + <option value="" selected="true"> + Basic results only + </option> + <option value="-M"> + Report top antigen BLAST results + </option> + <option value="-MM" > + Report all antigen BLAST results + </option> + </param> + <param + name="keep_tmp" + type="boolean" + checked="false" + falsevalue="" + truevalue="--keep-tmp" + label="Keep temporary analysis directory" + /> + <param + name="verbosity" + type="select" + label="Logging verbosity"> + <option value=""> + Error messages only + </option> + <option value="-v"> + Show warning messages + </option> + <option value="-vv" selected="true"> + Show info messages + </option> + <option value="-vvv"> + Show debug messages + </option> + </param> + </inputs> + <outputs> + <data + name="output_prediction_csv" + format="csv" + label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (csv)" + from_work_dir="sistr-report.csv"> + <filter>output_format == "csv"</filter> + </data> + <data + name="output_prediction_json" + format="json" + label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (json)" + from_work_dir="sistr-report.json"> + <filter>output_format == "json"</filter> + </data> + <data + name="output_prediction_tab" + format="tabular" + label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (tab)" + from_work_dir="sistr-report.tab"> + <filter>output_format == "tab"</filter> + </data> + <data + name="cgmlst_profiles" + format="csv" + label="${tool.name} on ${input_fastas[0].element_identifier}: cgMLST allele match results (csv)" /> + <data + name="novel_alleles" + format="fasta" + label="${tool.name} on ${input_fastas[0].element_identifier}: Novel cgMLST alleles (fasta)" /> + <data + name="alleles_output" + format="json" + label="${tool.name} on ${input_fastas[0].element_identifier}: cgMLST allele match results (json)" /> + </outputs> + <tests> + <test expect_num_outputs="4"> + <param name="input_fastas" value="AE014613-699860.fasta" ftype="fasta"/> + <param name="output_format" value="tab"/> + <param name="selection_mode" value="none"/> + <output + name="cgmlst_profiles" + value="cgmlst-profiles-AE014613.csv" + ftype="csv" + compare="diff"> + </output> + <output name="output_prediction_tab" + ftype="tabular" + value="sistr-results-AE014613-699860.tabular" + compare="sim_size"> + <assert_contents> + <has_text text="AE014613-699860" /> + <has_text text="Typhi" /> + <has_text text="enterica" /> + <has_text_matching expression="FAIL\t-\t-:-:-\t-:-:-"/> + <has_text text="2014-LET-0419"/> + <has_n_columns n="23" /> + </assert_contents> + </output> + <output + name="alleles_output" + value="alleles-output-AE014613.json" + ftype="json" + compare="sim_size"> + <assert_contents> + <has_text text="NZ_AOXE01000059.1"/> + <has_text text="Salmonella enterica subsp. enterica serovar Typhi Ty2"/> + <has_text text="NZ_AOXE01000033.1"/> + <has_text text="NZ_AOXE01000052.1"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="4"> + <param name="input_fastas" value="13-1101-Paratyphi_B_varJava.fasta" ftype="fasta"/> + <param name="output_format" value="tab"/> + <output + name="novel_alleles" + value="novel-alleles-13-1101.fasta" + ftype="fasta" + compare="sim_size"/> + <output + name="cgmlst_profiles" + value="cgmlst-profiles-13-1101.csv" + ftype="csv" + compare="diff"> + </output> + <output + name="output_prediction_tab" + value="sistr-results-13-1101.tabular" + ftype="tabular" + compare="sim_size"> + <assert_contents> + <has_text text="13-1101-Paratyphi_B"/> + <has_text text="Paratyphi B var. Java"/> + <has_text text="enterica"/> + <has_text text="1,4,[5],12"/> + <has_text text="1,4,[5],12:b:1,2"/> + <has_text text="FAIL"/> + <has_text text="Y"/> + <has_n_columns n="24"/> + </assert_contents> + </output> + <output + name="alleles_output" + value="alleles-output-13-1101.json" + ftype="json" + compare="sim_size"> + <assert_contents> + <has_text text="13-1101-Paratyphi_B"/> + <has_text text="NZ_AOXE01000059.1"/> + </assert_contents> + </output> + </test> + </tests> + <help> + <![CDATA[ + +Usage:: + + usage: sistr_cmd [-h] [-i fasta_path genome_name] [-f OUTPUT_FORMAT] + [-o OUTPUT_PREDICTION] [-M] [-p CGMLST_PROFILES] + [-n NOVEL_ALLELES] [-a ALLELES_OUTPUT] [-T TMP_DIR] [-K] + [--use-full-cgmlst-db] [--no-cgmlst] [-m] [--qc] [-t THREADS] + [-v] [-V] + [F [F ...]] + + SISTR (Salmonella In Silico Typing Resource) Command-line Tool + ============================================================== + Serovar predictions from whole-genome sequence assemblies by determination of antigen gene and cgMLST gene alleles using BLAST. + + Note about using the "--use-full-cgmlst-db" flag: + The "centroid" allele database is ~10% the size of the full set so analysis is much quicker with the "centroid" vs "full" set of alleles. + Results between 2 cgMLST allele sets should not differ. + + If you find this program useful in your research, please cite as: + The Salmonella In Silico Typing Resource (SISTR): an open web-accessible tool for rapidly typing and subtyping draft Salmonella genome assemblies. + Catherine Yoshida, Peter Kruczkiewicz, Chad R. Laing, Erika J. Lingohr, Victor P.J. Gannon, John H.E. Nash, Eduardo N. Taboada. + PLoS ONE 11(1): e0147101. doi: 10.1371/journal.pone.0147101 + + positional arguments: + F Input genome FASTA file + + optional arguments: + -h, --help show this help message and exit + -i fasta_path genome_name, --input-fasta-genome-name fasta_path genome_name + fasta file path to genome name pair + -f OUTPUT_FORMAT, --output-format OUTPUT_FORMAT + Output format (json, csv, pickle) + -o OUTPUT_PREDICTION, --output-prediction OUTPUT_PREDICTION + SISTR serovar prediction output path + -M, --more-results Output more detailed results (-M) and all antigen + search blastn results (-MM) + -p CGMLST_PROFILES, --cgmlst-profiles CGMLST_PROFILES + Output CSV file destination for cgMLST allelic + profiles + -n NOVEL_ALLELES, --novel-alleles NOVEL_ALLELES + Output FASTA file destination of novel cgMLST alleles + from input genomes + -a ALLELES_OUTPUT, --alleles-output ALLELES_OUTPUT + Output path of allele sequences and info to JSON + -T TMP_DIR, --tmp-dir TMP_DIR + Base temporary working directory for intermediate + analysis files. + -K, --keep-tmp Keep temporary analysis files. + --use-full-cgmlst-db Use the full set of cgMLST alleles which can include + highly similar alleles. By default the smaller + "centroid" alleles or representative alleles are used + for each marker. + --no-cgmlst Do not run cgMLST serovar prediction + -m, --run-mash Determine Mash MinHash genomic distances to Salmonella + genomes with trusted serovar designations. Mash binary + must be in accessible via $PATH (e.g. /usr/bin). + --qc Perform basic QC to provide level of confidence in + serovar prediction results. + -t THREADS, --threads THREADS + Number of parallel threads to run sistr_cmd analysis. + -l [LIST_OF_SEROVARS], --list-of-serovars [LIST_OF_SEROVARS] + A path to a single column text file containing list of + serovars to check SISTR serovar prediction against. + Result reported in the "predicted_serovar_in_list" + field as Y (present) or N (absent) value. + -v, --verbose Logging verbosity level (-v == show warnings; -vvv == + show debug info) + -V, --version show program's version number and exit +]]> + + </help> + <citations> + <!-- Citation for SISTR PLOS ONE paper --> + <citation type="doi">10.1371/journal.pone.0147101</citation> + </citations> +</tool>
