diff sistr_cmd.xml @ 0:1f6159dc3598 draft default tip

planemo upload for repository https://github.com/phac-nml/sistr_cmd commit 4a9d0e766264aab4b92d8971b70112f84e28e8cd
author nml
date Fri, 20 Dec 2024 18:33:49 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sistr_cmd.xml	Fri Dec 20 18:33:49 2024 +0000
@@ -0,0 +1,344 @@
+<tool id="sistr_cmd" name="sistr_cmd" version="@VERSION@+galaxy0">
+  <!-- Short summary of the tool. -->
+  <description>
+    Salmonella In Silico Typing Resource command-line tool for serovar prediction 
+  </description>
+  <!-- The version token below feeds both the tool version attribute and the package requirement. -->
+  <macros>
+    <token name="@VERSION@">1.1.3</token>
+  </macros>
+  <xrefs>
+    <xref type="bio.tools">SISTR</xref>
+  </xrefs>
+  <requirements>
+    <requirement type="package" version="@VERSION@">sistr_cmd</requirement>
+  </requirements>
+  <!-- Exit codes of 1 and above are matched by this rule. -->
+  <stdio>
+    <exit_code range="1:"/>
+  </stdio>
+  <command><![CDATA[
+  ## Builds a single sistr invocation covering every selected genome.
+  #import re
+
+  sistr
+    #for $fasta in $input_fastas
+      ## Genome name = dataset name with its ".ext" suffix removed. Dataset names
+      ## are user-controlled, so every character outside A-Z a-z 0-9 _ . - is
+      ## replaced with "_" before the value is placed between shell quotes.
+      #set $genome_name = re.sub('[^A-Za-z0-9_.-]', '_', $fasta.element_identifier.replace("." + $fasta.ext, ""))
+      -i '$fasta' '$genome_name'
+    #end for
+    -f $output_format
+    ## The report is written into the working directory and collected through
+    ## from_work_dir on the matching output dataset.
+    -o sistr-report.$output_format
+    ## Dataset paths are quoted so unusual characters in a path cannot split arguments.
+    -p '$cgmlst_profiles'
+    -n '$novel_alleles'
+    -a '$alleles_output'
+    $use_full_cgmlst_db
+    $no_cgmlst
+    $run_mash
+    $qc
+    #if $serovar_list.selection_mode == "default"
+      --list-of-serovars
+    #elif $serovar_list.selection_mode == "custom"
+      ## The custom file is optional. When none is chosen, pass the bare flag so
+      ## the default list is used instead of the literal string "None".
+      #if $serovar_list.custom_list_of_serovars_file
+        --list-of-serovars '$serovar_list.custom_list_of_serovars_file'
+      #else
+        --list-of-serovars
+      #end if
+    #end if
+    --threads "\${GALAXY_SLOTS:-1}"
+    -T "\${TMPDIR:-/tmp}"
+    $keep_tmp
+    $more_output
+    $verbosity
+  ]]></command>
+  <inputs>
+    <!-- One or more FASTA datasets; the command template loops over them. -->
+    <param name="input_fastas" type="data" format="fasta" multiple="true" optional="false"
+           label="Input Genome(s)"/>
+    <!-- Format of the main report; the output filters key off this value. -->
+    <param name="output_format" type="select" multiple="false"
+           label="Results output format">
+      <option value="tab" selected="true">
+        Tabular (tab-delimited values)
+      </option>
+      <option value="csv">
+        CSV (Comma Separated Values)
+      </option>
+      <option value="json">
+        JSON (JavaScript Object Notation)
+      </option>
+    </param>
+    <!-- Each checkbox expands to its truevalue flag when ticked and to an empty string otherwise. -->
+    <param name="use_full_cgmlst_db" type="boolean"
+           truevalue="--use-full-cgmlst-db" falsevalue="" checked="false"
+           label="Use full cgMLST database for serovar prediction. About 10X slower with equivalent results to reduced centroid allele database."/>
+    <param name="run_mash" type="boolean"
+           truevalue="--run-mash" falsevalue="" checked="true"
+           label="Run Mash MinHash-based serovar prediction"/>
+    <param name="no_cgmlst" type="boolean"
+           truevalue="--no-cgmlst" falsevalue="" checked="false"
+           label="Skip running cgMLST-based serovar prediction"/>
+    <param name="qc" type="boolean"
+           truevalue="--qc" falsevalue="" checked="true"
+           label="Basic QC of results"/>
+    <!-- Controls whether, and against which list, the predicted serovar is checked.
+         "none" omits the option, "default" passes the bare flag, and "custom"
+         additionally offers an optional list file. -->
+    <conditional name="serovar_list">
+        <param label="Check predicted SISTR serovar against the list of serovars?" name="selection_mode" type="select">
+          <option value="none">Not selected</option>
+          <option selected="true" value="default">Use the default list</option>
+          <option value="custom">Use a custom list</option>
+        </param>
+        <when value="none"/>
+        <when value="default"/>
+        <when value="custom">
+          <!-- "txt" is Galaxy's plain-text datatype extension; "text" is not a registered extension. -->
+          <param name="custom_list_of_serovars_file" type="data" format="txt" optional="true" label="Custom list of serovars (optional)" 
+        help="if no file selected, the default one is used available at https://raw.githubusercontent.com/phac-nml/sistr_cmd/v1.1.3/sistr/data/serovar-list.txt"
+        />
+        </when>
+    </conditional>
+    <!-- Amount of antigen BLAST detail in the report; the selected value is inserted verbatim into the command. -->
+    <param name="more_output" type="select" label="Results verbosity">
+      <option value="" selected="true">
+        Basic results only
+      </option>
+      <option value="-M">
+        Report top antigen BLAST results
+      </option>
+      <option value="-MM" >
+        Report all antigen BLAST results
+      </option>
+    </param>
+    <param name="keep_tmp" type="boolean"
+           truevalue="--keep-tmp" falsevalue="" checked="false"
+           label="Keep temporary analysis directory"/>
+    <!-- Logging level; the selected value is inserted verbatim into the command. -->
+    <param name="verbosity" type="select" label="Logging verbosity">
+      <option value="">
+        Error messages only
+      </option>
+      <option value="-v">
+        Show warning messages
+      </option>
+      <option value="-vv" selected="true">
+        Show info messages
+      </option>
+      <option value="-vvv">
+        Show debug messages
+      </option>
+    </param>
+  </inputs>
+  <outputs>
+    <!-- Main report: each dataset below is filtered on output_format, so only the
+         one matching the selected format is created. -->
+    <data name="output_prediction_csv" format="csv" from_work_dir="sistr-report.csv"
+          label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (csv)">
+      <filter>output_format == "csv"</filter>
+    </data>
+    <data name="output_prediction_json" format="json" from_work_dir="sistr-report.json"
+          label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (json)">
+      <filter>output_format == "json"</filter>
+    </data>
+    <data name="output_prediction_tab" format="tabular" from_work_dir="sistr-report.tab"
+          label="${tool.name} on ${input_fastas[0].element_identifier}: Main Report (tab)">
+      <filter>output_format == "tab"</filter>
+    </data>
+    <!-- Unfiltered outputs; the command passes these dataset paths via -p, -n and -a. -->
+    <data name="cgmlst_profiles" format="csv"
+          label="${tool.name} on ${input_fastas[0].element_identifier}: cgMLST allele match results (csv)"/>
+    <data name="novel_alleles" format="fasta"
+          label="${tool.name} on ${input_fastas[0].element_identifier}: Novel cgMLST alleles (fasta)"/>
+    <data name="alleles_output" format="json"
+          label="${tool.name} on ${input_fastas[0].element_identifier}: cgMLST allele match results (json)"/>
+  </outputs>
+  <tests>
+    <!-- Serovar-list check disabled: expects a Typhi call and a 23-column report. -->
+    <test expect_num_outputs="4">
+      <param name="input_fastas" value="AE014613-699860.fasta" ftype="fasta"/>
+      <param name="output_format" value="tab"/>
+      <!-- selection_mode lives inside the serovar_list conditional, so it is addressed there. -->
+      <conditional name="serovar_list">
+        <param name="selection_mode" value="none"/>
+      </conditional>
+      <output 
+        name="cgmlst_profiles" 
+        value="cgmlst-profiles-AE014613.csv"
+        ftype="csv"
+        compare="diff">
+      </output>
+      <output name="output_prediction_tab" 
+        ftype="tabular"
+        value="sistr-results-AE014613-699860.tabular"
+        compare="sim_size"> 
+        <assert_contents>
+          <has_text text="AE014613-699860" />
+          <has_text text="Typhi" />
+          <has_text text="enterica" />
+          <has_text_matching expression="FAIL\t-\t-:-:-\t-:-:-"/>
+          <has_text text="2014-LET-0419"/>
+          <has_n_columns n="23" />
+        </assert_contents>
+      </output>
+      <output 
+        name="alleles_output" 
+        value="alleles-output-AE014613.json"
+        ftype="json"
+        compare="sim_size">
+        <assert_contents>
+          <has_text text="NZ_AOXE01000059.1"/>
+          <has_text text="Salmonella enterica subsp. enterica serovar Typhi Ty2"/>
+          <has_text text="NZ_AOXE01000033.1"/>
+          <has_text text="NZ_AOXE01000052.1"/>  
+        </assert_contents> 
+      </output>
+    </test>
+    <!-- Default serovar list in use: expects a Paratyphi B var. Java call and a
+         24-column report. -->
+    <test expect_num_outputs="4">
+      <param name="input_fastas" value="13-1101-Paratyphi_B_varJava.fasta" ftype="fasta"/>
+      <param name="output_format" value="tab"/>
+      <!-- Pinned explicitly so the column-count and "Y" assertions do not depend on
+           the select's default staying unchanged. -->
+      <conditional name="serovar_list">
+        <param name="selection_mode" value="default"/>
+      </conditional>
+      <output 
+        name="novel_alleles" 
+        value="novel-alleles-13-1101.fasta"
+        ftype="fasta"
+        compare="sim_size"/>
+      <output 
+        name="cgmlst_profiles" 
+        value="cgmlst-profiles-13-1101.csv"
+        ftype="csv"
+        compare="diff">
+      </output>
+      <output 
+        name="output_prediction_tab" 
+        value="sistr-results-13-1101.tabular"
+        ftype="tabular"
+        compare="sim_size">
+        <assert_contents>
+          <has_text text="13-1101-Paratyphi_B"/>
+          <has_text text="Paratyphi B var. Java"/>
+          <has_text text="enterica"/>
+          <has_text text="1,4,[5],12"/>
+          <has_text text="1,4,[5],12:b:1,2"/>
+          <has_text text="FAIL"/>
+          <has_text text="Y"/>
+          <has_n_columns n="24"/>
+        </assert_contents>
+      </output>
+      <output 
+        name="alleles_output" 
+        value="alleles-output-13-1101.json"
+        ftype="json"
+        compare="sim_size">
+        <assert_contents>
+          <has_text text="13-1101-Paratyphi_B"/>
+          <has_text text="NZ_AOXE01000059.1"/>
+        </assert_contents>
+      </output> 
+    </test>
+  </tests>
+  <help>
+  <![CDATA[
+
+Usage::
+
+    usage: sistr_cmd [-h] [-i fasta_path genome_name] [-f OUTPUT_FORMAT]
+                     [-o OUTPUT_PREDICTION] [-M] [-p CGMLST_PROFILES]
+                     [-n NOVEL_ALLELES] [-a ALLELES_OUTPUT] [-T TMP_DIR] [-K]
+                     [--use-full-cgmlst-db] [--no-cgmlst] [-m] [--qc] [-t THREADS]
+                     [-l [LIST_OF_SEROVARS]] [-v] [-V]
+                     [F [F ...]]
+
+    SISTR (Salmonella In Silico Typing Resource) Command-line Tool
+    ==============================================================
+    Serovar predictions from whole-genome sequence assemblies by determination of antigen gene and cgMLST gene alleles using BLAST.
+
+    Note about using the "--use-full-cgmlst-db" flag:
+      The "centroid" allele database is ~10% the size of the full set so analysis is much quicker with the "centroid" vs "full" set of alleles. 
+      Results between 2 cgMLST allele sets should not differ.
+
+    If you find this program useful in your research, please cite as:
+      The Salmonella In Silico Typing Resource (SISTR): an open web-accessible tool for rapidly typing and subtyping draft Salmonella genome assemblies.
+      Catherine Yoshida, Peter Kruczkiewicz, Chad R. Laing, Erika J. Lingohr, Victor P.J. Gannon, John H.E. Nash, Eduardo N. Taboada.
+      PLoS ONE 11(1): e0147101. doi: 10.1371/journal.pone.0147101
+
+    positional arguments:
+      F                     Input genome FASTA file
+
+    optional arguments:
+      -h, --help            show this help message and exit
+      -i fasta_path genome_name, --input-fasta-genome-name fasta_path genome_name
+                            fasta file path to genome name pair
+      -f OUTPUT_FORMAT, --output-format OUTPUT_FORMAT
+                            Output format (json, csv, tab, pickle)
+      -o OUTPUT_PREDICTION, --output-prediction OUTPUT_PREDICTION
+                            SISTR serovar prediction output path
+      -M, --more-results    Output more detailed results (-M) and all antigen
+                            search blastn results (-MM)
+      -p CGMLST_PROFILES, --cgmlst-profiles CGMLST_PROFILES
+                            Output CSV file destination for cgMLST allelic
+                            profiles
+      -n NOVEL_ALLELES, --novel-alleles NOVEL_ALLELES
+                            Output FASTA file destination of novel cgMLST alleles
+                            from input genomes
+      -a ALLELES_OUTPUT, --alleles-output ALLELES_OUTPUT
+                            Output path of allele sequences and info to JSON
+      -T TMP_DIR, --tmp-dir TMP_DIR
+                            Base temporary working directory for intermediate
+                            analysis files.
+      -K, --keep-tmp        Keep temporary analysis files.
+      --use-full-cgmlst-db  Use the full set of cgMLST alleles which can include
+                            highly similar alleles. By default the smaller
+                            "centroid" alleles or representative alleles are used
+                            for each marker.
+      --no-cgmlst           Do not run cgMLST serovar prediction
+      -m, --run-mash        Determine Mash MinHash genomic distances to Salmonella
+                            genomes with trusted serovar designations. Mash binary
+                            must be accessible via $PATH (e.g. /usr/bin).
+      --qc                  Perform basic QC to provide level of confidence in
+                            serovar prediction results.
+      -t THREADS, --threads THREADS
+                            Number of parallel threads to run sistr_cmd analysis.
+      -l [LIST_OF_SEROVARS], --list-of-serovars [LIST_OF_SEROVARS]
+                            A path to a single column text file containing list of
+                            serovars to check SISTR serovar prediction against.
+                            Result reported in the "predicted_serovar_in_list"
+                            field as Y (present) or N (absent) value.                      
+      -v, --verbose         Logging verbosity level (-v == show warnings; -vvv ==
+                            show debug info)
+      -V, --version         show program's version number and exit
+]]>
+  
+  </help>
+  <citations>
+    <!-- DOI of the SISTR paper in PLoS ONE; the full reference is given in the help text. -->
+    <citation type="doi">10.1371/journal.pone.0147101</citation>
+  </citations>
+</tool>