Mercurial > repos > bgruening > tiara

diff tiara.xml @ 0:928a2cd7b1dd draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/tiara commit eb21d389502b053adf569eb53a0b4cc9e0864fbe
author: bgruening
date: Fri, 18 Oct 2024 11:50:19 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tiara.xml	Fri Oct 18 11:50:19 2024 +0000
@@ -0,0 +1,149 @@
+<tool id="tiara" name="tiara" version="@TOOL_VERSION@+galaxy1" profile="21.05">
+    <description>Deep-learning-based approach for identification of eukaryotic sequences in the metagenomic data </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="biotools"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[ 
+        mkdir ./results/ &&
+        tiara 
+        -t \${GALAXY_SLOTS:-4} 
+        -i '$input' 
+        -o ./results/main_result.txt
+
+        #if $probabilities
+            --pr '$probabilities'
+        #end if
+        #if $min_len
+            -m '$min_len'
+        #end if
+        #if $cutoff_stage1
+            -p $cutoff_stage1
+            #if $cutoff_stage2
+            $cutoff_stage2
+            #end if
+        #end if
+        #if $advanced_options.advance.customize_kmer_length == 'customize'
+            --k1 $advanced_options.advance.first_stage_kmer
+            --k2 $advanced_options.advance.second_stage_kmer
+        #end if
+        #if $taxonomy_filter
+            --tf #for $tf in $taxonomy_filter
+                $tf
+            #end for
+            #for $tf in $taxonomy_filter
+              && ls -l ./results/
+              &&  mv ./results/${tf}*.dat ./results/${tf}.fasta
+            #end for
+        #end if
+
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="fasta" label="input fasta,fasta.gz file"/>
+        <param name="taxonomy_filter" type="select" multiple="true" optional="true" label="Write sequences to fasta,fasta.gz files specified in the arguments to this option." help="all refers to all classes present in input fasta (to separate fasta files).">
+            <option value="mit">mitochondria</option>
+            <option value="pla">plastid</option>
+            <option value="bac">bacteria</option>
+            <option value="arc">archea</option>
+            <option value="euk">eukarya</option>
+        </param>
+        <param argument="probabilities" type="boolean" truevalue="--pr" falsevalue="" checked="false" label="Add probabilities of individual classes for each sequence."/>
+        <param argument="min_len" type="integer" value="3000" min="1000" optional="true" label="Minimum length of a sequence. Default: 3000 bp." help="Specify the desired minimum length in base pairs.Default value is 3000 bp and we do not recommend classifying sequences shorter than 1000 bp. "/>
+        <param argument="cutoff_stage1" type="float" value="0.65" min="0.5" max="1" optional="true" label="Probability threshold for the first stage." help="Probability threshold needed for classification in the first stage. Default: 0.65." />
+        <param argument="cutoff_stage2" type="float" value="0.65" min="0.5" max="1" optional="true" label="Probability threshold for the second stage." help="Probability threshold needed for classification in the second stage. Default: 0.65." /> 
+        <section name="advanced_options" title="k-mer" expanded="true">
+            <conditional name="advance">
+                <param argument="customize_kmer_length" type="select" label="Advanced options">
+                    <option value="default_options">No, Use param defaults</option>
+                    <option value="customize">Yes, See full parameter list</option>
+                </param>
+                <when value="customize">
+                    <param argument="first_stage_kmer" type="select" label="Select k-mer length used in the first stage of classification (Default: 6).">
+                        <option value="4">k-mer length 4</option>
+                        <option value="5">k-mer length 5</option>
+                        <option value="6" selected="True">default k-mer length</option>
+                    </param>
+                    <param argument="second_stage_kmer" type="select" label="k-mer length used in the second stage of classification (Default: 7).">
+                        <option value="4">k-mer length 4</option>
+                        <option value="5">k-mer length 5</option>
+                        <option value="6">k-mer length 6</option>
+                        <option value="7" selected="True">default k-mer length</option>
+                    </param>
+                </when>
+                <when value="default_options">
+                    <!-- Define actions or defaults for the default option if necessary -->
+                </when>
+            </conditional>
+        </section>
+    </inputs>
+    <outputs>
+        <collection name="output" type="list" label="${tool.name} on ${on_string}: classified sequences in txt and Fasta Output">
+            <discover_datasets pattern="__name_and_ext__"  ext="fasta,txt" directory="results" />
+        </collection>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input" value="plast_fr.fasta.gz"/>
+            <param name="taxonomy_filter" value="pla"/>
+            <output_collection name="output" type="list">
+                <element name="main_result" file="main_result01.txt" ftype="txt"/>
+                <element name="pla" file="pla" ftype="fasta" />
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input" value="sample_all.fasta"/>
+            <param name="taxonomy_filter" value="euk,bac,arc"/>
+            <output_collection name="output" type="list">
+                <element name="arc" file="arc" ftype="fasta" />
+                <element name="bac" file="bac" ftype="fasta" />
+                <element name="euk" file="euk" ftype="fasta" />
+                <element name="main_result" file="main_result02.txt" ftype="txt" />
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input" value="eukarya_fr.fasta"/>
+            <param name="taxonomy_filter" value="euk"/>
+            <param name="min_len" value="5000"/>
+            <param name="cutoff_stage1" value="0.65"/>
+            <param name="cutoff_stage2" value="0.60"/>
+            <param name="probabilities" value="true"/>
+            <output_collection name="output" type="list">
+                <element name="euk" file="euk" ftype="fasta" />
+                <element name="main_result" file="main_result03.txt" ftype="txt" />
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+What it does
+============
+Tiara is a Deep-learning-based approach for identification of eukaryotic sequences in the metagenomic data powered by PyTorch.
+
+How it works
+============
+The sequences are classified in two stages:
+
+First Stage:
+Input: Sequences are classified into classes: archaea, bacteria, prokarya, eukarya, organelle, and unknown.
+Output: Classifications for each sequence into one of the above classes.
+
+Second Stage:
+Input: Sequences labeled as organelle from the first stage.
+Output: Further classification into mitochondria, plastid, or unknown.
+
+Required Inputs
+===============
+The primary input for Tiara is metagenomic sequence data that needs classification.
+
+Generated Outputs
+=================
+The output will be the sequences categorized into specific classes as described above.
+
+Additional Resources
+====================
+For a more comprehensive understanding of tiara and detailed usage instructions, please visit the tiara GitHub repository:
+tiara GitHub Repository: [https://github.com/ibe-uw/tiara]
+        
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
author	bgruening
date	Fri, 18 Oct 2024 11:50:19 +0000
parents
children