changeset 2:e5ff71aed27b draft default tip

planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/helixer commit 54445b041f46b8b3a8e25e90e84f034cdddd2ab1
author genouest
date Tue, 09 Jul 2024 13:09:15 +0000
parents 0fefe93b5268
children
files helixer.xml macros.xml test-data/test.h5
diffstat 2 files changed, 110 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/helixer.xml	Mon Sep 25 12:47:01 2023 +0000
+++ b/helixer.xml	Tue Jul 09 13:09:15 2024 +0000
@@ -1,61 +1,76 @@
 <?xml version="1.0"?>
-<tool id="helixer" name="Helixer" version="@TOOL_VERSION@" profile="21.05">
+<tool id="helixer" name="Helixer" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
     <description>gene calling</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-
+    <xrefs>
+        <xref type="bio.tools">helixer</xref>
+    </xrefs>
     <requirements>
         <expand macro="requirements"/>
     </requirements>
 
     <command detect_errors="exit_code"><![CDATA[
-
         /usr/local/bin/fetch_helixer_models.py &&
         Helixer.py
         --fasta-path '$input'
         --species '$species'
-        --lineage $lineage.lineages
         --gff-output-path '$output'
-
         --temporary-dir ./
 
-        --subsequence-length $lineage.subsequence_length
-        #if str($lineage.option_overlap.use_overlap) == "true":
-            --overlap-offset $lineage.option_overlap.overlap_offset
-            --overlap-core-length $lineage.option_overlap.overlap_core_length
+        #set default_subsequence_length = {"fungi": 21384, "land_plant": 106920, "invertebrate": 213840, "vertebrate": 213840}
+        #set subsequence_len = default_subsequence_length.get(str($lineages))
+        #set default_overlap_offset = {"fungi": 10692, "land_plant": 53460, "invertebrate": 106920, "vertebrate": 106920}
+        #set overlap_off = default_overlap_offset.get(str($lineages))
+        #set default_overlap_core_length = {"fungi": 16038, "land_plant": 80190, "invertebrate": 160380, "vertebrate": 160380}
+        #set overlap_core_len = default_overlap_core_length.get(str($lineages))
+
+        #if str($input_model) != "" and str($input_model) != "None":
+            --model-filepath '$input_model'
+            --subsequence-length '$subsequence_length'
+            
         #else:
-            --no-overlap
+            --lineage '$lineages'
+            #if str($subsequence_length) == "":
+                --subsequence-length '$subsequence_len'
+            #else:
+                --subsequence-length '$subsequence_length'
+            #end if
+
+            #if str($option_overlap.use_overlap) == "true":
+                #if str($option_overlap.overlap_offset) == "":
+                    --overlap-offset '$overlap_off'
+                #else:
+                    --overlap-offset '$overlap_offset'
+                #end if
+
+                #if str($option_overlap.overlap_core_length) == "":
+                    --overlap-core-length '$overlap_core_len'
+                #else:
+                    --overlap-core-length '$overlap_core_length'
+                #end if
+            #end if
         #end if
+
         --batch-size $size
         --window-size $post_processing.window_size
         --min-coding-length $post_processing.min_coding_length
         --edge-threshold $post_processing.edge_threshold
         --peak-threshold $post_processing.peak_threshold
+
     ]]></command>
 
     <inputs>
         <param argument="--fasta-path" name="input" type="data" format="fasta,fasta.gz" label="Genomic sequence"></param>
-        <conditional name="lineage">
-            <param argument="--lineage" name="lineages" type="select" label="Available lineages" help="Choose the model to use for the annotation">
-                <option value="land_plant">land plant</option>
-                <option value="vertebrate">vertebrate</option>
-                <option value="invertebrate">invertebrate</option>
-                <option value="fungi">fungi</option>
-            </param>
-            <when value="land_plant">
-                <expand macro="subseq" length="106920" offset="53460" offsetlen="80190" />
-            </when>
-            <when value="vertebrate">
-                <expand macro="subseq" length="213840" offset="106920" offsetlen="160380" />
-            </when>
-            <when value="invertebrate">
-                <expand macro="subseq" length="213840" offset="106920" offsetlen="160380" />
-            </when>
-            <when value="fungi">
-                <expand macro="subseq" length="21384" offset="10692" offsetlen="16038" />
-            </when>
-        </conditional>
+        <param argument="--model-filepath" optional="true" name="input_model" type="data" format="h5" label="Lineage model" help="Import your lineage model to replace the default lineage. Please enter a value for the --subsequence_length parameter."></param>
+        <param argument="--lineage" name="lineages" type="select" label="Available lineages" help="Choose the model to use for the annotation">
+            <option value="land_plant">land plant</option>
+            <option value="vertebrate">vertebrate</option>
+            <option value="invertebrate">invertebrate</option>
+            <option value="fungi">fungi</option>
+        </param>
+
         <param argument="--species" type="text" optional="true" label="Species name">
             <sanitizer invalid_char="">
                 <valid initial="string.letters,string.digits">
@@ -71,6 +86,20 @@
             <param argument="--peak-threshold" type="float" min="0" max="1" value="0.8" label="Peak threshold" help="This threshold specifies the minimum peak genic score required to accept the candidate region"/>
             <param argument="--min-coding-length" type="integer"  min="0" value="100" label="Minimum coding length"/>
         </section>
+
+        <param name="subsequence_length" type="text" label="Subsequence length: how much of the genome the Neural Network can see at once" help="If you do not want to keep the default value, please enter the new value. Default values are 21384 for fungi, 106920 for land plant, and 213840 for vertebrates and invertebrates"/>
+
+        <conditional name="option_overlap">
+            <param name="use_overlap" type="select" label="Enable overlapping step after predictions" help="This step combines predictions made on each subsequences, to improve quality near start and end of subsequences.">
+                <option value="true" selected="true">Yes</option>
+                <option value="false">No</option>
+            </param>
+            <when value="true">
+                <param name="overlap_offset" type="text"  label="Overlap offset: Smaller values may lead to better predictions but will take longer" help="If you do not want to keep the default value, please enter the new value. Default values are 10692 for fungi, 53460 for land plant, and 106920 for vertebrates and invertebrates."/>
+                <param name="overlap_core_length" type="text" label="Overlap core length: Predicted subsequences will be cut to this length to increase prediction quality. Smaller values may lead to better predictions but will take longer" help="If you do not want to keep the default value, please enter the new value. Default values are 16038 for fungi, 80190 for land plant, and 160380 for vertebrates and invertebrates."/>
+            </when>
+            <when value="false"/>
+        </conditional>
     </inputs>
 
     <outputs>
@@ -79,41 +108,65 @@
     </outputs>
     <tests>
         <test expect_num_outputs="1">
-            <!-- Test for species and land_plant-->
+            <!-- Test for species and land_plant-->        
             <param name="input" value="sequence.fasta"/>
+            <param name="lineages" value="land_plant"/>
             <param name="species" value="Arabidopsis"/>
-            <conditional name="lineage">
-                <param name="lineages" value="land_plant"/>
-            </conditional>
             <param name="size" value="8"/>
+            <param name="subsequence_length" value=""/>
+        <section name="post_processing">
+            <param name="window_size" value="100"/>
+            <param name="edge_threshold" value="0.1"/>
+            <param name="peak_threshold" value="0.8"/>
+            <param name="min_coding_length" value="100"/>
+        </section>
+        <conditional name="option_overlap">
+            <param name="use_overlap" value="true"/>
+            <param name="overlap_offset" value=""/>
+            <param name="overlap_core_length" value=""/>
+        </conditional>
             <output name="output" value="ouput_species.gff3" ftype="gff3" compare="sim_size" delta="100"/>
         </test>
+        
         <test expect_num_outputs="1">
-            <!-- Test for vertebrates-->
+        <!-- Test for vertebrates -->
             <param name="input" value="sequence.fasta"/>
-            <conditional name="lineage">
-                <param name="lineages" value="vertebrate"/>
-            </conditional>
+            <param name="lineages" value="vertebrate"/>
+            <param name="size" value="8"/>
             <param name="size" value="8"/>
+            <param name="subsequence_length" value=""/>
+        <section name="post_processing">
+            <param name="window_size" value="100"/>
+            <param name="edge_threshold" value="0.1"/>
+            <param name="peak_threshold" value="0.8"/>
+            <param name="min_coding_length" value="100"/>
+        </section>
+        <conditional name="option_overlap">
+            <param name="use_overlap" value="true"/>
+            <param name="overlap_offset" value=""/>
+            <param name="overlap_core_length" value=""/>
+        </conditional>
             <output name="output" value="vertebrate.gff3" ftype="gff3" lines_diff="2"/>
         </test>
-        <test expect_num_outputs="1">
-            <!-- Test for invertebrates-->
+
+        <test expect_failure="true">        
+        <!-- Test for model-filepath -->
             <param name="input" value="sequence.fasta"/>
-            <conditional name="lineage">
-                <param name="lineages" value="invertebrate"/>
-            </conditional>
+            <param name="input_model" value="test.h5"/>
+            <param name="size" value="8"/>
             <param name="size" value="8"/>
-            <output name="output" value="invertebrate.gff3" ftype="gff3" lines_diff="2"/>
-        </test>
-        <test expect_num_outputs="1">
-            <!-- Test for fungi-->
-            <param name="input" value="sequence.fasta"/>
-            <conditional name="lineage">
-                <param name="lineages" value="fungi"/>
-            </conditional>
-            <param name="size" value="8"/>
-            <output name="output" value="fungi.gff3" ftype="gff3" lines_diff="2"/>
+            <param name="subsequence_length" value="1000"/>
+        <section name="post_processing">
+            <param name="window_size" value="100"/>
+            <param name="edge_threshold" value="0.1"/>
+            <param name="peak_threshold" value="0.8"/>
+            <param name="min_coding_length" value="100"/>
+        </section>
+        <conditional name="option_overlap">
+            <param name="use_overlap" value="true"/>
+            <param name="overlap_offset" value=""/>
+            <param name="overlap_core_length" value=""/>
+        </conditional>
         </test>
     </tests>
 
--- a/macros.xml	Mon Sep 25 12:47:01 2023 +0000
+++ b/macros.xml	Tue Jul 09 13:09:15 2024 +0000
@@ -1,5 +1,7 @@
 <macros>
-    <token name="@TOOL_VERSION@">0.3.2</token>
+    <token name="@TOOL_VERSION@">0.3.3</token>
+    <token name="@VERSION_SUFFIX@">1</token>
+
 
     <xml name="citation">
         <citations>
@@ -11,20 +13,6 @@
     <xml name="requirements">
         <container type="docker">gglyptodon/helixer-docker:helixer_v@TOOL_VERSION@_cuda_11.8.0-cudnn8</container>
     </xml>
-    
-    <xml name="subseq" tokens="length,offset,offsetlen">
-        <param argument="--subsequence-length" type="integer" min="0" max="213840" value="@LENGTH@" label="Subsequence length: how much of the genome the Neural Network can see at once" help="Should ideally be comfortably longer than the typical gene. For genomes with large genes (>20kpb) it is recommended to increase this parameter."></param>
-        <conditional name="option_overlap">
-            <param name="use_overlap" type="select" label="Enable overlapping step after predictions" help="This step combines predictions made on each subsequences, to improve quality near start and end of subsequences">
-                <option value="true" selected="true">Yes</option>
-                <option value="false">No</option>
-            </param>
-            <when value="true">
-                <param argument="--overlap-offset" type="integer" min="0" value="@OFFSET@" label="Overlap offset" help="Smaller values may lead to better predictions but will take longer. The subsequence length should be evenly divisible by this value."/>
-                <param argument="--overlap-core-length" type="integer" min="0" value="@OFFSETLEN@" label="Overlap core length" help="Predicted subsequences will be cut to this length to increase prediction quality. Smaller values may lead to better predictions but will take longer. Has to be smaller than subsequence_length."/>
-            </when>
-            <when value="false"/>
-        </conditional>
-    </xml>
+
 </macros>