diff hal_halLodExtract.xml @ 0:d398af42f54a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/haltools commit 6244b9d15a5ad97ae20191e2f8fbafe2050c3cac
author iuc
date Fri, 06 Feb 2026 10:32:18 +0000
parents
children b95d65e23448
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hal_halLodExtract.xml	Fri Feb 06 10:32:18 2026 +0000
@@ -0,0 +1,122 @@
+<tool id="hal_hallodextract" name="halLodExtract" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>generates a new HAL file at a coarser Level of Detail by interpolation</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/> 
+    <expand macro="stdio"/>
+    <command detect_errors="aggressive"><![CDATA[
+        halLodExtract
+            #if $backend.format == 'mmap'
+                --format 'mmap'
+                --mmapFileSize ${backend.mmapFileSize}
+            #end if
+            #if $root
+                --root '${root}'
+            #end if
+            #if $outTree
+                --outTree '${outTree}'
+            #end if
+            --probeFrac ${probeFrac}
+            --minSeqFrac ${minSeqFrac}
+            ${keepSequences}
+            ${allSequences}
+            '${input_hal}' '${output_file}' ${scale}
+    ]]></command>
+    <inputs>
+        <expand macro="input_hal"/>
+        <param name="scale" type="float" min="1" optional="false" label="Scale factor for interpolation" help="Controls how strongly the alignment is downsampled; must be at least 1"/>
+        <expand macro="params_root_optional"/>
+        <param argument="--outTree" type="text" value="" optional="true" label="Newick tree for new HAL file" help="Specify the Newick tree for the new HAL file. Must contain only genomes from the input HAL file (input's tree used if empty)">
+            <expand macro="sanitizer_default"/>
+            <expand macro="validator_newick"/>
+        </param>
+        <param argument="--probeFrac" type="float" value="0.035" min="0" label="Fraction of bases in step-interval" help="Fraction of bases in step-interval to sample while looking for most aligned column"/>
+        <param argument="--minSeqFrac" type="float" value="0.5" min="0" label="Minimum sequence length to sample (as fraction)" help="Minimum sequence length to sample as fraction of step size: i.e. sequences with length &lt;= floor(minSeqFrac * step) are ignored"/>
+        <param argument="--keepSequences" type="boolean" truevalue="--keepSequences" falsevalue="" checked="false" label="Keep sequences" help="Write the sequence strings to the output file"/>
+        <param argument="--allSequences" type="boolean" truevalue="--allSequences" falsevalue="" checked="false" label="All sequences" help="Sample all sequences (chromosomes / contigs / etc.) no matter how small they are. By default, small sequences may be skipped if they fall within the step size"/>
+        <expand macro="hal_backend_format"/>
+    </inputs>
+    <outputs>
+        <data format="hal" name="output_file" label="${tool.name} on ${on_string}: New HAL"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1"> <!-- defaults: only the required input and scale are set; checks output size and stdout -->
+            <param name="input_hal" value="halTest.hal"/>
+            <param name="scale" value="10"/>
+            <output name="output_file" ftype="hal">
+                <assert_contents>
+                    <has_size min="40000"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_text text="tree ="/>
+                <has_text text="Graph:"/>
+            </assert_stdout>
+        </test>
+        <test expect_num_outputs="1"> <!-- defaults: checks that the default probeFrac/minSeqFrac values reach the command line -->
+            <param name="input_hal" value="halTest.hal"/>
+            <param name="scale" value="10"/>
+            <output name="output_file" ftype="hal">
+                <assert_contents>
+                    <has_size min="40000"/>
+                </assert_contents>
+            </output>
+            <assert_command>
+                <has_text text="--probeFrac 0.035"/>
+                <has_text text="--minSeqFrac 0.5"/>
+            </assert_command>
+        </test>
+        <test expect_num_outputs="1"> <!-- explicit root genome and output Newick tree -->
+            <param name="input_hal" value="halTest.hal"/>
+            <param name="scale" value="10"/>
+            <param name="root" value="Genome_0"/>
+            <param name="outTree" value="(Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>
+            <output name="output_file" ftype="hal">
+                <assert_contents>
+                    <has_size min="40000"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_text text="tree ="/>
+                <has_text text="Graph:"/>
+            </assert_stdout>
+        </test>
+        <test expect_num_outputs="1"> <!-- minSeqFrac=1 with keepSequences and allSequences enabled; a larger minimum output size is asserted -->
+            <param name="input_hal" value="halTest.hal"/>
+            <param name="scale" value="10"/>
+            <param name="minSeqFrac" value="1"/>
+            <param name="keepSequences" value="true"/>
+            <param name="allSequences" value="true"/>
+            <output name="output_file" ftype="hal">
+                <assert_contents>
+                    <has_size min="50000"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_text text="tree ="/>
+                <has_text text="Graph:"/>
+            </assert_stdout>
+        </test>
+    </tests>
+    <help><![CDATA[
+halLodExtract generates a reduced-resolution HAL file from an input HAL file by sampling the alignment at a coarser Level of Detail (LOD) using interpolation.
+It requires a scale factor that controls how strongly the data is downsampled.
+The tool can keep or drop short sequences, adjust how sequences are filtered relative to the step size, and optionally retain sequence strings.
+You can also choose a different output tree, set a root genome, and tune how many bases are probed when selecting representative columns.
+
+Use it to create a lighter HAL file for visualization, fast browsing, or downstream analyses that do not require full-resolution data.
+
+-----
+
+.. class:: warningmark
+
+Running the tool on an input HAL file in mmap format may fail, whereas input in HDF5 format is processed successfully. It is recommended to convert the input to HDF5 format first using halExtract.
+
+.. class:: warningmark
+
+At the moment, creating a HAL file in mmap format is disabled. The creation in HDF5 format is enforced.
+    ]]></help>
+    <expand macro="citation"/>
+    <expand macro="creator"/>
+</tool>
\ No newline at end of file