Mercurial > repos > iuc > megahit

--- a/megahit_wrapper.xml	Sun Jan 09 16:57:18 2022 +0000
+++ b/megahit_wrapper.xml	Thu Feb 13 19:36:06 2025 +0000
@@ -1,17 +1,20 @@
-<?xml version='1.0' encoding='utf-8'?>
-<tool id="megahit" name="MEGAHIT" version="@TOOL_VERSION@+galaxy0">
+<tool id="megahit" name="MEGAHIT" version="@TOOL_VERSION@+galaxy2">
     <description>for metagenomics assembly</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.2.9</token>
+    </macros>
     <xrefs>
         <xref type="bio.tools">megahit</xref>
     </xrefs>
-    <macros>
-        <token name="@TOOL_VERSION@">1.2.9</token>
-    </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">megahit</requirement>
     </requirements>
     <version_command>megahit --version</version_command>
     <command detect_errors="exit_code"><![CDATA[
+if [[ -n "\$GALAXY_MEMORY_MB" ]]; then
+    MEMORY="-m \$((GALAXY_MEMORY_MB * 1024))";
+fi;
+
 megahit
     --num-cpu-threads \${GALAXY_SLOTS:-4}
     #if $input_option.choice == 'paired'
@@ -52,7 +55,7 @@
     ${advanced_section.nolocal}
     ${advanced_section.kmin1pass}
     --min-contig-len '${output_section.min_contig_len}'
-&& cat megahit_out/log
+    \$MEMORY
     ]]></command>
     <inputs>
         <conditional name="input_option">
@@ -88,37 +91,38 @@
             </when>
         </conditional>
         <section name="basic_section" title="Basic assembly options" expanded="True">
-            <param name="min_count" argument="--min-count" type="integer" value="2" label="minimum multiplicity for filtering (k_min+1)-mers" help="(kmin+1)-mer with multiplicity lower than d (default 2, specified by --min-count option) will be discarded. You should be cautious to set d less than 2, which will lead to a much larger and noisy graph. We recommend using the default value 2 for metagenomics assembly. If you want to use MEGAHIT to do generic assemblies, please change this value according to the sequencing depth. (recommend --min-count 3 for >40x)."/>
+            <param argument="--min-count" type="integer" value="2" label="minimum multiplicity for filtering (k_min+1)-mers" help="(kmin+1)-mer with multiplicity lower than d (default 2, specified by --min-count option) will be discarded. You should be cautious to set d less than 2, which will lead to a much larger and noisy graph. We recommend using the default value 2 for metagenomics assembly. If you want to use MEGAHIT to do generic assemblies, please change this value according to the sequencing depth. (recommend --min-count 3 for >40x)."/>
             <conditional name="k_mer">
                 <param name="k_mer_method" type="select" label="K-mer specification method">
                     <option value="klist_method">Specify list</option>
                     <option value="klim_method">Specify min, max, and step values</option>
                 </param>
                 <when value="klist_method">
-                    <param name="k_list" argument="--k-list" type="text" value="21,29,39,59,79,99,119,141" label="Comma-separated list of kmer size" help="all must be odd, in the range 15-255, and with increments &lt;= 28"/>
+                    <param argument="--k-list" type="text" value="21,29,39,59,79,99,119,141" label="Comma-separated list of kmer size" help="all must be odd, in the range 15-255, and with increments &lt;= 28"/>
                 </when>
                 <when value="klim_method">
-                    <param name="k_min" argument="--k-min" type="integer" value="21" label="Minimum kmer size" max="255" help="Must be odd number. For ultra complex metagenomics data such as soil, a larger kmin, say 27, is recommended to reduce the complexity of the de Bruijn graph. Quality trimming is also recommended. For high-depth generic data, large --k-min (25 to 31) is recommended. Smaller --k-step, say 10, is more friendly to low-coverage datasets."/>
-                    <param name="k_max" argument="--k-max" type="integer" value="141" label="Maximum kmer size" max="255" help="must be odd number"/>
-                    <param name="k_step" argument="--k-step" type="integer" value="12" label="Increment of kmer size of each iteration" max="28" help="must be even number"/>
+                    <param argument="--k-min" type="integer" value="21" label="Minimum kmer size" max="255" help="Must be odd number. For ultra complex metagenomics data such as soil, a larger kmin, say 27, is recommended to reduce the complexity of the de Bruijn graph. Quality trimming is also recommended. For high-depth generic data, large --k-min (25 to 31) is recommended. Smaller --k-step, say 10, is more friendly to low-coverage datasets."/>
+                    <param argument="--k-max" type="integer" value="141" label="Maximum kmer size" max="255" help="must be odd number"/>
+                    <param argument="--k-step" type="integer" value="12" label="Increment of kmer size of each iteration" max="28" help="must be even number"/>
                 </when>
             </conditional>
         </section>
         <section name="advanced_section" title="Advanced assembly options" expanded="False">
             <param name="nomercy" type="boolean" checked="false" truevalue="--no-mercy" falsevalue="" label="Do not add mercy kmers" help="Mercy kmers are specially designed for metagenomics assembly to recover low coverage sequences. For generic dataset >= 30x, MEGAHIT may generate better results with no mercy kmers." />
-            <param name="bubble_level" argument="--bubble-level" type="integer" value="2" min="0" max="2" label="Intensity of bubble merging (0-2), 0 to disable" />
-            <param name="merge_level" argument="--merge-level" type="text" value="20,0.95" label="Merge complex bubbles of length" />
-            <param name="prune_level" argument="--prune-level" type="integer" value="2" min="0" max="3" label="Strength of low depth pruning" />
-            <param name="prune_depth" argument="--prune-depth" type="integer" value="2" min="0" label="Remove unitigs with avg kmer depth less than this value" />
-			<param name="disconnect_ratio" argument="--disconnect-ratio" type="float" value="0.1" label="Disconnect unitigs if its depth is less than this ratio times the total depth of itself and its siblings" />
-            <param name="low_local_ratio" argument="--low-local-ratio" type="float" value="0.2" label="Remove unitigs if its depth is less than this ratio times the average depth of the neighborhoods" />
-			<param name="cleaning_rounds" argument="--cleaning-rounds" type="integer" value="5" label="Number of rounds for graph cleanning" />
+            <param argument="--bubble-level" type="integer" value="2" min="0" max="2" label="Intensity of bubble merging (0-2), 0 to disable" />
+            <param argument="--merge-level" type="text" value="20,0.95" label="Merge complex bubbles of length" />
+            <param argument="--prune-level" type="integer" value="2" min="0" max="3" label="Strength of low depth pruning" />
+            <param argument="--prune-depth" type="integer" value="2" min="0" label="Remove unitigs with avg kmer depth less than this value" />
+			<param argument="--disconnect-ratio" type="float" value="0.1" label="Disconnect unitigs if its depth is less than this ratio times the total depth of itself and its siblings" />
+            <param argument="--low-local-ratio" type="float" value="0.2" label="Remove unitigs if its depth is less than this ratio times the average depth of the neighborhoods" />
+			<param argument="--cleaning-rounds" type="integer" value="5" label="Number of rounds for graph cleanning" />
             <param name="nolocal" type="boolean" checked="false" truevalue="--no-local" falsevalue="" label="Disable local assembly" />
             <param name="kmin1pass" type="boolean" checked="false" truevalue="--kmin-1pass" falsevalue="" label="Use 1pass mode to build SdBG of k_min" />
         </section>
         <section name="output_section" title="Output options" expanded="True">
-            <param name="min_contig_len" argument="--min-contig-len" type="integer" value="200" label="Minimum length of contigs to output" />
+            <param argument="--min-contig-len" type="integer" value="200" label="Minimum length of contigs to output" />
             <param name="show_intermediate_contigs" type="boolean" checked="false" label="Return intermediate contigs?"/>
+            <param name="log_file" type="boolean" label="Generate a log file" help="Generates a log file, which can be used as MultiQC input"/>
         </section>
     </inputs>
     <outputs>
@@ -127,27 +131,39 @@
             <filter>output_section['show_intermediate_contigs']</filter>
             <discover_datasets pattern="(?P&lt;designation&gt;.*\d)\.contigs\.fa$" ext="fasta" directory="megahit_out/intermediate_contigs" />
         </collection>
+        <data name="log_output" format="txt" label="${tool.name} on ${on_string} (Log file)" from_work_dir="megahit_out/log">
+            <filter>output_section['log_file']</filter>
+        </data>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="2">
             <conditional name="input_option">
                 <param name="choice" value="single"/>
                 <param name="single_files" value="refExample.fa" ftype="fasta"/>
             </conditional>
+            <param name="log_file" value="true"/>
             <output name="output">
             <assert_contents>
-              <has_line_matching expression=">k21_0 flag=3 multi=1.0486 len=576" />
+              <has_line_matching expression=">k21_0 flag=3 multi=1.0505 len=576" />
             </assert_contents>
             </output>
+            <output name="log_output">
+                <assert_contents>
+                    <has_text text="- MEGAHIT v"/>
+                    <has_text text="- Build graph for k = 21"/>
+                    <has_text text="- Assemble contigs from SdBG for k = 21"/>
+                    <has_text text="- Local assembly for k = 21"/>
+                </assert_contents>
+            </output>
         </test>
-        <test>
+        <test expect_num_outputs="1">
              <conditional name="input_option">
                 <param name="choice" value="interleaved"/>
                 <param name="interleaved_file" value="interleaved-fq.fa"/>
             </conditional>
             <output name="output" file="interleaved_result.fa"/>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <conditional name="input_option">
                 <param name="choice" value="paired"/>
                 <param name="fastq_input1" value="paired-fq1.fa"/>
@@ -155,7 +171,7 @@
             </conditional>
             <output name="output" file="paired_result.fa"/>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <conditional name="input_option">
                 <param name="choice" value="paired_collection"/>
                 <conditional name="batchmode">