Mercurial repository bgruening/sambamba_markdup, changeset 0:72025d7986cc (draft, default, tip)
planemo upload for repository https://github.com/biod/sambamba commit 99bb4ed496a9cce79ab0a7e613230cf63a44d9f9
| field | value |
|---|---|
| author | bgruening |
| date | Mon, 10 Feb 2025 19:12:03 +0000 |
| parents | |
| children | |
| files | Sambamba_markdup.xml, macros.xml, test-data/1.bam, test-data/1.coord.sorted.bam, test-data/1.qname.sorted.bam, test-data/2.bam, test-data/2.markdup.bam, test-data/2.markdup.txt, test-data/2.markdup_removed.bam, test-data/coordinate_sorted.bam, test-data/coordinate_sorted_merged.bam, test-data/qname_sorted.bam, test-data/qname_sorted_merged.bam |
| diffstat | 13 files changed, 89 insertions(+), 0 deletions(-) |
```diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Sambamba_markdup.xml	Mon Feb 10 19:12:03 2025 +0000
@@ -0,0 +1,59 @@
+<tool id="sambamba_markdup" name="Sambamba markdup" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="23.2" license="MIT">
+    <description>Finds and marks duplicate reads in BAM files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        sambamba markdup
+        $remove_duplicates
+        -l $compression_level
+        '${input_bam}'
+        '${output}'
+        --nthreads \${GALAXY_SLOTS:-4}
+        --tmpdir '\${TMPDIR:-.}'
+        #if $log_out
+            2>&1 | tee '${log}'
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- For deduplication the input bam needs to be coordinate sorted, hence the input file is expected to be coordinate sorted -->
+        <param name="input_bam" type="data" format="bam" label="Input BAM file"/>
+        <param argument="--compression-level" type="integer" value="5" min="0" max="9" label="Level of compression for merged BAM file, number from 0 to 9"/>
+        <param argument="--remove-duplicates" type="boolean" truevalue="-r" falsevalue="" label="Remove duplicates instead of just marking them"/>
+        <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="bam" label="${tool.name} on ${on_string}: Output BAM"/>
+        <data name="log" format="txt" label="${tool.name} on ${on_string}: Output Log">
+            <filter>log_out</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- sambamba markdup without dropped reads -->
+        <test expect_num_outputs="2">
+            <param name="input_bam" value="2.bam" ftype="bam"/>
+            <param name="compression_level" value="5"/>
+            <param name="remove_duplicates" value="false"/>
+            <param name="log_out" value="yes"/>
+            <output name="output" file="2.markdup.bam" ftype="bam" lines_diff="4"/>
+            <output name="log" file="2.markdup.txt" ftype="txt"/>
+        </test>
+        <!-- sambamba markdup with dropped reads -->
+        <test expect_num_outputs="1">
+            <param name="input_bam" value="2.bam" ftype="bam"/>
+            <param name="compression_level" value="5"/>
+            <param name="remove_duplicates" value="true"/>
+            <param name="log_out" value="no"/>
+            <output name="output" file="2.markdup_removed.bam" ftype="bam" lines_diff="4"/>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+Sambamba_markdup marks (by default) or removes duplicate reads. To determine whether a read is a duplicate or not, the same criteria as in Picard are used.
+
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
```
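At runtime Galaxy renders the Cheetah `<command>` template above into a plain shell call. A minimal sketch of the expanded invocation, assuming duplicate removal and logging are both enabled and four slots are allocated; the file names are placeholders, not part of the wrapper:

```sh
# Hypothetical expansion of the <command> template (remove_duplicates=true, log_out=yes)
sambamba markdup \
    -r \
    -l 5 \
    'input.bam' \
    'output.bam' \
    --nthreads 4 \
    --tmpdir '.' \
    2>&1 | tee 'markdup.log'
```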
```diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Feb 10 19:12:03 2025 +0000
@@ -0,0 +1,17 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.0.1</token>
+    <token name="@SUFFIX_VERSION@">2</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.0.1">sambamba</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btv098</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
```
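The two tokens above resolve the tool's version attribute to `1.0.1+galaxy2`, and the `requirements` macro pins the sambamba 1.0.1 package. Since this changeset was produced with planemo (see the commit message), the `<tests>` section can presumably be exercised locally the same way; a sketch assuming planemo is installed:

```sh
# Run both <test> cases against the files in test-data/
planemo test Sambamba_markdup.xml
```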
```diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.markdup.txt	Mon Feb 10 19:12:03 2025 +0000
@@ -0,0 +1,13 @@
+
+sambamba 1.0.1
+ by Artem Tarasov and Pjotr Prins (C) 2012-2023
+    LDC 1.39.0 / DMD v2.109.1 / LLVM17.0.6 / bootstrap LDC - the LLVM D compiler (1.39.0)
+
+finding positions of the duplicate reads in the file...
+  sorted 0 end pairs
+     and 4 single ends (among them 0 unmatched pairs)
+  collecting indices of duplicate reads...  done in 0 ms
+  found 3 duplicates
+collected list of positions in 0 min 0 sec
+marking duplicates...
+collected list of positions in 0 min 0 sec
```
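The expected log records 3 duplicates among 4 single-end reads. A quick cross-check of the two expected BAM outputs, assuming samtools is available (duplicate reads carry SAM flag 0x400 = 1024):

```sh
# Marked output: duplicates are flagged but kept
samtools view -c -f 1024 test-data/2.markdup.bam          # expected: 3
# Removed output: duplicate reads are dropped entirely
samtools view -c -f 1024 test-data/2.markdup_removed.bam  # expected: 0
```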