Mercurial repository bgruening/sambamba_markdup, changeset 0:72025d7986cc (draft, default, tip)
planemo upload for repository https://github.com/biod/sambamba commit 99bb4ed496a9cce79ab0a7e613230cf63a44d9f9
| field | value |
|---|---|
| author | bgruening |
| date | Mon, 10 Feb 2025 19:12:03 +0000 |
| parents | |
| children | |
| files | Sambamba_markdup.xml, macros.xml, test-data/1.bam, test-data/1.coord.sorted.bam, test-data/1.qname.sorted.bam, test-data/2.bam, test-data/2.markdup.bam, test-data/2.markdup.txt, test-data/2.markdup_removed.bam, test-data/coordinate_sorted.bam, test-data/coordinate_sorted_merged.bam, test-data/qname_sorted.bam, test-data/qname_sorted_merged.bam |
| diffstat | 13 files changed, 89 insertions(+), 0 deletions(-) |
```diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Sambamba_markdup.xml	Mon Feb 10 19:12:03 2025 +0000
@@ -0,0 +1,59 @@
+<tool id="sambamba_markdup" name="Sambamba markdup" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="23.2" license="MIT">
+    <description>Finds and marks duplicate reads in BAM files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        sambamba markdup
+        $remove_duplicates
+        -l $compression_level
+        '${input_bam}'
+        '${output}'
+        --nthreads \${GALAXY_SLOTS:-4}
+        --tmpdir '\${TMPDIR:-.}'
+        #if $log_out
+            2>&1 | tee '${log}'
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- For deduplication the input bam needs to be coordinate sorted, hence the input file is expected to be coordinate sorted -->
+        <param name="input_bam" type="data" format="bam" label="Input BAM file"/>
+        <param argument="--compression-level" type="integer" value="5" min="0" max="9" label="Level of compression for merged BAM file, number from 0 to 9"/>
+        <param argument="--remove-duplicates" type="boolean" truevalue="-r" falsevalue="" label="Remove duplicates instead of just marking them"/>
+        <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="bam" label="${tool.name} on ${on_string}: Output BAM"/>
+        <data name="log" format="txt" label="${tool.name} on ${on_string}: Output Log">
+            <filter>log_out</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- sambamba markdup without dropped reads -->
+        <test expect_num_outputs="2">
+            <param name="input_bam" value="2.bam" ftype="bam"/>
+            <param name="compression_level" value="5"/>
+            <param name="remove_duplicates" value="false"/>
+            <param name="log_out" value="yes"/>
+            <output name="output" file="2.markdup.bam" ftype="bam" lines_diff="4"/>
+            <output name="log" file="2.markdup.txt" ftype="txt"/>
+        </test>
+        <!-- sambamba markdup with dropped reads -->
+        <test expect_num_outputs="1">
+            <param name="input_bam" value="2.bam" ftype="bam"/>
+            <param name="compression_level" value="5"/>
+            <param name="remove_duplicates" value="true"/>
+            <param name="log_out" value="no"/>
+            <output name="output" file="2.markdup_removed.bam" ftype="bam" lines_diff="4"/>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+Sambamba_markdup marks (by default) or removes duplicate reads. To determine whether a read is a duplicate or not, the same criteria as in Picard are used.
+
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
```
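At runtime Galaxy renders the Cheetah `<command>` template above into a plain shell call. A minimal sketch of the expanded invocation, assuming duplicate removal and logging are both enabled and four slots are allocated; the file names are placeholders, not part of the wrapper:

```sh
# Hypothetical expansion of the <command> template (remove_duplicates=true, log_out=yes)
sambamba markdup \
    -r \
    -l 5 \
    'input.bam' \
    'output.bam' \
    --nthreads 4 \
    --tmpdir '.' \
    2>&1 | tee 'markdup.log'
```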
```diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Feb 10 19:12:03 2025 +0000
@@ -0,0 +1,17 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.0.1</token>
+    <token name="@SUFFIX_VERSION@">2</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.0.1">sambamba</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btv098</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
```
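The two tokens above resolve the tool's version attribute to `1.0.1+galaxy2`, and the `requirements` macro pins the sambamba 1.0.1 package. Since this changeset was produced with planemo (see the commit message), the `<tests>` section can presumably be exercised locally the same way; a sketch assuming planemo is installed:

```sh
# Run both <test> cases against the files in test-data/
planemo test Sambamba_markdup.xml
```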
```diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.markdup.txt	Mon Feb 10 19:12:03 2025 +0000
@@ -0,0 +1,13 @@
+
+sambamba 1.0.1
+ by Artem Tarasov and Pjotr Prins (C) 2012-2023
+    LDC 1.39.0 / DMD v2.109.1 / LLVM17.0.6 / bootstrap LDC - the LLVM D compiler (1.39.0)
+
+finding positions of the duplicate reads in the file...
+  sorted 0 end pairs
+     and 4 single ends (among them 0 unmatched pairs)
+  collecting indices of duplicate reads...  done in 0 ms
+  found 3 duplicates
+collected list of positions in 0 min 0 sec
+marking duplicates...
+collected list of positions in 0 min 0 sec
```
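The expected log records 3 duplicates among 4 single-end reads. A quick cross-check of the two expected BAM outputs, assuming samtools is available (duplicate reads carry SAM flag 0x400 = 1024):

```sh
# Marked output: duplicates are flagged but kept
samtools view -c -f 1024 test-data/2.markdup.bam          # expected: 3
# Removed output: duplicate reads are dropped entirely
samtools view -c -f 1024 test-data/2.markdup_removed.bam  # expected: 0
```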