view preseq_lc_extrap.xml @ 0:079f2ee9bd64 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/preseq commit 730a243f12a30a36277b58fb0d92891c81af4c6f
author iuc
date Wed, 11 Mar 2026 19:19:04 +0000
parents
children
line wrap: on
line source

<tool id="preseq_lc_extrap" name="Preseq lc_extrap" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Yield Extrapolation</description>
    
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>

    <command detect_errors="exit_code"><![CDATA[
        ## Give the input dataset a fixed local name so the preseq call below
        ## can refer to it as input.bam regardless of where the dataset lives.
        ln -s '$input_bam' input.bam &&

        ## Run lc_extrap on the linked file (-B marks the input as BAM).
        ## The extrapolation limit and step size come from the tool form;
        ## the result table is written to the output dataset path.
        preseq lc_extrap
            -B input.bam
            $verbose
            -e $extrap_limit
            -s $step_size
            -o '$output_tsv'
    ]]></command>

    <inputs>
        <!-- Presumably declares the input_bam dataset parameter that the command
             links to input.bam; the macro body lives in macros.xml. -->
        <expand macro="macro_input_bam" />
        <!-- Both numbers are handed to preseq unchanged (as -e and -s). min="1"
             makes the form reject zero and negative values instead of passing
             a meaningless limit or step on to the program. -->
        <param argument="-e" name="extrap_limit" type="integer" min="1" value="10000000" label="Extrapolation Limit" help="Total number of reads to extrapolate to."/>
        <param argument="-s" name="step_size" type="integer" min="1" value="100000" label="Step Size" help="Step size for extrapolation."/>
        <!-- Presumably provides the $verbose placeholder used on the command line. -->
        <expand macro="macro_verbose" />
    </inputs>

    <outputs>
        <!-- The single result dataset; the command writes it via -o '$output_tsv'. -->
        <data format="tsv" name="output_tsv"/>
    </outputs>

    <tests>
        <!-- Short extrapolation (limit 100000, step 10000) on test.bam. The result is
             compared with test_lc_extrap.tsv, with lines_diff="4" allowing a few
             lines to differ from the reference. -->
        <test expect_num_outputs="1">
            <param name="input_bam" ftype="bam" value="test.bam"/>
            <param name="step_size" value="10000"/>
            <param name="extrap_limit" value="100000"/>
            <output name="output_tsv" ftype="tsv" file="test_lc_extrap.tsv" lines_diff="4"/>
        </test>
    </tests>

    <help><![CDATA[
**Preseq: lc_extrap**

The ``lc_extrap`` tool from the Preseq package predicts the expected yield of distinct reads for future sequencing efforts. By analyzing the current complexity of a genomic library, it calculates an extrapolation curve. This helps answer the question: "If I sequence this library deeper, how many new, unique reads will I discover?"

**Input**

- A **coordinate-sorted** BAM file containing aligned reads.

**Output**

- A tabular (TSV) file containing the extrapolated yield curve. This file is suitable for plotting and can be integrated into MultiQC reports.
    ]]></help>
    
    <expand macro="citations"/>

</tool>