Mercurial > repos > devteam > samtools_split

<macros>
    <!-- Declares the samtools package requirement, pinned to @TOOL_VERSION@.
         The yield placeholder lets a tool that expands this macro supply
         additional requirement elements. -->
    <xml name="requirements">
        <requirements>
            <requirement version="@TOOL_VERSION@" type="package">samtools</requirement>
            <yield />
        </requirements>
    </xml>
    <!-- NOTE: for some tools, the TOOL_VERSION token controls only the version
         of the requirement, not the version of the tool itself, because the
         tool's version is ahead of the requirement version. For those tools,
         please bump only the minor version so that the requirement version
         can eventually catch up. To find the affected tools, run:
         `grep "<tool" . -r | grep -v VERSION_SUFFIX | cut -d":" -f 1` -->
    <!-- Version of the samtools package requested by the "requirements" macro. -->
    <token name="@TOOL_VERSION@">1.20</token>
    <!-- Suffix token; presumably appended to TOOL_VERSION in the version
         attribute of each tool (TODO confirm in the tool files). -->
    <token name="@VERSION_SUFFIX@">2</token>
    <!-- Presumably the Galaxy profile declared by the tools (TODO confirm in
         the tool files). -->
    <token name="@PROFILE@">22.05</token>
    <!-- Cheetah snippet that sets the template variable $flags to 0 and, when
         $filter is non-empty, to the sum of the comma-separated integer values
         in $filter. Emits no command line text itself. -->
    <token name="@FLAGS@"><![CDATA[
        #set $flags = 0
        #if $filter
            #set $flags = sum(map(int, str($filter).split(',')))
        #end if
    ]]></token>
    <!-- Symlinks the dataset $input to "infile" in the working directory and
         makes an index available next to it: for BAM input the bam_index
         metadata file is linked as infile.bai, for CRAM input the cram_index
         metadata file is linked as infile.crai; when the metadata value is
         "None" the index is built with "samtools index" instead.
         Every emitted command ends with "&&", so the token must be followed
         by another command. -->
    <token name="@PREPARE_IDX@"><![CDATA[
        ##prepare input and indices
        ln -s '$input' infile &&
        #if $input.is_of_type('bam'):
            #if str( $input.metadata.bam_index ) != "None":
                ln -s '${input.metadata.bam_index}' infile.bai &&
            #else:
                samtools index infile infile.bai &&
            #end if
        #elif $input.is_of_type('cram'):
            #if str( $input.metadata.cram_index ) != "None":
                ln -s '${input.metadata.cram_index}' infile.crai &&
            #else:
                samtools index infile infile.crai &&
            #end if
        #end if
    ]]></token>
    <!-- Multi-dataset variant of the index preparation: each dataset in
         $input_bams is symlinked under its zero-based position in the list
         (0, 1, 2, ...) and its index is linked from the dataset metadata, or
         built with "samtools index" when the metadata value is "None", as
         POSITION.bai for BAM or POSITION.crai for CRAM.
         Every emitted command ends with "&&", so the token must be followed
         by another command. -->
    <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[
        ##prepare input and indices
        #for $i, $bam in enumerate( $input_bams ):
            ln -s '$bam' '${i}' &&
            #if $bam.is_of_type('bam'):
                #if str( $bam.metadata.bam_index ) != "None":
                    ln -s '${bam.metadata.bam_index}' '${i}.bai' &&
                #else:
                    samtools index '${i}' '${i}.bai' &&
                #end if
            #elif $bam.is_of_type('cram'):
                #if str( $bam.metadata.cram_index ) != "None":
                    ln -s '${bam.metadata.cram_index}' '${i}.crai' &&
                #else:
                    samtools index '${i}' '${i}.crai' &&
                #end if
            #end if
        #end for
    ]]></token>
    <!-- Reference preparation snippet. Reads the conditional "addref_cond"
         (select "addref_select" plus parameter "ref") and, depending on the
         selection, defines the shell variables reffa and reffai and the
         Cheetah variable use_ref. The in-template comments below describe
         each branch. Every emitted command ends with "&&", so the token must
         be followed by another command. -->
    <token name="@PREPARE_FASTA_IDX@"><![CDATA[
        ## Make the user-selected reference genome, if any, accessible through
        ## a shell variable $reffa, index the reference if necessary, and make
        ## the fai-index file available through a shell variable $reffai.

        ## For a cached genome simply sets the shell variables to point to the
        ## genome file and its precalculated index.
        ## For a genome from the user's history, if that genome is a plain
        ## fasta file, the code creates a symlink in the pwd, creates the fai
        ## index file next to it, then sets the shell variables to point to the
        ## symlink and its index.
        ## For a fasta.gz dataset from the user's history, it tries the same,
        ## but this will only succeed if the file got compressed with bgzip.
        ## For a regular gzipped file samtools faidx will fail, in which case
        ## the code falls back to decompressing to plain fasta before
        ## reattempting the indexing.
        ## Indexing of a bgzipped file produces a regular fai index file *and*
        ## a compressed gzi file. The former is identical to the fai index of
        ## the uncompressed fasta.

        ## If the user has not selected a reference (it's an optional parameter
        ## in some samtools wrappers), a cheetah boolean use_ref is set to
        ## False to encode that fact.

        #set use_ref=True
        #if $addref_cond.addref_select == "history":
            #if $addref_cond.ref.is_of_type('fasta'):
                reffa="reference.fa" &&
                ln -s '${addref_cond.ref}' \$reffa &&
                samtools faidx \$reffa &&
            #else:
                reffa="reference.fa.gz" &&
                ln -s '${addref_cond.ref}' \$reffa &&
                {
                    samtools faidx \$reffa ||
                    {
                        echo "Failed to index compressed reference. Trying decompressed ..." 1>&2 &&
                        gzip -dc \$reffa > reference.fa &&
                        reffa="reference.fa" &&
                        samtools faidx \$reffa;
                    }
                } &&
            #end if
            reffai=\$reffa.fai &&
        #elif $addref_cond.addref_select == "cached":
            ## in case of cached the absolute path is used which allows to read
            ## a cram file  without specifying the reference
            reffa='${addref_cond.ref.fields.path}' &&
            reffai=\$reffa.fai &&
        #else
            #set use_ref=False
        #end if
    ]]></token>

    <!-- Conditional "addref_cond" for tools where a reference is optional.
         The selector "addref_select" offers no reference, a fasta or fasta.gz
         dataset from the history, or a built-in genome taken from the
         "fasta_indexes" data table filtered by the dbkey of the parameter
         named "input". The HELP and ARGUMENT tokens are filled in by the
         expanding tool. -->
    <xml name="optional_reference" token_help="" token_argument="">
        <conditional name="addref_cond">
            <param name="addref_select" type="select" label="Use a reference sequence">
                <help>@HELP@</help>
                <option value="no">No</option>
                <option value="history">Use a genome/index from the history</option>
                <option value="cached">Use a built-in genome</option>
            </param>
            <when value="no"/>
            <when value="history">
                <param name="ref"
                       argument="@ARGUMENT@"
                       type="data"
                       format="fasta,fasta.gz"
                       label="Reference"/>
            </when>
            <when value="cached">
                <param name="ref"
                       argument="@ARGUMENT@"
                       type="select"
                       label="Reference">
                    <options from_data_table="fasta_indexes">
                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
                    </options>
                    <validator type="no_options"
                               message="No reference genome is available for the build associated with the selected input dataset"/>
                </param>
            </when>
        </conditional>
    </xml>
    <!-- Conditional "addref_cond" for tools that always need a reference.
         The selector "addref_select" offers a fasta or fasta.gz dataset from
         the history, or a built-in genome taken from the "fasta_indexes" data
         table filtered by the dbkey of the parameter named "input". The HELP
         and ARGUMENT tokens are filled in by the expanding tool.
         The no_options validator is attached to the select parameter itself
         (not nested inside options) so that this macro agrees with the
         "optional_reference" macro. -->
    <xml name="mandatory_reference" token_help="" token_argument="">
        <conditional name="addref_cond">
            <param name="addref_select" type="select" label="Use a reference sequence">
                <help>@HELP@</help>
                <option value="history">Use a genome/index from the history</option>
                <option value="cached">Use a built-in genome</option>
            </param>
            <when value="history">
                <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/>
            </when>
            <when value="cached">
                <param name="ref" argument="@ARGUMENT@" type="select" label="Reference">
                    <options from_data_table="fasta_indexes">
                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
                    </options>
                    <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/>
                </param>
            </when>
        </conditional>
    </xml>


    <!-- Defines the shell variable addthreads as GALAXY_SLOTS (1 when unset)
         minus one. The subtraction is written as a post-decrement inside an
         arithmetic command, which evaluates to the value before the decrement;
         with a slot count of 1 the command therefore still succeeds and the
         "&&" chain continues with addthreads set to 0. -->
    <token name="@ADDTHREADS@"><![CDATA[
        ##compute the number of ADDITIONAL threads to be used by samtools (-@)
        addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) &&
    ]]></token>
    <!-- Defines the shell variable addmemory as 75 percent (integer
         arithmetic) of GALAXY_MEMORY_MB_PER_SLOT, falling back to 768 when
         that variable is unset. Ends with "&&", so the token must be followed
         by another command. -->
    <token name="@ADDMEMORY@"><![CDATA[
        ##compute the number of memory available to samtools sort (-m)
        ##use only 75% of available: https://github.com/samtools/samtools/issues/831
        addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
        ((addmemory=addmemory*75/100)) &&
    ]]></token>
    <!-- Optional integer parameter "seed" for tools that use a random number
         generator. -->
    <xml name="seed_input">
        <param name="seed"
               type="integer"
               optional="True"
               label="Seed for random number generator"
               help="If empty a random seed is used."/>
    </xml>
    <!-- Option list covering the twelve SAM flag bits. Each option value is
         the numeric bit; the matching token (S1, S2, S4, ... S2048) controls
         whether that option is preselected and defaults to "false". -->
    <xml name="flag_options"
         token_s1="false"
         token_s2="false"
         token_s4="false"
         token_s8="false"
         token_s16="false"
         token_s32="false"
         token_s64="false"
         token_s128="false"
         token_s256="false"
         token_s512="false"
         token_s1024="false"
         token_s2048="false">
        <option value="1" selected="@S1@">Read is paired</option>
        <option value="2" selected="@S2@">Read is mapped in a proper pair</option>
        <option value="4" selected="@S4@">Read is unmapped</option>
        <option value="8" selected="@S8@">Mate is unmapped</option>
        <option value="16" selected="@S16@">Read is mapped to the reverse strand of the reference</option>
        <option value="32" selected="@S32@">Mate is mapped to the reverse strand of the reference</option>
        <option value="64" selected="@S64@">Read is the first in a pair</option>
        <option value="128" selected="@S128@">Read is the second in a pair</option>
        <option value="256" selected="@S256@">Alignment of the read is not primary</option>
        <option value="512" selected="@S512@">Read fails platform/vendor quality checks</option>
        <option value="1024" selected="@S1024@">Read is a PCR or optical duplicate</option>
        <option value="2048" selected="@S2048@">Alignment is supplementary</option>
    </xml>

    <!-- region specification macros and tokens for tools that allow the specification
         of region by bed file / space separated list of regions -->
    <!-- Emits "-t" followed by the single-quoted path of the dataset
         cond_region.targetregions when the "tab" mode of the cond_region
         conditional is selected; emits nothing otherwise. -->
    <token name="@REGIONS_FILE@"><![CDATA[
        #if $cond_region.select_region == 'tab':
            -t '$cond_region.targetregions'
        #end if
    ]]></token>
    <!-- Emits every manually entered region as a single-quoted command line
         argument when the "text" mode of the cond_region conditional is
         selected; emits nothing otherwise. The repeat is iterated directly
         because the position of an entry is not needed. -->
    <token name="@REGIONS_MANUAL@"><![CDATA[
        #if $cond_region.select_region == 'text':
            #for $x in $cond_region.regions_repeat:
               '${x.region}'
            #end for
        #end if
    ]]></token>
    <!-- Conditional "cond_region" with three modes: no region filter, a list
         of manually entered regions (repeat "regions_repeat" of text
         parameters validated against CHR[:FROM[-TO]]), or a tabular target
         regions file ("targetregions"). The REGIONS_FILE and REGIONS_MANUAL
         tokens translate the selection into command line arguments. -->
    <xml name="regions_macro">
        <conditional name="cond_region">
            <param name="select_region" type="select" label="Filter by regions" help="restricts output to only those alignments which overlap the specified region(s)">
                <option value="no" selected="True">No</option>
                <option value="text">Manually specify regions</option>
                <option value="tab">Regions from tabular file</option>
            </param>
            <when value="no"/>
            <when value="text">
                <repeat name="regions_repeat" min="1" default="1" title="Regions">
                    <param name="region" type="text" label="region" help="format chr:from-to">
                        <validator type="regex" message="Required format: CHR[:FROM[-TO]]; where CHR: string containing any character except quotes, whitespace and colon; FROM and TO: any integer">^[^\s'\":]+(:\d+(-\d+){0,1}){0,1}$</validator>
                    </param>
                </repeat>
            </when>
            <when value="tab">
                <param name="targetregions" argument="-t/--target-regions" type="data" format="tabular" label="Target regions file" help="Do stats in these regions only. Tab-delimited file chr,from,to (1-based, inclusive)" />
            </when>
        </conditional>
    </xml>

    <!-- Citation block shared by the tools: a single DOI entry. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1093/gigascience/giab008</citation>
        </citations>
    </xml>
    <!-- Version detection: runs samtools without arguments, merges stderr into
         stdout and keeps the lines containing "Version". -->
    <xml name="version_command">
        <version_command><![CDATA[samtools 2>&1 | grep Version]]></version_command>
    </xml>
    <!-- Error handling shared by the tools: any exit code of 1 or higher is
         reported as a fatal error. -->
    <xml name="stdio">
        <stdio>
            <exit_code range="1:"
                       level="fatal"
                       description="Error"/>
        </stdio>
    </xml>
</macros>
author	iuc
date	Sun, 08 Sep 2024 03:23:35 +0000
parents	eda9f7aa1d54
children