view bcftools_annotate.xml @ 17:28647eee857d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit ccc967cdce26379110361f8dea521bade23f8612"
author iuc
date Tue, 08 Mar 2022 17:26:56 +0000
parents a974b56e4c43
children 65d418d476a5
line wrap: on
line source

<?xml version='1.0' encoding='utf-8'?>
<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy1">
    <description>Annotate and edit VCF/BCF files</description>
    <!-- Local token plus the shared macro file. The EXECUTABLE token ("annotate") is substituted
         into the tool name, the tool id, the command line and the help text of this wrapper. -->
    <macros>
        <token name="@EXECUTABLE@">annotate</token>
        <import>macros.xml</import>
    </macros>
    <!-- Blocks expanded from macros that are not defined in this file (imported via macros.xml above). -->
    <expand macro="bio_tools" />
    <expand macro="requirements" />
    <expand macro="version_command" />
    <!--
    Cheetah command template. In order, it:
      1. Emits the PREPARE_ENV and PREPARE_INPUT_FILE token snippets (defined outside this file).
      2. Stages the optional annotation source under a fixed name in the working directory:
           * VCF dataset: bgzip-compressed to annotations.vcf.gz, then indexed with "bcftools index".
           * BCF dataset: symlinked as annotations.bcf; if the dataset has bcf_index metadata it is
             symlinked as annotations.bcf.csi, otherwise "bcftools index" is run.
           * BED dataset: bgzip-compressed to annotations.bed.gz, tabix-indexed with columns 1/2/3
             as sequence/begin/end.
           * any other tabular dataset: bgzip-compressed to annotations.tab.gz, tabix-indexed with
             column 2 used as both begin and end.
             NOTE(review): this holds even when the columns list names FROM,TO; the help examples
             index region files with -e3. Confirm this is intended.
      3. For tab/BED annotations, picks the header source: the header_file dataset when given,
         otherwise the lines starting with ##INFO from the hdr_file configfile are copied into
         annotation.hdr. Other line types typed into header_lines are dropped by that grep.
      4. Builds the bcftools call. $section is re-pointed at sec_annofile, sec_annotate or
         sec_restrict before each group of options; presumably the imported token snippets
         read their values through $section (TODO confirm in macros.xml).
      5. Redirects standard output to $output_file.
    -->
    <command detect_errors="aggressive"><![CDATA[
@PREPARE_ENV@
@PREPARE_INPUT_FILE@
#set $annotation_file = None
#set $annotation_hdr = None
#set $section = $sec_annofile
#if $section.annofile.anno_fmt == 'vcf':
  #if $section.annofile.annotations.is_of_type('vcf')
    #set $annotation_file = 'annotations.vcf.gz'
    bgzip -c '$section.annofile.annotations' > $annotation_file &&
    bcftools index $annotation_file &&
  #elif $section.annofile.annotations.is_of_type('bcf')
    #set $annotation_file = 'annotations.bcf'
    ln -s '$section.annofile.annotations' $annotation_file &&
    #if $section.annofile.annotations.metadata.bcf_index:
      ln -s '${section.annofile.annotations.metadata.bcf_index}' ${annotation_file}.csi &&
    #else
      bcftools index $annotation_file &&
    #end if
  #end if
#elif $section.annofile.anno_fmt == 'tab':
  #if $section.annofile.annotations.is_of_type('bed')
    #set $annotation_file = 'annotations.bed.gz'
    bgzip -c '$section.annofile.annotations' > $annotation_file &&
    tabix -s 1 -b 2 -e 3 $annotation_file &&
  #else:
    #set $annotation_file = 'annotations.tab.gz'
    bgzip -c '$section.annofile.annotations' > $annotation_file &&
    tabix -s 1 -b 2 -e 2 $annotation_file &&
  #end if
  #if $section.annofile.header_file:
    #set $annotation_hdr = $section.annofile.header_file
  #elif $section.annofile.header_lines:
    #set $annotation_hdr = 'annotation.hdr'
    grep '^\#\#INFO' '${hdr_file}' > $annotation_hdr &&
  #end if
#end if
#set $section = $sec_restrict

bcftools @EXECUTABLE@
@PREPARE_REGIONS_FILE@

#set $section = $sec_annofile
@COLUMNS@
#if $annotation_file:
  --annotations '${annotation_file}'
#end if
#if $annotation_hdr:
    --header-lines '${annotation_hdr}'
#end if

#if $section.set_id:
  --set-id '${section.set_id}'
#end if
#if $section.mark_sites:
  --mark-sites '${section.mark_sites}'
#end if

#set $section = $sec_annotate
#if $section.rename_chrs:
  --rename-chrs '${section.rename_chrs}'
#end if
#if $section.remove:
  --remove '${section.remove}'
#end if

## Default section
#set $section = $sec_restrict
@INCLUDE@
@EXCLUDE@
@COLLAPSE@
@REGIONS@
@SAMPLES@

@OUTPUT_TYPE@
@THREADS@

## Primary Input/Outputs
@INPUT_FILE@
> '$output_file'
]]>
    </command>
    <!-- hdr_file: holds the free-text "Header Lines" value, stripped of surrounding whitespace.
         It is only filled when the annotation format is 'tab' and header_lines is non-empty;
         otherwise the file is empty. The command template greps this file for ##INFO lines.
         The #slurp directives keep the template's own line breaks out of the written file. -->
    <configfiles>
        <configfile name="hdr_file"><![CDATA[#slurp
#if $sec_annofile.annofile.anno_fmt == 'tab' and str($sec_annofile.annofile.header_lines) != '':
$sec_annofile.annofile.header_lines.__str__.strip()#slurp
#end if]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="macro_input" />
        <!-- "Add Annotations": where annotations come from (none, VCF/BCF, or BED/tabular) plus the
             ID and mark-sites options. The command template reads these values through
             $sec_annofile. -->
        <section name="sec_annofile" expanded="false" title="Add Annotations">
            <expand macro="macro_columns" />
            <conditional name="annofile">
                <param name="anno_fmt" type="select" label="Annotations File">
                    <option value="none">None</option>
                    <option value="vcf">From a VCF/BCF file</option>
                    <option value="tab">From a BED or tab-delimited file</option>
                </param>
                <when value="none"/>
                <when value="vcf">
                    <param name="annotations" type="data" format="vcf,bcf" label="Annotations VCF"/>
                </when>
                <when value="tab">
                    <param name="annotations" type="data" format="tabular,bed" label="Annotations">
                        <help><![CDATA[
                        BED, or a tab-delimited file with mandatory columns CHROM, POS (or, alternatively, FROM and TO),
                        optional columns REF and ALT, and arbitrary number of annotation columns.
                        Note that in case of tab-delimited file, the coordinates POS, FROM and TO are one-based and inclusive.
                        ]]></help>
                    </param>
                    <!-- header_file takes precedence over header_lines in the command template. -->
                    <param name="header_file" type="data" format="txt" label="Header Lines File" optional="true" help="lines which should be appended to the VCF header" />
                    <!-- The description lives only in the help element: Galaxy reads a help attribute
                         before a help element, so having both would hide the example lines below. -->
                    <param name="header_lines" type="text" area="true" label="Header Lines" optional="true">
                        <help><![CDATA[
                        Lines which should be appended to the VCF header. Only lines starting with ##INFO are used. Example:
                        ##INFO=<ID=NUMERIC_TAG,Number=1,Type=Integer,Description="Example header line">
                        ##INFO=<ID=STRING_TAG,Number=1,Type=String,Description="Yet another header line">
                        ]]></help>
                        <sanitizer sanitize="False"/>
                    </param>
                </when>
            </conditional>
            <param argument="--mark-sites" type="text" value="" label="Mark Sites TAG"
                help="add INFO/TAG flag to sites which are (&quot;+&quot;) or are not (&quot;-&quot;) listed in the annotations file" />
            <!-- Sanitizing is switched off so the backslashes of the format string survive; the regex
                 validator is therefore the only guard on what reaches the quoted command-line argument. -->
            <param argument="--set-id" type="text" value="" optional="true" label="Set Id">
                <help>Assign ID on the fly using the given format.
                By default all existing IDs are replaced.
                If the format string is preceded by "+", only missing IDs will be set.
                For example: '%CHROM\_%POS\_%REF\_%FIRST_ALT'
                </help>
                <sanitizer sanitize="False"/>
                <validator type="regex" message="Set Id must be a series of upper-case field names, each prefixed with a percent sign and joined by '\_', optionally preceded by '+' (see the example in the help)">^([+]?(%[A-Z_]+)(\\_%[A-Z_]+)*)?$</validator>
            </param>
        </section>
        <!-- "Change Annotations": options that edit the input itself (remove tags, rename
             chromosomes). The command template reads these values through $sec_annotate. -->
        <section name="sec_annotate" expanded="false" title="Change Annotations">
            <param argument="--remove" type="text" value="" label="Remove annotations" optional="true">
                <help><![CDATA[
                 List of annotations to remove.
                 Use "FILTER" to remove all filters or "FILTER/SomeFilter" to remove a specific filter.
                 Similarly, "INFO" can be used to remove all INFO tags and "FORMAT" to remove all FORMAT tags except GT.
                 To remove all INFO tags except "FOO" and "BAR", use "^INFO/FOO,INFO/BAR" (and similarly for FORMAT and FILTER).
                 "INFO" can be abbreviated to "INF" and "FORMAT" to "FMT".
                ]]></help>
                <!-- An empty value is accepted; otherwise a comma-separated list of upper-case column
                     names, each optionally negated with ^ and optionally followed by /tag. -->
                <validator type="regex" message="Remove annotations must be a comma-separated list of upper-case column names, each optionally prefixed with '^' and optionally followed by '/tag', e.g. ID,INFO/DP,^FORMAT/GT">^(\^?[A-Z]+(/\w+)?(,\^?[A-Z]+(/\w+)?)*)?$</validator>
            </param>
            <param argument="--rename-chrs" type="data" format="tabular" label="Rename CHROM" optional="true"
                   help="rename chromosomes according to the map in file, with old_name new_name pairs separated by whitespaces, each on a separate line." />
        </section>
        <!-- "Restrict to": every parameter here is expanded from shared macros that are not defined
             in this file. The command template points $section at this section before emitting the
             INCLUDE, EXCLUDE, COLLAPSE, REGIONS and SAMPLES tokens. -->
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_include" />
            <expand macro="macro_exclude" />
            <expand macro="macro_collapse" />
            <expand macro="macro_restrict" />
            <expand macro="macro_samples" />
        </section>
        <!-- Output format selector from the shared macros; the tests below set its output_type param to "v". -->
        <expand macro="macro_select_output_type" />
    </inputs>
    <!-- The output dataset is declared by a shared macro. The command template redirects bcftools'
         standard output to $output_file, and the tests refer to the output by the name output_file. -->
    <outputs>
        <expand macro="macro_vcf_output"/>
    </outputs>
    <!-- Functional tests. Every test requests output_type "v" and checks the output for (or for the
         absence of) a marker string. Most tests address nested parameters by their bare name. -->
    <tests>
        <!-- Tab-format annotations (annotate.tab) with header lines taken from a header file dataset. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <param name="anno_fmt" value="tab" />
            <param name="annotations" value="annotate.tab" />
            <param name="header_file" value="annotate.hdr" />
            <param name="columns" value="CHROM,POS,REF,ALT,ID,QUAL,INFO/T_INT,INFO/T_FLOAT,INDEL" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="snp_3000150" />
                </assert_contents>
            </output>
        </test>
        <!-- Tab-format annotations addressed by FROM/TO columns, combined with set_id; written with
             explicit section/conditional nesting. Checks both the T_STR tag and a generated ID. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <section name="sec_annofile">
                <param name="columns" value="CHROM,FROM,TO,T_STR" />
                <conditional name="annofile">
                    <param name="anno_fmt" value="tab" />
                    <param name="annotations" value="annotate2.tab" />
                    <param name="header_file" value="annotate.hdr" />
                </conditional>
                <param name="set_id" value="%CHROM\_%POS\_%REF\_%FIRST_ALT" />
            </section>
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="T_STR=region_3000150_3106154" />
                    <has_text text="1_3062915_GTTT_G" />
                </assert_contents>
            </output>
        </test>
        <!-- Annotations from a VCF file (annots.vcf). -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots.vcf" />
            <param name="columns" value="STR,ID,QUAL,FILTER" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="idIndel" />
                    <has_text text="STR=testSNP" />
                </assert_contents>
            </output>
        </test>
        <!-- Same columns and assertions as the previous test, but the annotations come from annots.bcf. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots.bcf" />
            <param name="columns" value="STR,ID,QUAL,FILTER" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="idIndel" />
                    <has_text text="STR=testSNP" />
                </assert_contents>
            </output>
        </test>
        <!-- VCF annotations carrying ID, QUAL, FILTER, all INFO and all FMT columns over. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate2.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots2.vcf" />
            <param name="columns" value="ID,QUAL,FILTER,INFO,FMT" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="q99" />
                    <has_text text="FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999" />
                </assert_contents>
            </output>
        </test>
        <!-- Same inputs and columns as the previous test, with the samples parameter set to "A". -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate2.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots2.vcf" />
            <param name="columns" value="ID,QUAL,FILTER,INFO,FMT" />
            <param name="samples" value="A" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="q11" />
                    <has_text text="FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999" />
                </assert_contents>
            </output>
        </test>
        <!-- No annotation file; the remove list mixes plain and ^-prefixed entries. Asserts "fltY" is absent. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate3.vcf" />
            <param name="anno_fmt" value="none" />
            <param name="remove" value="ID,QUAL,^FILTER/fltA,FILTER/fltB,^INFO/AA,INFO/BB,^FMT/GT,FMT/PL" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="fltY" />
                </assert_contents>
            </output>
        </test>
        <!-- No annotation file; remove is "FORMAT". Asserts the FORMAT string "GT:X:PL:Y:AA" is absent. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate3.vcf" />
            <param name="anno_fmt" value="none" />
            <param name="remove" value="FORMAT" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="GT:X:PL:Y:AA" />
                </assert_contents>
            </output>
        </test>

    </tests>
    <help><![CDATA[
==================================
 bcftools @EXECUTABLE@
==================================

Annotate and edit VCF/BCF files.

Examples::

    # Remove three fields
    bcftools annotate -x ID,INFO/DP,FORMAT/DP file.vcf.gz

    # Remove all INFO fields and all FORMAT fields except for GT and PL
    bcftools annotate -x INFO,^FORMAT/GT,FORMAT/PL file.vcf

    # Add ID, QUAL and INFO/TAG, not replacing TAG if already present
    bcftools annotate -a src.bcf -c ID,QUAL,+TAG dst.bcf

    # Carry over all INFO and FORMAT annotations except FORMAT/GT
    bcftools annotate -a src.bcf -c INFO,^FORMAT/GT dst.bcf

    # Annotate from a tab-delimited file with six columns (the fifth is ignored),
    # first indexing with tabix. The coordinates are 1-based.
    tabix -s1 -b2 -e2 annots.tab.gz
    bcftools annotate -a annots.tab.gz -h annots.hdr -c CHROM,POS,REF,ALT,-,TAG file.vcf

    # Annotate from a tab-delimited file with regions (1-based coordinates, inclusive)
    tabix -s1 -b2 -e3 annots.tab.gz
    bcftools annotate -a annots.tab.gz -h annots.hdr -c CHROM,FROM,TO,TAG input.vcf

    # Annotate from a bed file (0-based coordinates, half-closed, half-open intervals)
    bcftools annotate -a annots.bed.gz -h annots.hdr -c CHROM,FROM,TO,TAG input.vcf

@REGIONS_HELP@
@EXPRESSIONS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
]]>
    </help>
    <expand macro="citations" />
</tool>