Mercurial > repos > devteam > bwa

<?xml version="1.0"?>
<tool id="bwa" name="Map with BWA" version="@VERSION@.3">
    <description>- map short reads (&lt; 100 bp) against reference genome</description>
    <macros>
        <import>read_group_macros.xml</import>
        <import>bwa_macros.xml</import>
        <!-- Cheetah fragment spliced into every `bwa aln` invocation of the command template.
             It emits nothing in the simple "illumina" mode and the complete list of aln flags
             when the analysis_type selector is "full". -->
        <token name="@command_options@">
#if str( $analysis_type.analysis_type_selector ) == "full":
    ## n is collected as free text (it may be an integer or a fraction), so it is
    ## single-quoted to reach bwa as exactly one shell argument.
    -n '${analysis_type.n}'
    -o ${analysis_type.o}
    -e ${analysis_type.e}
    -i ${analysis_type.i}
    -d ${analysis_type.d}
    -l ${analysis_type.l}
    -k ${analysis_type.k}
    -m ${analysis_type.m}
    -M ${analysis_type.M}
    -O ${analysis_type.O}
    -E ${analysis_type.E}
    -R ${analysis_type.R}
    -q ${analysis_type.q}
    ## B and L are optional parameters: only pass them when a value was entered.
    #if str( $analysis_type.B ):
        -B ${analysis_type.B}
    #end if
    #if str( $analysis_type.L ):
        -L ${analysis_type.L}
    #end if
#end if
        </token>
        <!-- Read-group fragment placed on the `bwa samse` / `bwa sampe` command lines.
             When $use_rg is true it expands the set_rg_string token and passes $rg_string through
             the r option. Neither $use_rg nor the set_rg_string token is defined in this file;
             presumably both come from the imported read_group_macros.xml (TODO confirm). -->
        <token name="@read_group_options@">
#if $use_rg:
    @set_rg_string@
    -r '$rg_string'
#end if
        </token>
        <!-- Body of a conditional offering the advanced `bwa sampe` (paired end) options.
             The selector values are "set" and "do_not_set"; the parameters under "set" are named
             after the sampe flags they correspond to (a, o, n, N, c). Every default mirrors the
             built-in default of bwa sampe, so choosing "Set" without editing anything is
             equivalent to "Do not set". -->
        <xml name="advanced_pe_options">
            <param name="adv_pe_options_selector" type="select" label="Set advanced paired end options?"
                   help="Provides additional controls">
                <option value="set">Set</option>
                <option value="do_not_set" selected="True">Do not set</option>
            </param>
            <when value="set">
                <param name="a" type="integer" value="500"
                       label="Maximum insert size for a read pair to be considered being mapped properly."
                       help="sampe -a; This option is only used when there are not enough good alignment to infer the distribution of insert sizes; default=500"/>
                <param name="o" type="integer" value="100000"
                       label="Maximum occurrences of a read for pairing. A read with more occurrences will be treated as a single-end read."
                       help="sampe -o; Reducing this parameter helps faster pairing; default=100000"/>
                <param name="n" type="integer" value="3"
                       label="Maximum number of alignments to output in the XA tag for reads paired properly."
                       help="sampe -n; If a read has more than this many hits, the XA tag will not be written; default=3"/>
                <param name="N" type="integer" value="10"
                       label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons)."
                       help="sampe -N; If a read has more than this many hits, the XA tag will not be written; default=10"/>
                <!-- bwa sampe reports 1.0e-05 as its default for this prior; the form default matches it. -->
                <param name="c" type="float" value="0.00001" label="Prior of chimeric rate (lower bound)"
                       help="sampe -c; default=0.00001"/>
            </when>
            <when value="do_not_set">
                <!-- do nothing -->
            </when>
        </xml>
        <!-- Body of a conditional offering the advanced single end options.
             Selector values: "set" exposes parameter n (XA tag hit limit), "do_not_set" exposes nothing. -->
        <xml name="advanced_se_options">
            <param name="adv_se_options_selector"
                   type="select"
                   label="Set advanced single end options?"
                   help="Provides additional controls">
                <option value="set">Set</option>
                <option value="do_not_set" selected="True">Do not set</option>
            </param>
            <when value="set">
                <param name="n"
                       type="integer"
                       value="3"
                       label="Maximum number of alignments to output in the XA tag."
                       help="-n; If a read has more than this many hits, the XA tag will not be written; default=3"/>
            </when>
            <when value="do_not_set">
                <!-- intentionally empty: no extra parameters in this branch -->
            </when>
        </xml>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command>
        <![CDATA[
## Pipeline: "bwa aln" writes one .sai file per read end, "bwa samse" / "bwa sampe" turns
## the .sai file(s) into SAM on stdout, and the trailing "samtools sort" writes the BAM output.
## The branch taken depends on the input_type selector:
## paired / paired_collection / single / paired_bam / single_bam.
@set_reference_fasta_filename@

## setup vars for rg handling...
@define_read_group_helpers@
#if str( $input_type.input_type_selector ) == "paired":
  #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1, $input_type.fastq_input2)
#elif str( $input_type.input_type_selector ) in ["single_bam", "paired_bam"]:
  #set $rg_auto_name = $read_group_name_default($input_type.bam_input)
#else
  #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1)
#end if
@set_use_rg_var@
@set_read_group_vars@

## Begin bwa command line

####### Fastq paired (two datasets, or one paired collection)

#if str( $input_type.input_type_selector ) == "paired" or str( $input_type.input_type_selector ) == "paired_collection":
    bwa aln
    -t "\${GALAXY_SLOTS:-1}"
    @command_options@
    '${reference_fasta_filename}'
    #if str( $input_type.input_type_selector ) == "paired_collection":
        '${input_type.fastq_input1.forward}'
    #else
        '${input_type.fastq_input1}'
    #end if
    > first.sai &&

    bwa aln
    -t "\${GALAXY_SLOTS:-1}"
    @command_options@
    '${reference_fasta_filename}'
    #if str( $input_type.input_type_selector ) == "paired_collection":
        '${input_type.fastq_input1.reverse}'
    #else
        '${input_type.fastq_input2}'
    #end if
    > second.sai &&

    bwa sampe
    ## The selector option values are "set" / "do_not_set"; the advanced sampe flags
    ## are only emitted when the user chose "set".
    #if str( $input_type.adv_pe_options.adv_pe_options_selector ) == "set":
        -a ${input_type.adv_pe_options.a}
        -o ${input_type.adv_pe_options.o}
        -n ${input_type.adv_pe_options.n}
        -N ${input_type.adv_pe_options.N}
        -c ${input_type.adv_pe_options.c}
    #end if
    @read_group_options@
    #if str( $input_type.input_type_selector ) == "paired_collection":
        '${reference_fasta_filename}' first.sai second.sai '${input_type.fastq_input1.forward}' '${input_type.fastq_input1.reverse}'
    #else
        '${reference_fasta_filename}' first.sai second.sai '${input_type.fastq_input1}' '${input_type.fastq_input2}'
    #end if

####### Fastq single

#elif str( $input_type.input_type_selector ) == "single":
    bwa aln
    -t "\${GALAXY_SLOTS:-1}"
    @command_options@
    '${reference_fasta_filename}'
    '${input_type.fastq_input1}'
    > first.sai &&

    bwa samse
    #if str( $input_type.adv_se_options.adv_se_options_selector ) == "set":
        -n ${input_type.adv_se_options.n}
    #end if
    @read_group_options@
    '${reference_fasta_filename}' first.sai '${input_type.fastq_input1}'

####### BAM paired

#elif str( $input_type.input_type_selector ) == "paired_bam":
    ## The same BAM is aligned twice: -b marks BAM input, -1 / -2 select the read end.
    bwa aln
    -t "\${GALAXY_SLOTS:-1}"
    -b
    -1
    @command_options@
    '${reference_fasta_filename}'
    '${input_type.bam_input}'
    > first.sai &&

    bwa aln
    -t "\${GALAXY_SLOTS:-1}"
    -b
    -2
    @command_options@
    '${reference_fasta_filename}'
    '${input_type.bam_input}'
    > second.sai &&

    bwa sampe
    #if str( $input_type.adv_bam_pe_options.adv_pe_options_selector ) == "set":
        -a ${input_type.adv_bam_pe_options.a}
        -o ${input_type.adv_bam_pe_options.o}
        -n ${input_type.adv_bam_pe_options.n}
        -N ${input_type.adv_bam_pe_options.N}
        -c ${input_type.adv_bam_pe_options.c}
    #end if
    @read_group_options@
    '${reference_fasta_filename}' first.sai second.sai '${input_type.bam_input}' '${input_type.bam_input}'

####### BAM single

#elif str( $input_type.input_type_selector ) == "single_bam":
    bwa aln
    -t "\${GALAXY_SLOTS:-1}"
    -b
    -0
    @command_options@
    '${reference_fasta_filename}'
    '${input_type.bam_input}'
    > first.sai &&

    bwa samse
    #if str( $input_type.adv_bam_se_options.adv_se_options_selector ) == "set":
        -n ${input_type.adv_bam_se_options.n}
    #end if
    @read_group_options@
    '${reference_fasta_filename}' first.sai '${input_type.bam_input}'
#end if

| samtools sort -@\${GALAXY_SLOTS:-2} -O bam -o '$bam_output'
]]>
    </command>

    <inputs>
        <expand macro="reference_source_conditional"/>
        <!-- Input selection. Each branch exposes the dataset parameter(s) read by the command
             template (fastq_input1 / fastq_input2 / bam_input) plus a nested conditional with the
             advanced samse or sampe options expanded from the macros defined in this file. -->
        <conditional name="input_type">
            <param name="input_type_selector" type="select" label="Select input type"
                   help="Select between fastq and bam datasets and between paired and single end data">
                <option value="paired">Paired fastq</option>
                <option value="paired_collection">Paired fastq collection</option>
                <option value="single">Single fastq</option>
                <option value="paired_bam">Paired BAM</option>
                <option value="single_bam">Single BAM</option>
            </param>
            <!-- two separate datasets: forward reads and reverse reads -->
            <when value="paired">
                <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta"
                       label="Select first set of reads" help="Specify dataset with forward reads"/>
                <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fasta"
                       label="Select second set of reads" help="Specify dataset with reverse reads"/>
                <conditional name="adv_pe_options">
                    <expand macro="advanced_pe_options"/>
                </conditional>
            </when>
            <!-- one paired collection; the command template reads its .forward and .reverse members -->
            <when value="paired_collection">
                <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection"
                       help="See help section for an explanation of dataset collections"/>
                <conditional name="adv_pe_options">
                    <expand macro="advanced_pe_options"/>
                </conditional>
            </when>
            <when value="single">
                <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta"
                       label="Select fastq dataset" help="Specify dataset with single reads"/>
                <conditional name="adv_se_options">
                    <expand macro="advanced_se_options"/>
                </conditional>
            </when>
            <!-- the difference between single and paired BAMs is in the <command> tag portion and is related to the -0, -1, and -2 options -->
            <when value="paired_bam">
                <param name="bam_input" type="data" format="bam" label="Select BAM dataset"
                       help="Specify BAM dataset with paired reads"/>
                <conditional name="adv_bam_pe_options">
                    <expand macro="advanced_pe_options"/>
                </conditional>
            </when>
            <when value="single_bam">
                <param name="bam_input" type="data" format="bam" label="Select BAM dataset"
                       help="Specify BAM dataset with single reads"/>
                <conditional name="adv_bam_se_options">
                    <expand macro="advanced_se_options"/>
                </conditional>
            </when>
        </conditional>
        <expand macro="read_group_conditional"/>
        <!-- Analysis mode. "illumina" adds no extra flags to `bwa aln`; "full" exposes every aln
             option, each parameter being named after the flag it feeds in the command template. -->
        <conditional name="analysis_type">
            <param name="analysis_type_selector" type="select" label="Select analysis mode">
                <option value="illumina">1.Simple Illumina mode</option>
                <option value="full">2.Full list of options</option>
            </param>
            <when value="illumina">
                <!-- do nothing -->
            </when>
            <when value="full">
                <!-- n is free text because bwa accepts either an integer (edit distance) or a
                     fraction; the validator restricts it to a plain non-negative number so that
                     nothing else can be placed on the command line through this field. -->
                <param name="n" type="text" value="0.04"
                       label="maximum edit distance if the value is integer, or the fraction of missing alignments given 2% uniform base error rate if float. In the latter case, the maximum edit distance is automatically chosen for different read lengths."
                       help="aln -n; default=0.04">
                    <validator type="regex" message="Enter a non-negative integer or decimal number, for example 2 or 0.04">^([0-9]+\.?[0-9]*|\.[0-9]+)$</validator>
                </param>
                <param name="o" type="integer" value="1" label="maximum number of gap openings"
                       help="aln -o; default=1"/>
                <param name="e" type="integer" value="-1" label="maximum number of gap extensions"
                       help="aln -e; -1 disables long gaps and invokes k-difference mode; default=-1"/>
                <param name="i" type="integer" value="5"
                       label="do not put an indel within this many bp towards the ends" help="aln -i; default=5"/>
                <param name="d" type="integer" value="10" label="maximum occurrences for extending a long deletion"
                       help="aln -d; default=10"/>
                <param name="l" type="integer" value="32" label="seed length" help="aln -l; default=32"/>
                <param name="k" type="integer" value="2" label="maximum differences in the seed"
                       help="aln -k; default=2"/>
                <param name="m" type="integer" value="2000000" label="maximum entries in the queue"
                       help="aln -m; default=2000000"/>
                <param name="M" type="integer" value="3" label="mismatch penalty" help="aln -M; default=3"/>
                <param name="O" type="integer" value="11" label="gap open penalty" help="aln -O; default=11"/>
                <param name="E" type="integer" value="4" label="gap extension penalty" help="aln -E; default=4"/>
                <param name="R" type="integer" value="30"
                       label="stop searching when there are more than this value of equally best hits"
                       help="aln -R; default=30"/>
                <param name="q" type="integer" value="0" label="quality threshold for read trimming down to 35bp"
                       help="aln -q; default=0"/>
                <!-- B and L are optional: the command template only emits them when a value is given -->
                <param name="B" type="integer" optional="True" label="length of barcode"
                       help="aln -B; optional parameter"/>
                <param name="L" type="float" optional="True" label="log-scaled gap penalty for long deletions"
                       help="aln -L; optional parameter"/>
            </when>
        </conditional>
    </inputs>
    <!-- Single BAM output; this is the file the final `samtools sort` step of the command writes to. -->
    <outputs>
        <data name="bam_output"
              format="bam"
              label="${tool.name} on ${on_string} (mapped reads in BAM format)">
            <expand macro="dbKeyActionsBwa"/>
        </data>
    </outputs>
    <!-- Functional tests. All of them index the reference from the history and run in the simple
         "illumina" analysis mode; they differ only in the kind of input and read-group settings. -->
    <tests>
        <!-- single-end input supplied as FASTA -->
        <test>
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
            <param name="input_type_selector" value="single"/>
            <param name="fastq_input1" ftype="fasta" value="bwa-mem-fasta1.fa"/>
            <param name="analysis_type_selector" value="illumina"/>
            <output name="bam_output" ftype="bam" file="bwa-aln-test1-fasta.bam" lines_diff="2"/>
        </test>
        <!-- paired-end input, two uncompressed fastqsanger datasets -->
        <test>
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
            <param name="input_type_selector" value="paired"/>
            <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
            <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
            <param name="analysis_type_selector" value="illumina"/>
            <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2"/>
        </test>
        <!-- paired-end input with a gzipped first dataset; expects the same output file as the uncompressed paired test -->
        <test>
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
            <param name="input_type_selector" value="paired"/>
            <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/>
            <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
            <param name="analysis_type_selector" value="illumina"/>
            <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2"/>
        </test>
        <!-- paired reads supplied as a single BAM dataset -->
        <test>
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
            <param name="input_type_selector" value="paired_bam"/>
            <param name="bam_input" ftype="bam" value="bwa-aln-bam-input.bam"/>
            <param name="analysis_type_selector" value="illumina"/>
            <output name="bam_output" ftype="bam" file="bwa-aln-test2.bam" lines_diff="2"/>
        </test>
        <!-- paired-end input with an explicitly set read group (ID and PL) -->
        <test>
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
            <param name="input_type_selector" value="paired"/>
            <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
            <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
            <param name="rg_selector" value="set"/>
            <param name="ID" value="rg1"/>
            <param name="PL" value="CAPILLARY"/>
            <param name="analysis_type_selector" value="illumina"/>
            <output name="bam_output" ftype="bam" file="bwa-aln-test3.bam" lines_diff="2"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

BWA is a software package for mapping low-divergent sequences against a large reference genome, such as the
human genome. The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use
the separate BWA-MEM Galaxy tool.

This Galaxy tool wraps bwa-aln, bwa-samse and -sampe modules of bwa read mapping tool:

- **bwa aln** - actual mapper placing reads onto the reference sequence
- **bwa samse** - post-processor converting suffix array coordinates into genome coordinates in SAM format for
  single reads
- **bwa sampe** - post-processor for paired reads


The Galaxy implementation takes fastq or BAM (unaligned BAM) datasets as input and produces output in BAM format,
which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard).

-----

**Indices: Selecting reference genomes for BWA**

The Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes
using the **Will you select a reference genome from your history or use a built-in index?** select box.

This select box has two options:

  1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select
     reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility
     and are ready to be mapped against.
  2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select
     reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your
     current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome
     from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run
     mapping with `bwa aln`.


If your genome of interest is not listed here you have two choices:

  1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index
     needs to be added
  2. Upload your genome of interest as a FASTA file to Galaxy history and select **Use a genome from the history
     and build index** option.


@RG@

@info@
    ]]></help>
    <!-- Publications to cite for this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btp324</citation>
        <citation type="doi">10.1093/bioinformatics/btp698</citation>
    </citations>
</tool>
author	iuc
date	Fri, 22 Dec 2017 15:02:36 -0500
parents	cbc665adcde4
children	29299da15326