<!-- Mercurial > repos > devteam > bowtie2 -->

<tool id="bowtie2" name="Bowtie2" version="@TOOL_VERSION@+galaxy0" profile="20.05">
    <!-- Tool description -->
    <description>- map reads against reference genome</description>

    <!-- Macro file imported by this wrapper -->
    <macros>
        <import>bowtie2_macros.xml</import>
    </macros>

    <!-- Software dependencies: bowtie2 itself, plus samtools, which the
         command below uses to sort/convert the alignments -->
    <requirements>
        <requirement version="@TOOL_VERSION@" type="package">bowtie2</requirement>
        <requirement version="1.11" type="package">samtools</requirement>
    </requirements>

    <!-- Reports the bowtie2 version -->
    <version_command>bowtie2 --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
## Turn on pipefail when the shell offers it, so that a failure anywhere in a
## pipeline makes the whole command exit non-zero
set -o | grep -q pipefail && set -o pipefail;

## Choose the bowtie2 index to align against:
##   built-in index -> path recorded for the selected index entry
##   history FASTA  -> build an index called "genome" in the working directory
#if str($reference_genome.source) != "history":
    #set index_path = $reference_genome.index.fields.path
#else:
    #set index_path = 'genome'
    bowtie2-build --threads \${GALAXY_SLOTS:-4} '$reference_genome.own_file' genome &&
    ln -s -f '$reference_genome.own_file' genome.fa &&
#end if

## Link in the input files, so bowtie2 can tell their type.
## Every read dataset is symlinked into the working directory under a name
## whose extension mirrors its datatype (.fastq, .fastq.gz, .fastq.bz2, .fasta).
## Variables produced here and consumed when the bowtie2 call is assembled:
##   read1, read2     link names passed to bowtie2
##   compressed       "GZ", "BZ2" or "False"; selects the matching --un/--al flag variant
##   reads_are_fastq  False when the first read dataset is FASTA (adds -f)
## Every link is made with "ln -f -s" so that all four layouts behave the same
## way and an already existing link name is replaced rather than failing the job.

#set compressed="False"
#set reads_are_fastq = True
#if str($library.type) == 'paired':
    ## two separate datasets: forward and reverse mates
    #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read1 = "input_f.fastq.gz"
        #set compressed = "GZ"
    #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "input_f.fastq.bz2"
        #set compressed = "BZ2"
    #else if $library.input_1.is_of_type('fasta'):
        #set reads_are_fastq = False
        #set read1 = "input_f.fasta"
    #else:
        #set read1 = "input_f.fastq"
    #end if
    ln -f -s '${library.input_1}' ${read1} &&

    #if $library.input_2.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read2 = "input_r.fastq.gz"
        #set compressed = "GZ"
    #else if $library.input_2.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read2 = "input_r.fastq.bz2"
        #set compressed = "BZ2"
    #else if $library.input_2.is_of_type('fasta'):
        #set read2 = "input_r.fasta"
    #else:
        #set read2 = "input_r.fastq"
    #end if
    ln -f -s '${library.input_2}' ${read2} &&
#else if str($library.type) == 'paired_collection':
    ## one paired collection: mates are its forward and reverse elements
    #if $library.input_1.forward.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read1 = "input_f.fastq.gz"
        #set compressed = "GZ"
    #else if $library.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "input_f.fastq.bz2"
        #set compressed = "BZ2"
    #else if $library.input_1.forward.is_of_type('fasta'):
        #set reads_are_fastq = False
        #set read1 = "input_f.fasta"
    #else:
        #set read1 = "input_f.fastq"
    #end if
    ln -f -s '${library.input_1.forward}' ${read1} &&

    #if $library.input_1.reverse.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read2 = "input_r.fastq.gz"
        #set compressed = "GZ"
    #else if $library.input_1.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read2 = "input_r.fastq.bz2"
        #set compressed = "BZ2"
    #else if $library.input_1.reverse.is_of_type("fasta"):
        #set read2 = "input_r.fasta"
    #else:
        #set read2 = "input_r.fastq"
    #end if
    ln -f -s '${library.input_1.reverse}' ${read2} &&

#else if str($library.type) == 'paired_interleaved':
    ## one dataset holding both mates
    #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read1 = "input_il.fastq.gz"
        #set compressed = "GZ"
    #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "input_il.fastq.bz2"
        #set compressed = "BZ2"
    #else if $library.input_1.is_of_type("fasta"):
        #set reads_are_fastq = False
        #set read1 = "input_il.fasta"
    #else:
        #set read1 = "input_il.fastq"
    #end if
    ln -f -s '${library.input_1}' ${read1} &&
#else:
    ## single-end: one dataset
    #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read1 = "input_f.fastq.gz"
        #set compressed = "GZ"
    #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "input_f.fastq.bz2"
        #set compressed = "BZ2"
    #else if $library.input_1.is_of_type("fasta"):
        #set reads_are_fastq = False
        #set read1 = "input_f.fasta"
    #else:
        #set read1 = "input_f.fastq"
    #end if
    ln -f -s '${library.input_1}' ${read1} &&
#end if

## execute bowtie2
## Everything from here up to the output redirection forms a single bowtie2
## command line; the preceding setup steps each end in "&&".

bowtie2

## number of threads
-p \${GALAXY_SLOTS:-4}

## index file path
-x '$index_path'

## Input reads are fasta?
#if not reads_are_fastq:
    -f
#end if

## Input reads
## Single and interleaved input use --un/--al for the optional unaligned and
## aligned read files; two-file paired input uses the --un-conc/--al-conc
## variants. The suffix (-gz, -bz2 or none) follows the value of "compressed"
## determined while the inputs were linked.
#if str( $library.type ) == "single":
    -U '${read1}'
    #if str( $library.unaligned_file ) == "true":
        #if $compressed == "GZ":
            --un-gz '${output_unaligned_reads_l}'
        #else if $compressed == "BZ2":
            --un-bz2 '${output_unaligned_reads_l}'
        #else:
            --un '${output_unaligned_reads_l}'
        #end if
    #end if
    #if str( $library.aligned_file ) == "true":
        #if $compressed == "GZ":
            --al-gz '${output_aligned_reads_l}'
        #else if $compressed == "BZ2":
            --al-bz2 '${output_aligned_reads_l}'
        #else:
            --al '${output_aligned_reads_l}'
        #end if
    #end if

#elif str( $library.type ) == "paired_interleaved":
    --interleaved '${read1}'
    #if str( $library.unaligned_file ) == "true":
        #if $compressed == "GZ":
            --un-gz '${output_unaligned_reads_l}'
        #else if $compressed == "BZ2":
            --un-bz2 '${output_unaligned_reads_l}'
        #else:
            --un '${output_unaligned_reads_l}'
        #end if
    #end if
    #if str( $library.aligned_file ) == "true":
        #if $compressed == "GZ":
            --al-gz '${output_aligned_reads_l}'
        #else if $compressed == "BZ2":
            --al-bz2 '${output_aligned_reads_l}'
        #else:
            --al '${output_aligned_reads_l}'
        #end if
    #end if
#else:
    -1 '${read1}'
    -2 '${read2}'
    #if str( $library.unaligned_file ) == "true":
        #if $compressed == "GZ":
            --un-conc-gz '${output_unaligned_reads_l}'
        #else if $compressed == "BZ2":
            --un-conc-bz2 '${output_unaligned_reads_l}'
        #else:
            --un-conc '${output_unaligned_reads_l}'
        #end if
    #end if
    #if str( $library.aligned_file ) == "true":
        #if $compressed == "GZ":
            --al-conc-gz '${output_aligned_reads_l}'
        #else if $compressed == "BZ2":
            --al-conc-bz2 '${output_aligned_reads_l}'
        #else:
            --al-conc '${output_aligned_reads_l}'
        #end if
    #end if
#end if

## Paired-end options
## The form offers these for every non-single layout (paired,
## paired_collection and paired_interleaved), so they are emitted for all
## three. The outer test keeps the single-end layout, which has no
## paired_options section, from being looked up.
#if str( $library.type ) != "single":
    #if str( $library.paired_options.paired_options_selector ) == "yes":
        -I ${library.paired_options.I}
        -X ${library.paired_options.X}
        ${library.paired_options.fr_rf_ff}
        ${library.paired_options.no_mixed}
        ${library.paired_options.no_discordant}
        ${library.paired_options.dovetail}
        ${library.paired_options.no_contain}
        ${library.paired_options.no_overlap}
    #end if
#end if

## Read group information.
## The @...@ tokens below are substituted before the template is evaluated;
## presumably they come from the imported macro file and provide the
## read_group_name_default / format_read_group helpers as well as the
## use_rg and rg_* variables used here - confirm against bowtie2_macros.xml.
@define_read_group_helpers@
## Only the two-dataset "paired" layout feeds both inputs into the default
## name; every other layout passes input_1 alone.
#if str( $library.type ) == "paired":
    #set $rg_auto_name = $read_group_name_default($library.input_1, $library.input_2)
#else:
    #set $rg_auto_name = $read_group_name_default($library.input_1)
#end if
@set_use_rg_var@
@set_read_group_vars@
#if $use_rg:
    ## the ID is passed via --rg-id, each remaining field via its own --rg
    $format_read_group("", $rg_id, '"', arg='--rg-id ')
    $format_read_group("SM:", $rg_sm, '"', arg='--rg ')
    $format_read_group("PL:", $rg_pl, '"', arg='--rg ')
    $format_read_group("LB:", $rg_lb, '"', arg='--rg ')
    $format_read_group("CN:", $rg_cn, '"', arg='--rg ')
    $format_read_group("DS:", $rg_ds, '"', arg='--rg ')
    $format_read_group("DT:", $rg_dt, '"', arg='--rg ')
    $format_read_group("FO:", $rg_fo, '"', arg='--rg ')
    $format_read_group("KS:", $rg_ks, '"', arg='--rg ')
    $format_read_group("PG:", $rg_pg, '"', arg='--rg ')
    $format_read_group("PI:", $rg_pi, '"', arg='--rg ')
    $format_read_group("PU:", $rg_pu, '"', arg='--rg ')
#end if

## Analysis type
## "simple": pass the chosen preset flag, or nothing when "no_presets" is selected.
## "full":   emit each option group the user switched on with its selector.
## The selector offers only these two modes, so no other branch is needed.
#if ( str( $analysis_type.analysis_type_selector ) == "simple" and str( $analysis_type.presets ) != "no_presets" ):
    $analysis_type.presets
#elif str( $analysis_type.analysis_type_selector ) == "full":
    ## input options
    #if str( $analysis_type.input_options.input_options_selector ) == "yes":
        --skip ${analysis_type.input_options.skip}
        --qupto ${analysis_type.input_options.qupto}
        --trim5 ${analysis_type.input_options.trim5}
        --trim3 ${analysis_type.input_options.trim3}
        ${analysis_type.input_options.qv_encoding}
        ${analysis_type.input_options.solexa_quals}
        ${analysis_type.input_options.int_quals}
    #end if

    ## alignment options; free-text function values are single-quoted
    #if str( $analysis_type.alignment_options.alignment_options_selector ) == "yes":
        -N ${analysis_type.alignment_options.N}
        -L ${analysis_type.alignment_options.L}
        -i '${analysis_type.alignment_options.i}'
        --n-ceil '${analysis_type.alignment_options.n_ceil}'
        --dpad ${analysis_type.alignment_options.dpad}
        --gbar ${analysis_type.alignment_options.gbar}
        ${analysis_type.alignment_options.ignore_quals}
        ${analysis_type.alignment_options.nofw}
        ${analysis_type.alignment_options.norc}
        ${analysis_type.alignment_options.no_1mm_upfront}
        #if str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "end-to-end":
            --end-to-end
            --score-min '${analysis_type.alignment_options.align_mode.score_min_ete}'
        #elif str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "local":
            --local
            --score-min '${analysis_type.alignment_options.align_mode.score_min_loc}'
        #end if
    #end if

    ## scoring options
    #if str( $analysis_type.scoring_options.scoring_options_selector ) == "yes":
        ## the match bonus is only passed when --local was explicitly selected above
        #if ( str( $analysis_type.alignment_options.alignment_options_selector ) == "yes" and str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "local" ):
            --ma ${analysis_type.scoring_options.ma}
        #end if
        --mp '${analysis_type.scoring_options.mp}'
        --np ${analysis_type.scoring_options.np}
        --rdg ${analysis_type.scoring_options.rdg_read_open},${analysis_type.scoring_options.rdg_read_extend}
        --rfg ${analysis_type.scoring_options.rfg_ref_open},${analysis_type.scoring_options.rfg_ref_extend}
    #end if

    ## reporting options: -k <n>, -a, or neither
    #if str( $analysis_type.reporting_options.reporting_options_selector ) == "k":
        -k ${analysis_type.reporting_options.k}
    #elif str( $analysis_type.reporting_options.reporting_options_selector ) == "a":
        -a
    #end if

    ## effort options
    #if str( $analysis_type.effort_options.effort_options_selector ) == "yes":
        -D ${analysis_type.effort_options.D}
        -R ${analysis_type.effort_options.R}
    #end if

    ## other options
    #if str( $analysis_type.other_options.other_options_selector ) == "yes":
        ${analysis_type.other_options.non_deterministic}
        --seed ${analysis_type.other_options.seed}
    #end if

#end if

## Optional SAM flags; each parameter presumably expands to its flag text or
## to an empty string (their definitions are outside this section)
#if str( $sam_options.sam_options_selector ) == "yes":
    ${sam_options.no_unal}
    ${sam_options.omit_sec_seq}
    ${sam_options.sam_no_qname_trunc}
    ${sam_options.xeq}
    ${sam_options.soft_clipped_unmapped_tlen}
    ${sam_options.reorder}
#end if

## Mapping statistics: bowtie2 writes them to stderr, so capture that stream on request
#if $save_mapping_stats:
    2> '$mapping_stats'
#end if

## Output file
## Alignments are piped through "samtools sort" when the SAM/BAM options are
## untouched, or when sam_opt is "false" and reorder is empty. The second test
## is only evaluated once the selector is known not to be "no", exactly as a
## short-circuiting "or" would do.
#set $sort_to_bam = ( str( $sam_options.sam_options_selector ) == "no" )
#if not $sort_to_bam:
    #set $sort_to_bam = ( str( $sam_options.sam_opt ) == "false" and str( $sam_options.reorder ) == '' )
#end if
#if $sort_to_bam:
    | samtools sort --no-PG -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '$output'
#elif $sam_options.reorder:
    ## reorder requested: convert to BAM without sorting
    | samtools view --no-PG -bS - -o '$output'
#else:
    ## otherwise bowtie2's stdout goes to the output dataset unchanged
    > '$output'
#end if

## Rename the per-mate read files
## For the two-file layouts, the --un-conc/--al-conc path was given as the
## "_l" output; the mv commands below expect the per-mate files to have been
## written with ".1" and ".2" inserted before the extension and move them onto
## the left and right output datasets.
#from os.path import splitext
#set two_file_layout = ( $library.type == "paired" or $library.type == "paired_collection" )

## unaligned reads
#if $two_file_layout and $output_unaligned_reads_l and $output_unaligned_reads_r:
    #set un_stem, un_suffix = splitext( str( $output_unaligned_reads_l ) )
    && mv '${un_stem}.1${un_suffix}' '$output_unaligned_reads_l'
    && mv '${un_stem}.2${un_suffix}' '$output_unaligned_reads_r'
#end if

## aligned reads
#if $two_file_layout and $output_aligned_reads_l and $output_aligned_reads_r:
    #set al_stem, al_suffix = splitext( str( $output_aligned_reads_l ) )
    && mv '${al_stem}.1${al_suffix}' '$output_aligned_reads_l'
    && mv '${al_stem}.2${al_suffix}' '$output_aligned_reads_r'
#end if

        ]]></command>
    <inputs>
        <!-- single/paired -->
        <!-- Read layout. Every layout names its (first) read input "input_1";
             only the two-file "paired" layout adds "input_2". The paired layouts
             additionally expand the paired-end option macro. -->
        <conditional name="library">
            <param name="type" type="select" label="Is this single or paired library">
              <option value="single">Single-end</option>
              <option value="paired">Paired-end</option>
              <option value="paired_collection">Paired-end Dataset Collection</option>
              <option value="paired_interleaved">Paired-end data from single interleaved dataset</option>
            </param>

            <when value="single">
                <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTA/Q file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
                <expand macro="align_unalign" />
            </when>
            <when value="paired">
                <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTA/Q file #1" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
                <param name="input_2" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTA/Q file #2" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
                <expand macro="align_unalign" />
                <expand macro="paired_end_options" />
            </when>
            <when value="paired_collection">
                <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
                <expand macro="align_unalign" />
                <expand macro="paired_end_options" />
            </when>
            <when value="paired_interleaved">
                <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="Interleaved FASTQ file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;. --interleaved"/>
                <expand macro="align_unalign" />
                <expand macro="paired_end_options" />
            </when>
        </conditional>
        <!-- Reference genome: either an entry of the bowtie2_indexes data table
             or a FASTA dataset from the history, which the command indexes with
             bowtie2-build before aligning -->
        <conditional name="reference_genome">
            <param name="source"
                   type="select"
                   label="Will you select a reference genome from your history or use a built-in index?"
                   help="Built-ins were indexed using default options. See `Indexes` section of help below">
                <option value="indexed">Use a built-in genome index</option>
                <option value="history">Use a genome from the history and build index</option>
            </param>

            <!-- built-in index -->
            <when value="indexed">
                <param name="index"
                       type="select"
                       label="Select reference genome"
                       help="If your genome of interest is not listed, contact the Galaxy team">
                    <options from_data_table="bowtie2_indexes">
                        <!-- entries are sorted on data-table column 2 -->
                        <filter type="sort_by" column="2"/>
                        <validator type="no_options"
                                   message="No indexes are available for the selected input dataset"/>
                    </options>
                </param>
            </when>

            <!-- FASTA from the history -->
            <when value="history">
                <param name="own_file"
                       type="data"
                       format="fasta"
                       label="Select reference genome"/>
            </when>
        </conditional>
        <!-- read group settings -->
        <expand macro="read_group_conditional" />
        <conditional name="analysis_type">
            <param name="analysis_type_selector" type="select" label="Select analysis mode">
                <option value="simple">1: Default setting only</option>
                <option value="full">2: Full parameter list</option>
            </param>
            <when value="simple">
                <param name="presets" type="select" display="radio" label="Do you want to use presets?" help="Allow selecting among several preset parameter settings. Choosing between these will result in dramatic changes in runtime. See help below to understand effects of these presets.">
                    <option value="no_presets" selected="True">No, just use defaults</option>
                    <option value="--very-fast">Very fast end-to-end (--very-fast)</option>
                    <option value="--fast">Fast end-to-end (--fast)</option>
                    <option value="--sensitive">Sensitive end-to-end (--sensitive)</option>
                    <option value="--very-sensitive">Very sensitive end-to-end (--very-sensitive)</option>
                    <option value="--very-fast-local">Very fast local (--very-fast-local)</option>
                    <option value="--fast-local">Fast local (--fast-local)</option>
                    <option value="--sensitive-local">Sensitive local (--sensitive-local)</option>
                    <option value="--very-sensitive-local">Very sensitive local (--very-sensitive-local)</option>
                </param>
            </when>
            <when value="full">
                <conditional name="input_options">
                    <param name="input_options_selector" type="select" label="Do you want to tweak input options?" help="See &quot;Input Options&quot; section of Help below for information">
                        <option value="yes">Yes</option>
                        <option value="no" selected="true">No</option>
                    </param>
                    <when value="yes">
                        <param name="skip" type="integer" min="0" value="0" label="Skip (i.e. do not align) the first that many reads or pairs in the input" help="-s/--skip; default=0"/>
                        <param name="qupto" type="integer" min="1" value="100000000" label="Align the first that many reads or read pairs from the input (after the -s/--skip reads or pairs have been skipped), then stop" help="-u/--qupto; for default behavior (no limit) leave this value very large"/>
                        <param name="trim5" type="integer" min="0" value="0" label="Trim that many bases from 5' (left) end of each read before alignment" help="-5/--trim5; default=0"/>
                        <param name="trim3" type="integer" min="0" value="0" label="Trim that many bases from 3' (right) end of each read before alignment" help="-3/--trim3; default=0"/>
                        <param name="qv_encoding" type="select" display="radio" label="Select quality score encoding" help="See help below for more details">
                            <option value="--phred33" selected="True">Input qualities are ASCII chars equal to the Phred quality plus 33. This is also called the "Phred+33" encoding, which is used by the very latest Illumina pipelines (--phred33)</option>
                            <option value="--phred64">Input qualities are ASCII chars equal to the Phred quality plus 64. This is also called the "Phred+64" encoding (--phred64)</option>
                        </param>
                        <param name="solexa_quals" type="boolean" truevalue="--solexa-quals" falsevalue="" checked="False" label="Convert input qualities from Solexa (which can be negative) to Phred (which can't). This scheme was used in older Illumina GA Pipeline versions (prior to 1.3)" help="--solexa-quals; default=False"/>
                        <param name="int_quals" type="boolean" truevalue="--int-quals" falsevalue="" checked="False" label="Quality values are represented in the read input file as space-separated ASCII integers, e.g., 40 40 30 40..., rather than ASCII characters, e.g., II?I.... Integers are treated as being on the Phred quality scale unless --solexa-quals is also specified" help="--int-quals; default=False"/>
                    </when>
                    <when value="no">
                        <!-- do nothing -->
                    </when>
                </conditional>
                <conditional name="alignment_options">
                    <param name="alignment_options_selector" type="select" label="Do you want to tweak alignment options?" help="See &quot;Alignment Options&quot; section of Help below for information">
                        <option value="yes">Yes</option>
                        <option value="no" selected="true">No</option>
                    </param>
                    <when value="yes">
                        <param name="N" type="integer" min="0" max="1" value="0" label="Set the number of mismatches to be allowed in a seed alignment during multiseed alignment (see `Multiseed alignment` section of help below)" help="-N; Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower) but increases sensitivity; default=0"/>
                        <param name="L" type="integer" min="0" max="32" value="22" label="Sets the length of the seed substrings to align during multiseed alignment (see `Multiseed alignment` section of help below)" help="-L; Smaller values make alignment slower but more sensitive. Default=22"/>
                        <param name="i" type="text" value="S,1,1.15" label="Set a function governing the interval between seed substrings to use during multiseed alignment (see `Multiseed alignment` section of help below). Also see description of this option below in the help section" help="-i; Since it's best to use longer intervals for longer reads, this parameter sets the interval as a function of the read length, rather than a single one-size-fits-all number. For instance, specifying `-i S,1,2.5` sets the interval function `f` to `f(x) = 1 + 2.5 * sqrt(x)`, where x is the read length. If the function returns a result less than 1, it is rounded up to 1. Default=`S,1,1.15`"/>
                        <param name="n_ceil" type="text" value="L,0,0.15" label="Set a function governing the maximum number of ambiguous characters (usually `N`s and/or `.`s) allowed in a read as a function of read length" help="--n-ceil; For instance, specifying `L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`, where x is the read length. Reads exceeding this ceiling are filtered out. Default=`L,0,0.15`"/>
                        <param name="dpad" type="integer" min="0" value="15" label="Pad dynamic programming problems by that many columns on either side to allow gaps" help="--dpad; default=15"/>
                        <param name="gbar" type="integer" min="0" value="4" label="Disallow gaps within that many positions of the beginning or end of the read" help="--gbar; default=4"/>
                        <param name="ignore_quals" type="boolean" truevalue="--ignore-quals" falsevalue="" label="When calculating a mismatch penalty, always consider the quality value at the mismatched position to be the highest possible, regardless of the actual value" help="--ignore-quals; input is treated as though all quality values are high; default=False"/>
                        <param name="nofw" type="boolean" truevalue="--nofw" falsevalue="" label="Do not attempt to align unpaired reads to the forward (Watson) reference strand" help="In paired-end mode, `--nofw` and `--norc` pertain to the fragments; i.e. specifying `--nofw` causes `bowtie2` to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand. Default=False"/>
                        <param name="norc" type="boolean" truevalue="--norc" falsevalue="" label="Do not attempt to align unpaired reads to the reverse (Crick) reference strand" help="In paired-end mode, `--nofw` and `--norc` pertain to the fragments; i.e. specifying `--nofw` causes `bowtie2` to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand. Default=False"/>
                        <param name="no_1mm_upfront" type="boolean" truevalue="--no-1mm-upfront" falsevalue="" label="Prevent searching for 1-mismatch end-to-end alignments before using the multiseed heuristic (see `Multiseed alignment` section of help below)" help="--no-1mm-upfront; By default, Bowtie 2 will attempt to find either an exact or a 1-mismatch end-to-end alignment for the read *before* trying the multiseed heuristic.  Such alignments can be found very quickly, and many short read alignments have exact or near-exact end-to-end alignments.  However, this can lead to unexpected alignments when the user also sets options governing the multiseed heuristic, like `-L` and `-N`.  For instance, if the user specifies `-N 0` and `-L` equal to the length of the read, the user will be surprised to find 1-mismatch alignments reported.  This option prevents Bowtie 2 from searching for 1-mismatch end-to-end alignments before using the multiseed heuristic, which leads to the expected behavior when combined with options such as `-L` and `-N`. This comes at the expense of speed; Default=False"/>
                        <conditional name="align_mode">
                            <param name="align_mode_selector" type="select" display="radio" label="Select between `--local` and `--end-to-end` alignment modes" help="--local and --end-to-end; see help below for detailed explanation; default=--end-to-end">
                                <option value="end-to-end" selected="True">End to End (--end-to-end)</option>
                                <option value="local">Local (--local)</option>
                            </param>
                            <when value="end-to-end">
                                <param name="score_min_ete" type="text" value="L,-0.6,-0.6" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length. The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/>
                            </when>
                            <when value="local">
                                <param name="score_min_loc" type="text" value="G,20,8" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length. The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/>
                            </when>
                        </conditional>
                    </when>
                    <when value="no">
                        <!-- do nothing -->
                    </when>
                </conditional>
                <conditional name="scoring_options">
                    <param name="scoring_options_selector" type="select" label="Do you want to tweak scoring options?" help="See &quot;Scoring Options&quot; section of Help below for information">
                        <option value="yes">Yes</option>
                        <option value="no" selected="true">No</option>
                    </param>
                    <when value="yes">
                        <param name="ma" type="integer" value="2" label="Set the match bonus" help="--ma;  In `--local` mode match bonus is added to the alignment score for each position where a read character aligns to a reference character and the characters match. Not used in `--end-to-end` mode; Default=2"/>
                        <param name="mp" type="text" value="6,2" label="Set the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers" help="--mp; A number less than or equal to `MX` and greater than or equal to `MN` is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an `N`.  If `--ignore-quals` is specified, the number subtracted quals `MX`. Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` where Q is the Phred quality value; Default=6,2"/>
                        <param name="np" type="integer" value="1" label="Sets penalty for positions where the read, reference, or both, contain an ambiguous character such as `N`" help="--np; Default=1"/>
                        <param name="rdg_read_open" type="integer" value="5" label="Set the read gap opening penalty" help="--rdg; this is the first component of --rdg flag - opening penalty; Default=5"/>
                        <param name="rdg_read_extend" type="integer" value="3" label="Set the read gap extension penalty" help="--rdg; this is the second component of --rdg flag - extension penalty; Default=3"/>
                        <param name="rfg_ref_open" type="integer" value="5" label="Set the reference gap opening penalty" help="--rfg; this is the first component of --rfg flag - opening penalty; Default=5"/>
                        <param name="rfg_ref_extend" type="integer" value="3" label="Set the reference gap extension penalty" help="--rfg; this is the second component of --rfg flag - extension penalty; Default=3"/>
                    </when>
                    <when value="no">
                        <!-- do nothing -->
                    </when>
                </conditional>
                <conditional name="reporting_options">
                    <param name="reporting_options_selector" type="select" label="Do you want to use -a or -k options" help="Make sure you understand implications of setting -k and -a. See &quot;Reporting Options&quot; section of Help below for information on -k and -a options">
                        <option value="no" selected="true">No, do not set</option>
                        <option value="k">Set -k option and enter -k value</option>
                        <option value="a">Set -a option</option>
                    </param>
                    <when value="no">
                        <!-- do nothing -->
                    </when>
                    <when value="k">
                        <param name="k" type="integer" min="1" value="1" label="Searches for at most that many distinct, valid alignments for each read" help="-k; see detailed description of this option in the help section below. Note: Bowtie 2 is not designed with large values for `-k` in mind, and when aligning reads to long, repetitive genomes large `-k` can be very, very slow"/>
                    </when>
                    <when value="a">
                        <!-- do nothing here; set -a flag on the command line-->
                    </when>
                </conditional>
                <conditional name="effort_options">
                    <!-- Effort options: when enabled, exposes the -D and -R limits (shown with their defaults, 15 and 2) -->
                    <param name="effort_options_selector" type="select" label="Do you want to tweak effort options?" help="See &quot;Effort Options&quot; section of Help below for information">
                        <option value="yes">Yes</option>
                        <option value="no" selected="true">No</option>
                    </param>
                    <when value="yes">
                        <param name="D" type="integer" value="15" min="0" label="Attempt that many consecutive seed extension attempts to `fail` before Bowtie 2 moves on, using the alignments found so far" help="-D; A seed extension `fails` if it does not yield a new best or a new second-best alignment.  This limit is automatically adjusted up when -k or -a are specified. Default=15"/>
                        <!-- Help text leads with the command-line flag, matching the convention used by the other parameters -->
                        <param name="R" type="integer" value="2" min="0" label="Set the maximum number of times Bowtie 2 will `re-seed` reads with repetitive seeds" help="-R; When `re-seeding`, Bowtie 2 simply chooses a new set of reads (same length, same number of mismatches allowed) at different offsets and searches for more alignments.  A read is considered to have repetitive seeds if the total number of seed hits divided by the number of seeds that aligned at least once is greater than 300.  Default=2"/>
                    </when>
                    <when value="no">
                        <!-- do nothing -->
                    </when>
                </conditional>
                <conditional name="other_options">
                    <!-- Pseudo-random generator controls: --seed and --non-deterministic -->
                    <param name="other_options_selector"
                           type="select"
                           label="Do you want to tweak Other Options?"
                           help="See &quot;Other Options&quot; section of Help below for information">
                        <option value="yes">Yes</option>
                        <option value="no" selected="true">No</option>
                    </param>
                    <when value="yes">
                        <param name="seed"
                               type="integer"
                               value="0"
                               min="0"
                               label="Use this number as the seed for pseudo-random number generator"
                               help="--seed; Default=0"/>
                        <param name="non_deterministic"
                               type="boolean"
                               truevalue="--non-deterministic"
                               falsevalue=""
                               label="Re-initialize the pseudo-random generator for each read using the current time"
                               help="--non-deterministic; see Help below for explanation of this option; default=False"/>
                    </when>
                    <!-- "no": nothing to configure -->
                    <when value="no"/>
                </conditional>
            </when>
        </conditional>
        <conditional name="sam_options">
            <!-- SAM/BAM output tweaks; the boolean truevalues below are the literal bowtie2 flags,
                 except sam_opt, which only switches the output datatype to SAM -->
            <param name="sam_options_selector" type="select" label="Do you want to tweak SAM/BAM Options?" help="See &quot;Output Options&quot; section of Help below for information">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <param name="sam_opt" type="boolean" truevalue="true" falsevalue="false" label="Would you like the output to be a SAM file" help="By default, the output from this Bowtie2 wrapper is a sorted BAM file."/>
                <param name="no_unal" type="boolean" truevalue="--no-unal" falsevalue="" label="Suppress SAM records for reads that failed to align" help="--no-unal; Default=False"/>
                <param name="omit_sec_seq" type="boolean" truevalue="--omit-sec-seq" falsevalue="" label="Suppress SEQ and QUAL strings for secondary alignments" help="--omit-sec-seq; Default=False"/>
                <param name="sam_no_qname_trunc" argument="--sam-no-qname-trunc" type="boolean" truevalue="--sam-no-qname-trunc" falsevalue="" label="Suppress standard behavior of truncating readname at first whitespace at the expense of generating non-standard SAM"/>
                <!-- label: comma moved outside the quoted 'M' -->
                <param argument="--xeq" type="boolean" truevalue="--xeq" falsevalue="" label="Use '='/'X', instead of 'M', to specify matches/mismatches in SAM record."/>
                <!-- label: stray leading space removed -->
                <param name="soft_clipped_unmapped_tlen" argument="--soft-clipped-unmapped-tlen" type="boolean" truevalue="--soft-clipped-unmapped-tlen" falsevalue="" label="Exclude soft-clipped bases when reporting TLEN"/>
                <param name="reorder" argument="--reorder" type="boolean" truevalue="--reorder" falsevalue=""
                    label="Reorder output to reflect order of the input file"
                    help="Reorder guarantees that output SAM records are printed in an order corresponding to the order of the reads in the original input file, even when -p is set greater than 1." />
            </when>
            <when value="no"/>
        </conditional>
        <!-- Opt-in switch for keeping the mapping statistics as an extra history dataset (off by default) -->
        <param name="save_mapping_stats"
               type="boolean"
               checked="False"
               label="Save the bowtie2 mapping statistics to the history"/>
    </inputs>
    <!-- define outputs -->
    <outputs>
        <data name="output_unaligned_reads_l"
              format="fastqsanger"
              label="${tool.name} on ${on_string}: unaligned reads (L)">
            <!-- Created only when the library's unaligned_file switch is on -->
            <filter>library['unaligned_file'] is True</filter>
            <actions>
                <!-- Datatype is copied from the matching read input (forward element for collections) -->
                <conditional name="library.type">
                    <when value="single">
                        <action type="format">
                            <option name="library.input_1" type="from_param" param_attribute="ext"/>
                        </action>
                    </when>
                    <when value="paired">
                        <action type="format">
                            <option name="library.input_1" type="from_param" param_attribute="ext"/>
                        </action>
                    </when>
                    <when value="paired_collection">
                        <action type="format">
                            <option name="library.input_1" type="from_param" param_attribute="forward.ext"/>
                        </action>
                    </when>
                </conditional>
                <expand macro="dbKeyActions"/>
            </actions>
        </data>
        <data name="output_aligned_reads_l"
              format="fastqsanger"
              label="${tool.name} on ${on_string}: aligned reads (L)">
            <!-- Created only when the library's aligned_file switch is on -->
            <filter>library['aligned_file'] is True</filter>
            <actions>
                <!-- Datatype is copied from the matching read input (forward element for collections) -->
                <conditional name="library.type">
                    <when value="single">
                        <action type="format">
                            <option name="library.input_1" type="from_param" param_attribute="ext"/>
                        </action>
                    </when>
                    <when value="paired">
                        <action type="format">
                            <option name="library.input_1" type="from_param" param_attribute="ext"/>
                        </action>
                    </when>
                    <when value="paired_collection">
                        <action type="format">
                            <option name="library.input_1" type="from_param" param_attribute="forward.ext"/>
                        </action>
                    </when>
                </conditional>
                <expand macro="dbKeyActions"/>
            </actions>
        </data>
        <data name="output_aligned_reads_r"
              format="fastqsanger"
              label="${tool.name} on ${on_string}: aligned reads (R)">
            <!-- Second-mate counterpart: needs a paired or paired_collection library with aligned_file on -->
            <filter>( library['type'] == "paired" or library['type'] == "paired_collection" ) and library['aligned_file'] is True</filter>
            <actions>
                <!-- Datatype is copied from the second read input (reverse element for collections) -->
                <conditional name="library.type">
                    <when value="paired">
                        <action type="format">
                            <option name="library.input_2" type="from_param" param_attribute="ext"/>
                        </action>
                    </when>
                    <when value="paired_collection">
                        <action type="format">
                            <option name="library.input_1" type="from_param" param_attribute="reverse.ext"/>
                        </action>
                    </when>
                </conditional>
                <expand macro="dbKeyActions"/>
            </actions>
        </data>
        <data name="output_unaligned_reads_r"
              format="fastqsanger"
              label="${tool.name} on ${on_string}: unaligned reads (R)">
            <!-- Second-mate counterpart: needs a paired or paired_collection library with unaligned_file on -->
            <filter>( library['type'] == "paired" or library['type'] == "paired_collection" ) and library['unaligned_file'] is True</filter>
            <actions>
                <!-- Datatype is copied from the second read input (reverse element for collections) -->
                <conditional name="library.type">
                    <when value="paired">
                        <action type="format">
                            <option name="library.input_2" type="from_param" param_attribute="ext"/>
                        </action>
                    </when>
                    <when value="paired_collection">
                        <action type="format">
                            <option name="library.input_1" type="from_param" param_attribute="reverse.ext"/>
                        </action>
                    </when>
                </conditional>
                <expand macro="dbKeyActions"/>
            </actions>
        </data>
        <data format="bam" name="output" label="${tool.name} on ${on_string}: alignments">
            <!-- Main alignment dataset: BAM unless sam_options switches it to
                 qname_input_sorted.bam (reorder) or sam (sam_opt) -->
            <change_format>
                <when input="sam_options.reorder" value="--reorder" format="qname_input_sorted.bam"/>
                <when input="sam_options.sam_opt" value="true" format="sam"/>
            </change_format>
            <actions>
                <!-- dbkey comes from the bowtie2_indexes table for built-in indexes,
                     or from the reference dataset when the genome is taken from the history -->
                <conditional name="reference_genome.source">
                    <when value="indexed">
                        <action type="metadata" name="dbkey">
                            <option type="from_data_table" name="bowtie2_indexes" column="1" offset="0">
                                <!-- discard rows whose first column starts with "#" -->
                                <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                                <!-- match the first column against the selected index -->
                                <filter type="param_value" ref="reference_genome.index" column="0"/>
                            </option>
                        </action>
                    </when>
                    <when value="history">
                        <action type="metadata" name="dbkey">
                            <option type="from_param" name="reference_genome.own_file" param_attribute="dbkey"/>
                        </action>
                    </when>
                </conditional>
                <expand macro="dbKeyActions"/>
            </actions>
        </data>
        <data format="txt" name="mapping_stats" label="${tool.name} on ${on_string}: mapping stats">
            <!-- Plain-text statistics dataset; present only when save_mapping_stats is checked -->
            <filter>save_mapping_stats is True</filter>
            <actions>
                <expand macro="dbKeyActions"/>
            </actions>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- paired-end fastqsanger reads, reference FASTA taken from the history, simple analysis mode -->
            <param name="source" value="history"/>
            <param name="own_file" value="bowtie2-ref.fasta"/>
            <param name="type" value="paired"/>
            <param name="input_1" value="bowtie2-fq1.fq" ftype="fastqsanger"/>
            <param name="input_2" value="bowtie2-fq2.fq" ftype="fastqsanger"/>
            <param name="paired_options_selector" value="no"/>
            <param name="unaligned_file" value="false"/>
            <param name="analysis_type_selector" value="simple"/>
            <output name="output" file="bowtie2-test1.bam" ftype="bam" lines_diff="2"/>
        </test>
        <test expect_num_outputs="3">
            <!-- paired collection input with unaligned reads requested: alignments plus unaligned (L) and (R) -->
            <param name="source" value="history"/>
            <param name="own_file" value="bowtie2-ref.fasta"/>
            <param name="type" value="paired_collection"/>
            <param name="input_1">
                <collection type="paired">
                    <element name="forward" value="bowtie2-fq1.fq" ftype="fastqsanger"/>
                    <element name="reverse" value="bowtie2-fq2.fq" ftype="fastqsanger"/>
                </collection>
            </param>
            <param name="paired_options_selector" value="no"/>
            <param name="unaligned_file" value="true"/>
            <param name="analysis_type_selector" value="simple"/>
            <output name="output" file="bowtie2-test1.bam" ftype="bam" lines_diff="2"/>
        </test>
        <test expect_num_outputs="1">
            <!-- paired-end reads with an explicit read group (ID rg1, PL CAPILLARY) -->
            <param name="source" value="history"/>
            <param name="own_file" value="bowtie2-ref.fasta"/>
            <param name="type" value="paired"/>
            <param name="input_1" value="bowtie2-fq1.fq" ftype="fastqsanger"/>
            <param name="input_2" value="bowtie2-fq2.fq" ftype="fastqsanger"/>
            <param name="paired_options_selector" value="no"/>
            <param name="unaligned_file" value="false"/>
            <param name="analysis_type_selector" value="simple"/>
            <param name="rg_selector" value="set"/>
            <param name="ID" value="rg1"/>
            <param name="PL" value="CAPILLARY"/>
            <output name="output" file="bowtie2-test2.bam" ftype="bam" lines_diff="2"/>
        </test>
        <test expect_num_outputs="2">
            <!-- paired-end reads with the mapping statistics saved as a second output -->
            <param name="source" value="history"/>
            <param name="own_file" value="bowtie2-ref.fasta"/>
            <param name="type" value="paired"/>
            <param name="input_1" value="bowtie2-fq1.fq" ftype="fastqsanger"/>
            <param name="input_2" value="bowtie2-fq2.fq" ftype="fastqsanger"/>
            <param name="paired_options_selector" value="no"/>
            <param name="unaligned_file" value="false"/>
            <param name="analysis_type_selector" value="simple"/>
            <param name="save_mapping_stats" value="true"/>
            <output name="output" file="bowtie2-test1.bam" ftype="bam" lines_diff="2"/>
            <!-- the statistics report is only checked for a characteristic phrase -->
            <output name="mapping_stats">
                <assert_contents>
                    <has_text text="of these"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- single interleaved fastqsanger dataset, with read group info -->
            <param name="source" value="history"/>
            <param name="own_file" value="bowtie2-ref.fasta"/>
            <param name="type" value="paired_interleaved"/>
            <param name="input_1" value="bowtie2-fq_il.fq" ftype="fastqsanger"/>
            <!-- <param name="paired_options_selector" value="no"/> -->
            <param name="unaligned_file" value="false"/>
            <param name="analysis_type_selector" value="simple"/>
            <param name="rg_selector" value="set"/>
            <param name="ID" value="rg1"/>
            <param name="PL" value="CAPILLARY"/>
            <output name="output" file="bowtie2-test_il.bam" ftype="bam" lines_diff="2"/>
        </test>
        <test expect_num_outputs="1">
            <!-- gzip-compressed (fastqsanger.gz) paired-end reads; same expected BAM as the uncompressed run -->
            <param name="source" value="history"/>
            <param name="own_file" value="bowtie2-ref.fasta"/>
            <param name="type" value="paired"/>
            <param name="input_1" value="bowtie2-fq1.fq.gz" ftype="fastqsanger.gz"/>
            <param name="input_2" value="bowtie2-fq2.fq.gz" ftype="fastqsanger.gz"/>
            <param name="paired_options_selector" value="no"/>
            <param name="unaligned_file" value="false"/>
            <param name="analysis_type_selector" value="simple"/>
            <output name="output" file="bowtie2-test1.bam" ftype="bam" lines_diff="2"/>
        </test>
        <test expect_num_outputs="1">
            <!-- bzip2-compressed (fastqsanger.bz2) paired-end reads; same expected BAM as the uncompressed run -->
            <param name="source" value="history"/>
            <param name="own_file" value="bowtie2-ref.fasta"/>
            <param name="type" value="paired"/>
            <param name="input_1" value="bowtie2-fq1.fq.bz2" ftype="fastqsanger.bz2"/>
            <param name="input_2" value="bowtie2-fq2.fq.bz2" ftype="fastqsanger.bz2"/>
            <param name="paired_options_selector" value="no"/>
            <param name="unaligned_file" value="false"/>
            <param name="analysis_type_selector" value="simple"/>
            <output name="output" file="bowtie2-test1.bam" ftype="bam" lines_diff="2"/>
        </test>
        <test expect_num_outputs="1">
            <!-- paired-end reads supplied as FASTA instead of FASTQ -->
            <param name="source" value="history"/>
            <param name="own_file" value="bowtie2-ref.fasta"/>
            <param name="type" value="paired"/>
            <param name="input_1" value="bowtie2-fq1.fa" ftype="fasta"/>
            <param name="input_2" value="bowtie2-fq2.fa" ftype="fasta"/>
            <param name="paired_options_selector" value="no"/>
            <param name="unaligned_file" value="false"/>
            <param name="analysis_type_selector" value="simple"/>
            <output name="output" file="bowtie2-test_fasta_in.bam" ftype="bam" lines_diff="2"/>
        </test>
        <test expect_num_outputs="1">
            <!-- FASTA paired-end reads with reorder enabled: output is typed qname_input_sorted.bam and compared by size only -->
            <param name="source" value="history"/>
            <param name="own_file" value="bowtie2-ref.fasta"/>
            <param name="type" value="paired"/>
            <param name="input_1" value="bowtie2-fq1.fa" ftype="fasta"/>
            <param name="input_2" value="bowtie2-fq2.fa" ftype="fasta"/>
            <param name="paired_options_selector" value="no"/>
            <param name="unaligned_file" value="false"/>
            <param name="analysis_type_selector" value="simple"/>
            <param name="sam_options_selector" value="yes"/>
            <param name="reorder" value="true"/>
            <output name="output" file="bowtie2-test_fasta_in_bam_qname_input_sorted.bam" ftype="qname_input_sorted.bam" compare="sim_size"/>
        </test>
        <test expect_num_outputs="1">
            <!-- FASTA paired-end reads with SAM output requested; checked by looking for a known read name -->
            <param name="source" value="history"/>
            <param name="own_file" value="bowtie2-ref.fasta"/>
            <param name="type" value="paired"/>
            <param name="input_1" value="bowtie2-fq1.fa" ftype="fasta"/>
            <param name="input_2" value="bowtie2-fq2.fa" ftype="fasta"/>
            <param name="paired_options_selector" value="no"/>
            <param name="unaligned_file" value="false"/>
            <param name="analysis_type_selector" value="simple"/>
            <param name="sam_options_selector" value="yes"/>
            <param name="sam_options|sam_opt" value="true"/>
            <output name="output" ftype="sam">
                <assert_contents>
                    <has_text text="M01368:8:000000000-A3GHV:1:1101:6911:8255"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**Bowtie2 Overview**

Bowtie2_ is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s or 1,000s of characters to relatively long (e.g. mammalian) genomes. Bowtie 2 supports gapped, local, and paired-end alignment modes. Galaxy wrapper for Bowtie 2 outputs alignments in `BAM format`_, enabling interoperation with a large number of other tools available at this site.
Majority of information in this page is derived from an excellent `Bowtie2 manual`_ written by Ben Langmead.

.. _Bowtie2: http://bowtie-bio.sourceforge.net/bowtie2/
.. _`Bowtie2 manual`: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
.. _`BAM format`: http://samtools.github.io/hts-specs/SAMv1.pdf

-----

**Selecting reference genomes for Bowtie2**

Galaxy wrapper for Bowtie2 allows you to select between precomputed and user-defined indices for reference genomes using **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options:

    1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bowtie2-build utility and are ready to be mapped against.
    2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using bowtie2-build command, and then run mapping with bowtie2.

If your genome of interest is not listed here you have two choices:

    1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index needs to be added
    2. Upload your genome of interest as a FASTA file to Galaxy history and select **Use a genome from the history and build index** option.

------

.. class:: infomark

**Bowtie2 options**

Galaxy wrapper for Bowtie2 implements most but not all options available through the command line. Supported options are described below.

-----

**Inputs**

Bowtie 2 accepts files in Sanger FASTQ format (single or paired-end). Paired-end data can be represented as two individual (forward and reverse) datasets, as well as a single interleaved dataset (see an example at the end of the help section).

------

**Input options**::

    --interleaved
            Reads interleaved FASTQ files where the first two records (8 lines) represent a mate pair.

    -s/--skip <int>
            Skip (i.e. do not align) the first `<int>` reads or pairs in the input.

    -u/--qupto <int>
            Align the first `<int>` reads or read pairs from the input (after the
            `-s`/`--skip` reads or pairs have been skipped), then stop.  Default: no limit.

    -5/--trim5 <int>
            Trim `<int>` bases from 5' (left) end of each read before alignment (default: 0).

    -3/--trim3 <int>
            Trim `<int>` bases from 3' (right) end of each read before alignment (default: 0).

    --phred33
            Input qualities are ASCII chars equal to the Phred quality plus 33.  This is
            also called the "Phred+33" encoding, which is used by the very latest Illumina
            pipelines.

    --phred64
            Input qualities are ASCII chars equal to the Phred quality plus 64.  This is
            also called the "Phred+64" encoding.

    --solexa-quals
            Convert input qualities from Solexa quality (which can be negative) to
            Phred quality (which can't).  This scheme was used in older Illumina GA
            Pipeline versions (prior to 1.3).  Default: off.

    --int-quals
            Quality values are represented in the read input file as space-separated ASCII integers, e.g., `40 40 30 40`..., rather than ASCII characters, e.g., `II?I`....
            Integers are treated as being on the Phred quality scale unless
            `--solexa-quals` is also specified. Default: off.

------

**Presets in `--end-to-end` mode**::

    --very-fast
            Same as: `-D 5 -R 1 -N 0 -L 22 -i S,0,2.50`

    --fast
            Same as: `-D 10 -R 2 -N 0 -L 22 -i S,0,2.50`

    --sensitive
            Same as: `-D 15 -R 2 -N 0 -L 22 -i S,1,1.15` (default in `--end-to-end` mode)

    --very-sensitive
            Same as: `-D 20 -R 3 -N 0 -L 20 -i S,1,0.50`

------

**Presets options in `--local` mode**::

    --very-fast-local
            Same as: `-D 5 -R 1 -N 0 -L 25 -i S,1,2.00`

    --fast-local
            Same as: `-D 10 -R 2 -N 0 -L 22 -i S,1,1.75`

    --sensitive-local
            Same as: `-D 15 -R 2 -N 0 -L 20 -i S,1,0.75` (default in `--local` mode)

    --very-sensitive-local
            Same as: `-D 20 -R 3 -N 0 -L 20 -i S,1,0.50`

------

**Alignment options**::

    -N <int>
            Sets the number of mismatches allowed in a seed alignment during multiseed
            alignment.  Can be set to 0 or 1. Setting this higher makes alignment slower
            (often much slower) but increases sensitivity.  Default: 0.

    -L <int>
            Sets the length of the seed substrings to align during multiseed alignment.
            Smaller values make alignment slower but more sensitive. Default: the
            `--sensitive` preset is used by default, which sets `-L` to 22 in
            `--end-to-end` mode and to 20 in `--local` mode.

    -i <func>
            Sets a function governing the interval between seed substrings to use during
            multiseed alignment.  For instance, if the read has 30 characters, and seed
            length is 10, and the seed interval is 6, the seeds extracted will be:

    Read:      TAGCTACGCTCTACGCTATCATGCATAAAC
    Seed 1 fw: TAGCTACGCT
    Seed 1 rc: AGCGTAGCTA
    Seed 2 fw:       CGCTCTACGC
    Seed 2 rc:       GCGTAGAGCG
    Seed 3 fw:             ACGCTATCAT
    Seed 3 rc:             ATGATAGCGT
    Seed 4 fw:                   TCATGCATAA
    Seed 4 rc:                   TTATGCATGA

    Since it's best to use longer intervals for longer reads, this parameter sets
    the interval as a function of the read length, rather than a single
    one-size-fits-all number.  For instance, specifying `-i S,1,2.5` sets the
    interval function `f` to `f(x) = 1 + 2.5 * sqrt(x)`, where x is the read length.
    If the function returns a result less than
    1, it is rounded up to 1. Default: the `--sensitive` preset is used by
    default, which sets `-i` to `S,1,1.15` in `--end-to-end` mode and to `S,1,0.75`
    in `--local` mode.

    --n-ceil <func>
            Sets a function governing the maximum number of ambiguous characters (usually
            `N`s and/or `.`s) allowed in a read as a function of read length.  For instance,
            specifying `L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`,
            where x is the read length.  Reads exceeding this ceiling are filtered out.
            Default: `L,0,0.15`.

    --dpad <int>
            "Pads" dynamic programming problems by `<int>` columns on either side to allow
            gaps.  Default: 15.

    --gbar <int>
            Disallow gaps within `<int>` positions of the beginning or end of the read.
            Default: 4.

    --ignore-quals
            When calculating a mismatch penalty, always consider the quality value at the
            mismatched position to be the highest possible, regardless of the actual value.
            I.e. input is treated as though all quality values are high.  This is also the
            default behavior when the input doesn't specify quality values (e.g. in `-f`,
            `-r`, or `-c` modes).

    --nofw/--norc
            If `--nofw` is specified, `bowtie2` will not attempt to align unpaired reads to
            the forward (Watson) reference strand.  If `--norc` is specified, `bowtie2` will
            not attempt to align unpaired reads against the reverse-complement (Crick)
            reference strand. In paired-end mode, `--nofw` and `--norc` pertain to the
            fragments; i.e. specifying `--nofw` causes `bowtie2` to explore only those
            paired-end configurations corresponding to fragments from the reverse-complement
            (Crick) strand.  Default: both strands enabled.

    --no-1mm-upfront
            By default, Bowtie 2 will attempt to find either an exact or a 1-mismatch
            end-to-end alignment for the read *before* trying the multiseed heuristic.  Such
            alignments can be found very quickly, and many short read alignments have exact or
            near-exact end-to-end alignments.  However, this can lead to unexpected
            alignments when the user also sets options governing the multiseed heuristic,
            like `-L` and `-N`.  For instance, if the user specifies `-N 0` and `-L` equal
            to the length of the read, the user will be surprised to find 1-mismatch alignments
            reported.  This option prevents Bowtie 2 from searching for 1-mismatch end-to-end
            alignments before using the multiseed heuristic, which leads to the expected
            behavior when combined with options such as `-L` and `-N`.  This comes at the
            expense of speed.

    --end-to-end
            In this mode, Bowtie 2 requires that the entire read align from one end to the
            other, without any trimming (or "soft clipping") of characters from either end.
            The match bonus `--ma` always equals 0 in this mode, so all alignment scores
            are less than or equal to 0, and the greatest possible alignment score is 0.
            This is mutually exclusive with `--local`.  `--end-to-end` is the default mode.

    --local
            In this mode, Bowtie 2 does not require that the entire read align from one end
            to the other.  Rather, some characters may be omitted ("soft clipped") from the
            ends in order to achieve the greatest possible alignment score.  The match bonus
            `--ma` is used in this mode, and the best possible alignment score is equal to
            the match bonus (`--ma`) times the length of the read.  Specifying `--local`
            and one of the presets (e.g. `--local --very-fast`) is equivalent to specifying
            the local version of the preset (`--very-fast-local`).  This is mutually
            exclusive with `--end-to-end`.  `--end-to-end` is the default mode.

-----

**Scoring options**::

    --ma <int>
            Sets the match bonus.  In `--local` mode `<int>` is added to the alignment
            score for each position where a read character aligns to a reference character
            and the characters match.  Not used in `--end-to-end` mode.  Default: 2.

    --mp MX,MN
            Sets the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers.  A
            number less than or equal to `MX` and greater than or equal to `MN` is
            subtracted from the alignment score for each position where a read character
            aligns to a reference character, the characters do not match, and neither is an
            `N`.  If `--ignore-quals` is specified, the number subtracted equals `MX`.
            Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )`
            where Q is the Phred quality value.  Default: `MX` = 6, `MN` = 2.

    --np <int>
            Sets penalty for positions where the read, reference, or both, contain an
            ambiguous character such as `N`.  Default: 1.

    --rdg <int1>,<int2>
            Sets the read gap open (`<int1>`) and extend (`<int2>`) penalties.  A read gap of
            length N gets a penalty of `<int1>` + N * `<int2>`.  Default: 5, 3.

    --rfg <int1>,<int2>
            Sets the reference gap open (`<int1>`) and extend (`<int2>`) penalties.  A
            reference gap of length N gets a penalty of `<int1>` + N * `<int2>`.  Default:
            5, 3.

    --score-min <func>
            Sets a function governing the minimum alignment score needed for an alignment to
            be considered "valid" (i.e. good enough to report).  This is a function of read
            length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f`
            to `f(x) = 0 + -0.6 * x`, where `x` is the read length.  The default in `--end-to-end` mode is `L,-0.6,-0.6` and
            the default in `--local` mode is `G,20,8`.

-----

**Reporting options**::

    -k <int>
            By default, `bowtie2` searches for distinct, valid alignments for each read.
            When it finds a valid alignment, it continues looking for alignments that are
            nearly as good or better.  The best alignment found is reported (randomly
            selected from among best if tied).  Information about the best alignments is
            used to estimate mapping quality and to set SAM optional fields, such as
            `AS:i` and `XS:i`.

            When `-k` is specified, however, `bowtie2` behaves differently.  Instead, it
            searches for at most `<int>` distinct, valid alignments for each read.  The
            search terminates when it can't find more distinct valid alignments, or when it
            finds `<int>`, whichever happens first.  All alignments found are reported in
            descending order by alignment score. The alignment score for a paired-end
            alignment equals the sum of the alignment scores of the individual mates. Each
            reported read or pair alignment beyond the first has the SAM 'secondary' bit
            (which equals 256) set in its FLAGS field.  For reads that have more than
            `<int>` distinct, valid alignments, `bowtie2` does not guarantee that the
            `<int>` alignments reported are the best possible in terms of alignment score.
            `-k` is mutually exclusive with `-a`.

            Note: Bowtie 2 is not designed with large values for `-k` in mind, and when
            aligning reads to long, repetitive genomes large `-k` can be very, very slow.

    -a
            Like `-k` but with no upper limit on number of alignments to search for.  `-a`
            is mutually exclusive with `-k`.

            Note: Bowtie 2 is not designed with `-a` mode in mind, and when
            aligning reads to long, repetitive genomes this mode can be very, very slow.

-----

**Effort options**::

    -D <int>
            Up to `<int>` consecutive seed extension attempts can "fail" before Bowtie 2
            moves on, using the alignments found so far.  A seed extension "fails" if it
            does not yield a new best or a new second-best alignment.  This limit is
            automatically adjusted up when `-k` or `-a` is specified.  Default: 15.

    -R <int>
            `<int>` is the maximum number of times Bowtie 2 will "re-seed" reads with
            repetitive seeds. When "re-seeding," Bowtie 2 simply chooses a new set of seeds
            (same length, same number of mismatches allowed) at different offsets and
            searches for more alignments.  A read is considered to have repetitive seeds if
            the total number of seed hits divided by the number of seeds that aligned at
            least once is greater than 300.  Default: 2.

-----

**Paired-end options**::

    -I/--minins <int>
            The minimum fragment length for valid paired-end alignments.  E.g. if `-I 60` is
            specified and a paired-end alignment consists of two 20-bp alignments in the
            appropriate orientation with a 20-bp gap between them, that alignment is
            considered valid (as long as `-X` is also satisfied).  A 19-bp gap would not
            be valid in that case.  If trimming options `-3` or `-5` are also used, the
            `-I` constraint is applied with respect to the untrimmed mates.

            The larger the difference between `-I` and `-X`, the slower Bowtie 2 will
            run.  This is because larger differences between `-I` and `-X` require that
            Bowtie 2 scan a larger window to determine if a concordant alignment exists.
            For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very
            efficient.

            Default: 0 (essentially imposing no minimum)

    -X/--maxins <int>
            The maximum fragment length for valid paired-end alignments.  E.g. if `-X 100`
            is specified and a paired-end alignment consists of two 20-bp alignments in the
            proper orientation with a 60-bp gap between them, that alignment is considered
            valid (as long as `-I` is also satisfied).  A 61-bp gap would not be valid in
            that case.  If trimming options `-3` or `-5` are also used, the `-X`
            constraint is applied with respect to the untrimmed mates, not the trimmed
            mates.

            The larger the difference between `-I` and `-X`, the slower Bowtie 2 will
            run.  This is because larger differences between `-I` and `-X` require that
            Bowtie 2 scan a larger window to determine if a concordant alignment exists.
            For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very
            efficient.

            Default: 500.

    --fr/--rf/--ff
            The upstream/downstream mate orientations for a valid paired-end alignment
            against the forward reference strand.  E.g., if `--fr` is specified and there is
            a candidate paired-end alignment where mate 1 appears upstream of the reverse
            complement of mate 2 and the fragment length constraints (`-I` and `-X`) are
            met, that alignment is valid.  Also, if mate 2 appears upstream of the reverse
            complement of mate 1 and all other constraints are met, that too is valid.
            `--rf` likewise requires that an upstream mate 1 be reverse-complemented and a
            downstream mate 2 be forward-oriented.  `--ff` requires both an upstream mate 1
            and a downstream mate 2 to be forward-oriented.  Default: `--fr` (appropriate
            for Illumina's Paired-end Sequencing Assay).

    --no-mixed
            By default, when `bowtie2` cannot find a concordant or discordant alignment for
            a pair, it then tries to find alignments for the individual mates.  This option
            disables that behavior.

    --no-discordant
            By default, `bowtie2` looks for discordant alignments if it cannot find any
            concordant alignments.  A discordant alignment is an alignment where both mates
            align uniquely, but that does not satisfy the paired-end constraints
            (`--fr`/`--rf`/`--ff`, `-I`, `-X`).  This option disables that behavior.

    --dovetail
            If the mates "dovetail", that is if one mate alignment extends past the
            beginning of the other such that the wrong mate begins upstream, consider that
            to be concordant.  Default: mates cannot dovetail in a concordant alignment.

    --no-contain
            If one mate alignment contains the other, consider that to be non-concordant.
            Default: a mate can contain the other in a concordant alignment.

    --no-overlap
            If one mate alignment overlaps the other at all, consider that to be
            non-concordant.  Default: mates can overlap in a concordant alignment.

------

**SAM options**::

    --rg-id <text>
            Set the read group ID to `<text>`.  This causes the SAM `@RG` header line to be
            printed, with `<text>` as the value associated with the `ID:` tag.  It also
            causes the `RG:Z:` extra field to be attached to each SAM output record, with
            value set to `<text>`.

    --rg <text>
            Add `<text>` (usually of the form `TAG:VAL`, e.g. `SM:Pool1`) as a field on the
            `@RG` header line.  Note: in order for the `@RG` line to appear, `--rg-id`
            must also be specified.  This is because the `ID` tag is required by the SAM
            Specification.  Specify `--rg` multiple times to set multiple fields.  See the
            SAM Specification for details about what fields are legal.

    --omit-sec-seq
            When printing secondary alignments, Bowtie 2 by default will write out the `SEQ`
            and `QUAL` strings.  Specifying this option causes Bowtie 2 to print an asterisk
            in those fields instead.

-----

**Other options**::

    --reorder
            Guarantees that output SAM records are printed in an order corresponding to the
            order of the reads in the original input file, even when `-p` is set greater
            than 1.  Specifying `--reorder` and setting `-p` greater than 1 causes Bowtie
            2 to run somewhat slower and use somewhat more memory than if `--reorder` were
            not specified.  Has no effect if `-p` is set to 1, since output order will
            naturally correspond to input order in that case.

    --seed <int>
            Use `<int>` as the seed for pseudo-random number generator.  Default: 0.

    --non-deterministic
            Normally, Bowtie 2 re-initializes its pseudo-random generator for each read.  It
            seeds the generator with a number derived from (a) the read name, (b) the
            nucleotide sequence, (c) the quality sequence, (d) the value of the `--seed`
            option. This means that if two reads are identical (same name, same
            nucleotides, same qualities) Bowtie 2 will find and report the same alignment(s)
            for both, even if there was ambiguity.  When `--non-deterministic` is specified,
            Bowtie 2 re-initializes its pseudo-random generator for each read using the
            current time.  This means that Bowtie 2 will not necessarily report the same
            alignment for two identical reads.  This is counter-intuitive for some users,
            but might be more appropriate in situations where the input consists of many
            identical reads.

-----


**Paired-end (and mate-pair) data in fastq format**

Paired-end datasets can be represented as two individual datasets:

First dataset::

 @1/1
 AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
 +
 EGGEGGGDFGEEEAEECGDEGGFEEGEFGBEEDDECFEFDD@CDD<ED
 @2/1
 AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
 +
 HHHHHHEGFHEEFEEHEEHHGGEGGGGEFGFGGGGHHHHFBEEEEEFG

Second dataset::

 @1/2
 CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
 +
 GHHHDFDFGFGEGFBGEGGEGEGGGHGFGHFHFHHHHHHHEF?EFEFF
 @2/2
 CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
 +
 HHHHHHHHHHHHHGHHHHHHGHHHHHHHHHHHFHHHFHHHHHHHHHHH

Or a single *interleaved* dataset::

 @1/1
 AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
 +
 EGGEGGGDFGEEEAEECGDEGGFEEGEFGBEEDDECFEFDD@CDD<ED
 @1/2
 CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
 +
 GHHHDFDFGFGEGFBGEGGEGEGGGHGFGHFHFHHHHHHHEF?EFEFF
 @2/1
 AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
 +
 HHHHHHEGFHEEFEEHEEHHGGEGGGGEFGFGGGGHHHHFBEEEEEFG
 @2/2
 CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
 +
 HHHHHHHHHHHHHGHHHHHHGHHHHHHHHHHHFHHHFHHHHHHHHHHH
    ]]></help>
    <citations>
        <citation type="doi">10.1186/gb-2009-10-3-r25</citation>
        <citation type="doi">10.1038/nmeth.1923</citation>
    </citations>
</tool>
<!-- Mercurial changeset metadata (page residue, not part of the tool definition):
author	iuc
date	Sat, 10 Jul 2021 07:44:43 +0000
parents	c9105910063f
children	56dcf3929d02
-->