<!-- Mercurial > repos > devteam > lastz -->

<tool id="lastz_wrapper_2" name="LASTZ" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <!-- Short tagline displayed after the tool name. -->
    <description>: align long sequences</description>
    <!-- lastz_macros.xml is not visible from this file; it presumably defines the version tokens,
         the bio_tools and target_input macros, and the target/query command-line tokens used below. -->
    <macros>
        <import>lastz_macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <requirements>
        <!-- The command template invokes either lastz or lastz_32. -->
        <requirement type="package" version="@TOOL_VERSION@">lastz</requirement>
        <!-- samtools sort is used to write the result when BAM output is selected. -->
        <requirement type="package" version="1.14">samtools</requirement>
        <!-- Rscript runs the r_plot configfile when a dot plot is requested. -->
        <requirement type="package" version="3.6.3">r-base</requirement>
        <!-- NOTE(review): bzip2 is presumably needed by the imported macros to read fastq.bz2 queries;
             confirm against lastz_macros.xml. -->
        <requirement type="package" version="1.0.8">bzip2</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Builds the LASTZ command line from the tool form.
        ## Every optional value is emitted only when it was actually supplied.
        ## Numeric options for which 0 is meaningful are tested by their string
        ## form, because a plain truth test would treat 0 as "not set".

        ## Executable: lastz_32 is offered for targets larger than 2G
        #if $lastz_32:
            lastz_32
        #else:
            lastz
        #end if

        @TARGET_INPUT_COMMAND_LINE@
        ## If --self is set: perform self alignment and ignore the query
        #if $where_to_look.self:
            '${where_to_look.self}'
            #if $where_to_look.nomirror:
                '${where_to_look.nomirror}'
            #end if
        #else:
            @query_input@
        #end if

## WHERE TO LOOK --------------------------------

        '${where_to_look.strand}'
        #if str( $where_to_look.qhsplimit.qhsplimit_selector ) == "yes":
            #if $where_to_look.qhsplimit.qhsplimit_n:
                ## "keep" is documented on the form as also suppressing warnings,
                ## so it always maps to keep,nowarn even when "nowarn" is left unticked.
                #if $where_to_look.qhsplimit.qhsplimit_keep:
                    '--queryhsplimit=keep,nowarn:${where_to_look.qhsplimit.qhsplimit_n}'
                #elif $where_to_look.qhsplimit.qhsplimit_nowarn:
                    '--queryhsplimit=nowarn:${where_to_look.qhsplimit.qhsplimit_n}'
                #else:
                    '--queryhsplimit=${where_to_look.qhsplimit.qhsplimit_n}'
                #end if
            #end if
        #end if

        #if $where_to_look.qhspbest:
            '--queryhspbest=${where_to_look.qhspbest}'
        #end if

        #if str( $where_to_look.qdepth.qdepth_selector ) == "yes":
            #if $where_to_look.qdepth.qdepth_n:
                #if $where_to_look.qdepth.qdepth_nowarn and not $where_to_look.qdepth.qdepth_keep:
                    '--querydepth=${where_to_look.qdepth.qdepth_nowarn}:${where_to_look.qdepth.qdepth_n}'
                #elif not $where_to_look.qdepth.qdepth_nowarn and $where_to_look.qdepth.qdepth_keep:
                    '--querydepth=${where_to_look.qdepth.qdepth_keep}:${where_to_look.qdepth.qdepth_n}'
                #elif $where_to_look.qdepth.qdepth_nowarn and $where_to_look.qdepth.qdepth_keep:
                    '--querydepth=${where_to_look.qdepth.qdepth_keep},${where_to_look.qdepth.qdepth_nowarn}:${where_to_look.qdepth.qdepth_n}'
                #else:
                    '--querydepth=${where_to_look.qdepth.qdepth_n}'
                #end if
            #end if
        #end if

## SCORING --------------------------------

        #if $scoring.score_file:
            '--scores=${scoring.score_file}'
        #end if

        #if str( $scoring.match.match_selector ) == "yes":
            #if $scoring.match.match_reward:
                #if $scoring.match.match_penalty:
                    '--match=${scoring.match.match_reward},${scoring.match.match_penalty}'
                #else:
                    '--match=${scoring.match.match_reward}'
                #end if
            #end if
        #end if
        #if str( $scoring.gap.gap_selector ) == "yes":
            #if $scoring.gap.gap_extend:
                #if $scoring.gap.gap_open:
                    '--gap=${scoring.gap.gap_open},${scoring.gap.gap_extend}'
                #else:
                    '--gap=${scoring.gap.gap_extend}'
                #end if
            #end if
        #end if
        #if $scoring.ambigN:
            '${scoring.ambigN}'
        #end if
        #if $scoring.ambigIUPAC:
            '${scoring.ambigIUPAC}'
        #end if

## SEEDING --------------------------------

        ## The length and pattern fields are optional on the form; when one is left
        ## empty no --seed option is written, so LASTZ uses its default seed instead
        ## of receiving an incomplete option.
        #if str( $seeding.seed.seed_selector ) == "pre_set":
            '${seeding.seed.pre_set_seeds}'
        #elif str( $seeding.seed.seed_selector ) == "len":
            #if $seeding.seed.seed_match:
                '--seed=match${seeding.seed.seed_match}'
            #end if
        #elif str( $seeding.seed.seed_selector ) == "half_len":
            #if $seeding.seed.seed_half:
                '--seed=half${seeding.seed.seed_half}'
            #end if
        #elif str( $seeding.seed.seed_selector ) == "pattern":
            #if $seeding.seed.pattern:
                '--seed=${seeding.seed.pattern}'
            #end if
        #end if
        ## "--transition" is the pre-selected choice on the form and is not written out
        #if str( $seeding.transitions ) != "--transition":
            '${seeding.transitions}'
        #end if
        #if str( $seeding.seed_filer.seed_filer_selector ) == "yes":
            #if $seeding.seed_filer.filter_match:
                #if $seeding.seed_filer.filter_tr:
                    '--filter=${seeding.seed_filer.filter_tr},${seeding.seed_filer.filter_match}'
                #else:
                    '--filter=${seeding.seed_filer.filter_match}'
                #end if
            #end if
        #end if

## FINDING HSPs --------------------------------

        #if $hsp.gfextend:
            '${hsp.gfextend}'
        #end if
        #if $hsp.nogfextend:
            '${hsp.nogfextend}'
        #end if
        #if str( $hsp.hsp_method.hsp_method_selector ) == "match":

            #if $hsp.hsp_method.exact:
                '--exact=${hsp.hsp_method.exact}'
            #end if
        #elif str( $hsp.hsp_method.hsp_method_selector ) == "mismatch":

            #if $hsp.hsp_method.mismatch_count and $hsp.hsp_method.mismatch_length:
                '--mismatch=${hsp.hsp_method.mismatch_count},${hsp.hsp_method.mismatch_length}'
            #end if
        #elif str( $hsp.hsp_method.hsp_method_selector ) == "x":

            #if $hsp.hsp_method.xdrop:
                '--xdrop=${hsp.hsp_method.xdrop}'
            #end if

            #if $hsp.hsp_method.hspthresh:
                '--hspthresh=${hsp.hsp_method.hspthresh}'
            #end if

            #if $hsp.hsp_method.hspthresh_top:
                '--hspthresh=top${hsp.hsp_method.hspthresh_top}'
            #end if

            #if $hsp.hsp_method.hspthresh_top_percent:
                '--hspthresh=top${hsp.hsp_method.hspthresh_top_percent}%'
            #end if
        #end if
        #if $hsp.entropy:
            '${hsp.entropy}'
        #end if
        #if $hsp.entropy_report:
            '${hsp.entropy_report}'
        #end if
        #if $hsp.noentropy:
            '${hsp.noentropy}'
        #end if

## CHAINING --------------------------------

        #if $chaining.chain:
            '${chaining.chain}'
        #end if
        #if str( $chaining.chaning_penalties.chaning_penalties_selector ) == "yes":
            #if $chaining.chaning_penalties.diag and $chaining.chaning_penalties.anti:
                '--chain=${chaining.chaning_penalties.diag},${chaining.chaning_penalties.anti}'
            #end if
        #end if

## GAPPED EXTENSION --------------------------------

        #if $gap_ext.gapped:
            '${gap_ext.gapped}'
        #end if
        #if $gap_ext.nogapped:
            '${gap_ext.nogapped}'
        #end if
        #if $gap_ext.ydrop:
            '--ydrop=${gap_ext.ydrop}'
        #end if
        #if $gap_ext.noytrim:
            '${gap_ext.noytrim}'
        #end if
        #if $gap_ext.gappedthresh:
            '--gappedthresh=${gap_ext.gappedthresh}'
        #end if
        #if $gap_ext.allgappedbounds:
            '${gap_ext.allgappedbounds}'
        #end if

## FILTERING --------------------------------

        ## The form allows 0 for the range minimums and for the mismatch/gap limits
        ## (for example "no mismatches at all"), so these are tested by string form:
        ## an unset optional field renders as an empty string, while 0 renders as "0".
        #if str( $filters.identity.id_min ) not in ( "", "None" ):
            #if str( $filters.identity.id_max ) not in ( "", "None" ):
                '--filter=identity:${filters.identity.id_min}..${filters.identity.id_max}'
            #else:
                '--filter=identity:${filters.identity.id_min}'
            #end if
        #end if
        #if str( $filters.continuity.cont_min ) not in ( "", "None" ):
            #if str( $filters.continuity.cont_max ) not in ( "", "None" ):
                '--filter=continuity:${filters.continuity.cont_min}..${filters.continuity.cont_max}'
            #else:
                '--filter=continuity:${filters.continuity.cont_min}'
            #end if
        #end if
        #if str( $filters.coverage.cov_min ) not in ( "", "None" ):
            #if str( $filters.coverage.cov_max ) not in ( "", "None" ):
                '--filter=coverage:${filters.coverage.cov_min}..${filters.coverage.cov_max}'
            #else:
                '--filter=coverage:${filters.coverage.cov_min}'
            #end if
        #end if
        #if $filters.filter_nmatch:
            '--filter=nmatch:${filters.filter_nmatch}'
        #end if
        #if $filters.filter_nmatch_percent:
            '--filter=nmatch:${filters.filter_nmatch_percent}%'
        #end if
        #if str( $filters.filter_nmismatch ) not in ( "", "None" ):
            '--filter=nmismatch:0..${filters.filter_nmismatch}'
        #end if
        #if str( $filters.filter_ngap ) not in ( "", "None" ):
            '--filter=ngap:0..${filters.filter_ngap}'
        #end if
        #if str( $filters.filter_cgap ) not in ( "", "None" ):
            '--filter=cgap:0..${filters.filter_cgap}'
        #end if
        #if $filters.notrivial:
            '${filters.notrivial}'
        #end if

## INTERPOLATION --------------------------------

        #if $interpolation.inner:
            '--inner=${interpolation.inner}'
        #end if

## HOUSEKEEPING ----------------------------------

        --traceback=160M


## OUTPUT FORMATS --------------------------------

        #if str( $output_format.out.format ) == "bam":
            '--format=${output_format.out.bam_options}'
        #elif str( $output_format.out.format ) == "general_def":
            --format=general-
        #elif str( $output_format.out.format ) == "maf":
            '--format=${output_format.out.maf_type}'
        #elif str( $output_format.out.format ) == "blastn":
            --format=BLASTN-
        #elif str( $output_format.out.format ) == "general_full":
            '--format=general-:${output_format.out.fields}'
        #elif str( $output_format.out.format ) == "differences":
            '--format=differences'
        #end if
        --action:target=multiple
        $output_format.rplot
        ## BAM output is piped through samtools sort; every other format is redirected to the output file
        #if str( $output_format.out.format ) == "bam":
            | samtools sort -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '${output}'
        #else:
            > '${output}'
        #end if
        ## Render the dot plot; all output of the R script is discarded
        #if $output_format.rplot:
            &&
            Rscript '$r_plot' > /dev/null 2>&1
        #end if

        ]]>
    </command>
    <configfiles>
        <!-- R script executed by the command template when a dot plot is requested. -->
        <configfile name="r_plot">
            <![CDATA[
            #!/usr/bin/env Rscript
            ## Send R error messages to stderr and quit with status 1
            options(
                show.error.messages = FALSE,
                error = function() {
                    cat(geterrmessage(), file = stderr())
                    q("no", 1, FALSE)
                }
            )
            ## Draw the image only when 'plot.r' is not empty
            plot_file <- "plot.r"
            if (file.info(plot_file)\$size > 0) {
                coords <- read.table(plot_file, as.is = TRUE)
                png("${out_plot}", width = 640, height = 640, res = 72)
                plot(coords, type = "l", xlab = "target", ylab = "query", main = "plot")
                dev.off()
            }
            ]]>
        </configfile>
    </configfiles>
    <inputs>
        <!-- Target selection comes from the imported macros. -->
        <expand macro="target_input"/>
        <!-- Query dataset; not used on the command line when a self-alignment is requested. -->
        <param name="query"
               type="data"
               format="fasta,fastq,fasta.gz,fastq.gz,fastq.bz2"
               label="Select QUERY sequence(s)"
               help="These are the sequences that you are aligning against TARGET"/>
        <!-- Switches the executable from lastz to lastz_32. -->
        <param name="lastz_32"
               type="boolean"
               checked="false"
               label="Use lastz_32"
               help="It is highly recommended to use lastz_32 instead of lastz if the reference genome size is greater than 2G"/>
        <!-- "Where to look": strand choice, self-alignment switches and the per-query HSP / depth limits.
             The first three params carry no name attribute; the command template addresses them as
             where_to_look.strand, where_to_look.self and where_to_look.nomirror. -->
        <section name="where_to_look" expanded="False" title="Where to look">
            <param type="select" display="radio" label="which strand to search" argument="--strand" help="Search both strands or choose plus or minus">
                <option value="--strand=both" selected="True">Both</option>
                <option value="--strand=plus">Plus</option>
                <option value="--strand=minus">Minus</option>
            </param>
            <param type="boolean" display="radio" truevalue="--self" falsevalue="" checked="False" argument="--self" label="Perform a self-alignment: the target sequence is also the query." help="Computation is more efficient than it would be without this option, since only one of each mirror-image pair of alignment blocks is processed (the other, redundant one is skipped during processing, but re-created in the output). Also, the trivial self-alignment block along the main diagonal is omitted from the output. THIS OPTION CANNOT BE USED IF THE TARGET IS COMPRIZED OF MULTIPLE SEQUENCES"/>
            <param type="boolean" display="radio" truevalue="--nomirror" falsevalue="" checked="False" label="Inhibit the re-creation of mirror-image alignments." argument="--nomirror" help="Output consists of only one copy of each meaningful alignment block in a self-alignment. This option is only applicable when the ‑‑self option is used."/>
            <!-- Per-query HSP limit; the two booleans select the nowarn / keep variants of the option. -->
            <conditional name="qhsplimit">
                <param name="qhsplimit_selector" type="select" display="radio" label="Set HSP limit" argument="--queryhsplimit">
                    <option value="yes">Yes</option>
                    <option value="no" selected="true">No</option>
                </param>
                <when value="yes">
                    <param name="qhsplimit_n" optional="true" type="integer" label="Discard queries that have more than N HSPs" help="Any queries that exceed this limit are reported as a warning (to stderr), and no alignments are reported. This is useful for mapping reads to a reference genome, when some reads align to too many places in the reference."/>
                    <param name="qhsplimit_nowarn" type="boolean" truevalue="nowarn" display="radio" checked="False" label=" Withhold warnings for queries that exceed the limit set above"/>
                    <param name="qhsplimit_keep" type="boolean" truevalue="keep" display="radio" checked="False" label="Keep queries that exceed the limit and supress warnings" help="For such a query, the first N HSPs found are passed along to downstream processing. Note that the HSPs reported are not the best N HSPs. They are simply the first N found; they very likely have a positional bias."/>
                </when>
                <when value="no">
                    <!-- Do nothing -->
                </when>
            </conditional>
            <param name="qhspbest" type="integer" optional="true" label="For queries that have more than N HSPs, discard any HSPs that score below the Nth best." help="This is useful for mapping reads to a reference genome, when some reads align to too many places in the reference."/>
            <!-- Depth-of-coverage cut-off. The threshold is a float because its help text describes it
                 as a real number (for example 5.0). -->
            <conditional name="qdepth">
                <param name="qdepth_selector" type="select" display="radio" label="Set ratio of aligned bases to query length" argument="--querydepth">
                    <option value="yes">Yes</option>
                    <option value="no" selected="true">No</option>
                </param>
                <when value="yes">
                    <param name="qdepth_n" optional="true" type="float" label="Stop processing gapped alignments for a query/strand if its ratio of aligned bases to query length exceeds" help="This is a real number that corresponds to a depth of coverage threshold. For example, a value of 5.0 would cause termination once a query/strand has an average of five alignments for every base in the query. The numerator is the number of matches or substitutions (but not gaps); the denominator is the length of the query sequence. The purpose of this option is one of saving time. It is useful for automatically terminating the processing of queries with high repeat content, for which other methods of dealing with repetitive content fail. Moreover, back-end filtering options are not considered. In other words, matches are counted for any alignment that meets the scoring threshold, regardless of whether that alignment would be reported. The justification is that we are trying to abort the processing of queries that have too many bounding alignments in the DP matrix, and back-end filtering occurs later in the process."/>
                    <!-- Each label describes the keyword its own parameter emits: keep reports the
                         alignments found so far, nowarn only silences the warning. -->
                    <param name="qdepth_keep" type="boolean" truevalue="keep" display="radio" checked="False" label="Report any alignments discovered for this query/strand before it exceeds the threshold" help="Note that the alignments reported are not guaranteed to be the highest scoring alignments that would achieve the threshold. They are simply the first alignments found. In other words, the purpose of this option is one of saving time, not one of finding optimal alignments."/>
                    <param name="qdepth_nowarn" type="boolean" truevalue="nowarn" display="radio" checked="False" label="Withhold warnings for queries that exceed the limit set above"/>
                </when>
                <when value="no">
                    <!-- Do nothing -->
                </when>
            </conditional>
        </section>
        <!-- "Scoring": optional score file, match/mismatch scores, gap penalties and ambiguity handling. -->
        <section name="scoring" title="Scoring" expanded="false">
            <param name="score_file"
                   argument="--scores"
                   type="data"
                   format="txt"
                   optional="true"
                   label="Read the substitution scores and gap penalties (and possibly other options) from a scoring file (see help below for file fomat description)."
                   help="This option cannot be used in conjunction with ‑‑match or inference."/> <!--TODO EDIT INFERENCE -->
            <!-- Match reward and mismatch penalty; written out only when a reward is supplied. -->
            <conditional name="match">
                <param name="match_selector"
                       argument="--match"
                       type="select"
                       display="radio"
                       label="Set the score values for a match and mismatch"
                       help="Note that specifying ‑‑match changes the defaults for some of the other options (e.g. the scoring penalties for gaps, and various extension thresholds), as described in respective sections of LASTZ manual. The regular defaults are chosen for compatibility with BLASTZ, but since BLASTZ doesn't support ‑‑match, LASTZ infers that you are not expecting BLASTZ compatibility for this run, so it is free to use improved defaults. THIS OPTION CANNOT BE USED WITH --scores">
                    <option value="yes">Yes</option>
                    <option value="no" selected="true">No</option>
                </param>
                <when value="yes">
                    <param name="match_reward" type="integer" optional="true" label="Score value for a match"/>
                    <param name="match_penalty" type="integer" optional="true" label="Score value for a mismatch"/>
                </when>
                <when value="no">
                    <!-- Do nothing -->
                </when>
            </conditional>
            <!-- Gap open and extend penalties; written out only when an extension penalty is supplied. -->
            <conditional name="gap">
                <param name="gap_selector"
                       argument="--gap"
                       type="select"
                       display="radio"
                       label="Set the score penalties for opening and extending a gap"
                       help="These are specified as positive values; subtraction is implicitly assumed. Note that the first base in a gap incurs the sum of both penalties. This option is only valid if gapped extension is being performed, and cannot be used in conjunction with inference. These values specified on the command line override any corresponding values from a file provided with ‑‑scores.">
                    <option value="yes">Yes</option>
                    <option value="no" selected="true">No</option>
                </param>
                <when value="yes">
                    <param name="gap_open" type="integer" optional="true" label="Gap opening penalty"/>
                    <param name="gap_extend" type="integer" optional="true" label="Gap extension penalty"/>
                </when>
                <when value="no">
                    <!-- Do nothing -->
                </when>
            </conditional>
            <param name="ambigN"
                   argument="--ambiguous=n"
                   type="boolean"
                   truevalue="--ambiguous=n"
                   checked="false"
                   label="Treat each N in the input sequences as an ambiguous nucleotide"
                   help="Substitutions with N are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/>
            <param name="ambigIUPAC"
                   argument="--ambiguous=iupac"
                   type="boolean"
                   truevalue="--ambiguous=iupac"
                   checked="true"
                   label="Treat each of the IUPAC-IUB ambiguity codes (B, D, H, K, M, R, S, V, W, and Y, as well as N) in the input sequences as a completely ambiguous nucleotide."
                   help="Substitutions with these characters are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/>
        </section>
        <!-- "Seeding": seed pattern selection, allowed transitions and the optional seed-hit filter. -->
        <section name="seeding" expanded="false" title="Seeding">
            <!-- Exactly one way of specifying the seed; "defaults" writes no seed option at all. -->
            <conditional name="seed">
                <param name="seed_selector" type="select" display="radio" label="Select seed type">
                    <option value="defaults" selected="true">Use defaults</option>
                    <option value="pre_set">Use pre-set values</option>
                    <option value="len">Set length</option>
                    <option value="half_len">Set half length</option>
                    <option value="pattern">Set pattern</option>
                </param>
                <when value="defaults">
                    <!-- Do nothing -->
                </when>
                <when value="pre_set">
                    <param name="pre_set_seeds" type="select" display="radio" label="Select seed pattern" help="Select between 12of19 seed (19-bp word with matches in 12 specific positions: 1110100110010101111) or 14of22 seed (22-bp word with matches in 14 specific positions: 1110101100110010101111). 0 = mismatch, 1 = match" argument="--seed=12of19,--seed=14of22">
                        <option value="--seed=12of19">12of19</option>
                        <option value="--seed=14of22">14of22</option>
                    </param>
                </when>
                <when value="len">
                     <param name="seed_match" type="integer" optional="true" label="Set seed length" help="Seeds require a N-bp word with matches in all positions" argument="--seed=match" />
                </when>
                <when value="half_len">
                    <param name="seed_half" type="integer" optional="true" label="Set seed length" help="Seeds requires N-bp word with matches or transitions in all positions" argument="--seed=half" />
                </when>
                <when value="pattern">
                    <param name="pattern" type="text" size="25" optional="true" argument="--seed=PATTERN" label="Specify an arbitrary pattern" help="Use 1s, 0s, and Ts for seed discovery (e.g., 1110100110010101111 where 1 = match, 0 = mismatch, T = transition)" />
                </when>
            </conditional>
            <!-- The pre-selected value is skipped by the command template; only the other two are written out. -->
            <param name="transitions" type="select" display="radio" label="Allow transitions" help="In each seed, specify how many match positions can be transition(s) instead">
                <option value="--transition" selected="true">One</option>
                <option value="--transition=2">Two</option>
                <option value="--notransition">None</option>
            </param>
            <!-- Seed-hit filter: filter_tr caps transversions (as its help states), filter_match is the
                 required number of exact matches. The filter is only emitted when filter_match is set. -->
            <conditional name="seed_filer">
                <param name="seed_filer_selector" type="select" display="radio" label="Filter seeds" argument="--filter">
                    <option value="yes">Yes</option>
                    <option value="no" selected="true">No</option>
                </param>
                <when value="yes">
                    <param name="filter_tr" type="integer" optional="true" label="Number of transversions" help="Allowing no more than this number of transversions. If not specified, any number of transversions is allowed (they are not limited)" />
                    <param name="filter_match" type="integer" optional="true" label="Number of matches" help="Require at least this many exact matches"/>
                </when>
                <when value="no">
                    <!-- Do nothing -->
                </when>
            </conditional>
        </section>
        <!-- "HSPs (Gap-free extension)": switches for the gap-free extension stage, the HSP finding
             method and entropy adjustment. Several params here have no name attribute; the command
             template addresses them as hsp.gfextend, hsp.nogfextend and, inside hsp_method, as exact,
             xdrop and hspthresh. -->
        <section name="hsp" expanded="false" title="HSPs (Gap-free extension)">
            <param type="boolean" truevalue="--gfextend" checked="false" argument="--gfextend" label="Perform gap-free extension of seeds to HSPs" help="This will take into account other papermeters in this section"/>
            <param type="boolean" truevalue="--nogfextend" argument="--nogfextend" label="Skip the gap-free extension stage" help="Instead, pass the seeds along to the next specified stage.It is not recommended to use --nogfextend without also using --nogapped."/>
            <!-- One extension method at a time; "none" writes no method option. -->
            <conditional name="hsp_method">
                <param name="hsp_method_selector" type="select" display="radio" label="Select HSP finding method">
                    <option value="none" selected="true">None</option>
                    <option value="match">Match extension</option>
                    <option value="mismatch">Mismatch extension</option>
                    <option value="x">X-drop extension</option>
                </param>
                <when value="none">
                    <!-- Do nothing -->
                </when>
                <when value="match">
                    <param type="integer" optional="true" argument="--exact" label="Find HSPs using the exact match extension method with the given length threshold" help="This is instead of using the x-drop method"/>
                </when>
                <when value="mismatch">
                    <!-- The command template writes the mismatch option only when both values are supplied. -->
                    <param name="mismatch_count" type="integer" optional="true" label="Specify number of mismatches"/>
                    <param name="mismatch_length" type="integer" min="1" max="50" optional="true" label="Specify length threshold" help="Find HSPs using the mismatch extension method with the given length threshold and allowing specified number of mismatches" argument="--mismatch"/>
                </when>
                <when value="x">
                    <!-- The three threshold fields are independent in the command template: each one
                         that is filled in produces its own hspthresh option. -->
                    <param type="integer" optional="true" argument="--xdrop" label="Find HSPs using the x-drop extension method with this threshold" help="The dropoff setting determines the endpoints of each gap-free segment: the extension of each seed is stopped when its cumulative score drops off by more than the given threshold from the maximum seen so far."/>
                    <param type="integer" optional="true" argument="--hspthresh" label="Set the score threshold for the x-drop extension method" help="HSPs scoring lower are discarded"/>
                    <param name="hspthresh_top" type="integer" optional="true" argument="--hspthresh=top" label="Set an adaptive score threshold for the x-drop extension method" help="HSPs scoring lower are discarded. The score threshold is chosen to limit the number of target sequence bases in HSPs to about this value (or possibly a little higher in case of ties, etc.)."/>
                    <param name="hspthresh_top_percent" type="integer" optional="true" argument="--hspthresh=top%" label="Set an adaptive score threshold for the x-drop extension method" help="HSPs scoring lower are discarded. The score threshold is chosen to limit the number of target sequence bases in HSPs to about this perentage value (or possibly a little higher in case of ties, etc.)."/>
                </when>
            </conditional>
            <!-- Entropy switches; each ticked box is passed through to the command line as its truevalue. -->
            <param name="entropy" type="boolean" truevalue="--entropy" checked="false" label="Adjust for entropy when qualifying HSPs in the x-drop extension method" help="Those that score just slightly above the HSP threshold are adjusted downward according to the entropy of their nucleotides, and any that then fall below the threshold are discarded."/>
            <param name="entropy_report" type="boolean" truevalue="--entropy=report" checked="false" label="Adjust for entropy when qualifying HSPs in the x-drop extension method, and report (to stderr) any HSPs that are discarded as a result." help="Reported HSPs are printed to stderr"/>
            <param name="noentropy" type="boolean" truevalue="--noentropy" checked="false" label="Don't adjust for entropy when qualifying HSPs"/>
        </section>
        <!-- "Chaining": plain chaining switch plus an optional variant with explicit penalties.
             The first param has no name attribute; the command template addresses it as chaining.chain. -->
        <section name="chaining" title="Chaining" expanded="false">
            <param argument="--chain"
                   type="boolean"
                   truevalue="--chain"
                   checked="false"
                   label="Perform chaining of HSPs with no penalties"/>
            <!-- The command template writes the penalised form only when both diag and anti are supplied. -->
            <conditional name="chaning_penalties">
                <param name="chaning_penalties_selector"
                       argument="--chain="
                       type="select"
                       display="radio"
                       label="Perform chaining with penalties">
                    <option value="yes">Yes</option>
                    <option value="no" selected="true">No</option>
                </param>
                <when value="yes">
                    <param name="diag"
                           type="integer"
                           optional="true"
                           label="Penalty for diagonal in dynamic programming matrix"/>
                    <param name="anti"
                           type="integer"
                           optional="true"
                           label="Penalty for anti-diagonal in dynamic programming matrix"
                           help="These are specified as positive values; subtraction from the score is implicitly assumed."/>
                </when>
                <when value="no">
                    <!-- Do nothing -->
                </when>
            </conditional>
        </section>
        <!-- "Gapped extension": switches and thresholds for the gapped extension stage. None of these
             params has a name attribute; the command template addresses them as gap_ext.gapped,
             gap_ext.nogapped, gap_ext.ydrop, gap_ext.noytrim, gap_ext.gappedthresh and
             gap_ext.allgappedbounds. -->
        <section name="gap_ext" expanded="false" title="Gapped extension">
            <param type="boolean" truevalue="--gapped" argument="--gapped" label="Perform gapped extension of HSPs" help="Extension of HSPs (or seeds, if gap-free extension is not performed), is performed after first reducing them to anchor points."/>
            <param type="boolean" truevalue="--nogapped" argument="--nogapped" label="Skip the gapped extension stage" help="This means that interpolation must also be skipped, since it is not allowed without gapped extension"/>
            <!-- Optional integer thresholds; each is written to the command line only when filled in. -->
            <param type="integer" optional="true" argument="--ydrop" label="Set the threshold for terminating gapped extension" help="This restricts the endpoints of each local alignment by limiting the local region around each anchor in which extension is performed. The boundary of this region in the dynamic programming matrix is formed by the points where the cumulative score has dropped off by more than the given threshold from the maximum seen so far."/>
            <param type="boolean" truevalue="--noytrim" argument="--noytrim" label="If y-drop extension encounters the end of the sequence, extend the alignment to the end of the sequence rather than trimming it back to the location giving the maximum score" help="This is highly recommended when either the target or query sequences are short reads (say, less than 100 bases), to prevent y-drop mismatch shadow."/>
            <param type="integer" optional="true" argument="--gappedthresh" label="Set the threshold for gapped extension" help="Alignments scoring lower than that value are discarded. When used along with the x-drop method for gap-free extension, this value is generally set at least as high as the HSP threshold. Setting it lower has no effect, since at worst the HSP itself would always qualify (both extension stages use the same scoring matrix)."/>
            <param type="boolean" truevalue="--allgappedbounds" argument="--allgappedbounds" label="Revert to handling bounding alignments the way they were handled in BLASTZ."/>
        </section>
        <!--
            Filtering: optional filters applied to alignment blocks.
            All integer parameters are optional. Identity, continuity and coverage
            are min/max percentage pairs (0-100); the lastz option each pair belongs
            to is recorded in the `argument` attribute of its max parameter. How the
            values are assembled into command-line flags is decided in the <command>
            template, not here.
        -->
        <section name="filters" expanded="false" title="Filtering">
            <section name="identity" expanded="true" title="Filter alignments by percent identity">
                <param name="id_min" type="integer" min="0" max="100" optional="true" label="Minimum identity"/>
                <param name="id_max" type="integer" min="0" max="100" optional="true" argument="--filter=identity" label="Maximum identity (optional)" help="0 ≤ min ≤ max ≤ 100 percent. Identity is the percentage of aligned bases that are matches. Alignment blocks outside the given range are discarded."/>
            </section>
            <section name="continuity" expanded="false" title="Filter alignments by continuity">
                <param name="cont_min" type="integer" min="0" max="100" optional="true" label="Minimum continuity"/>
                <param name="cont_max" type="integer" min="0" max="100" optional="true" argument="--filter=continuity" label="Maximum continuity (optional)" help="0 ≤ min ≤ max ≤ 100 percent. Continuity is the percentage of alignment columns that are not gaps. Alignment blocks outside the given range are discarded."/>
            </section>
            <section name="coverage" expanded="false" title="Filter alignments by coverage">
                <param name="cov_min" type="integer" min="0" max="100" optional="true" label="Minimum coverage"/>
                <param name="cov_max" type="integer" min="0" max="100" optional="true" argument="--filter=coverage" label="Maximum coverage (optional)" help="0 ≤ min ≤ max ≤ 100 percent. Coverage is the percentage of the entire target or query sequence (whichever is shorter) that is included in the alignment block. Blocks outside the given range are discarded."/>
            </section>
            <!-- Count-based filters: a lower bound for matches, upper bounds for mismatches and gaps. -->
            <param name="filter_nmatch" type="integer" min="1" optional="true" argument="--filter=nmatch" label="Filter alignments by how many bases match" help="Requiring at least this number of matched bases, min > 0. Match count is the number of matched bases in the alignment."/>
            <!-- A percentage, so it is capped at 100 like the other percentage filters above. -->
            <param name="filter_nmatch_percent" type="integer" min="1" max="100" optional="true" argument="--filter=nmatch%" label="Filter alignments by how many bases match expressed as percentage" help="e.g., percentage of the query length."/>
            <param name="filter_nmismatch" type="integer" min="0" optional="true" argument="--filter=nmismatch:0.." label="Filter alignments by the number of mismatches" help="Allow no more than this number of mismatched bases. Mismatch count, or nmismatch, is the number of aligned bases in the alignment that are mismatches (substitutions)."/>
            <param name="filter_ngap" type="integer" min="0" optional="true" argument="--filter=ngap:0.." label="Filter alignments by the number of gaps" help="Allow no more than this number of gaps. Gap count, or ngap, is the number of runs of gapped columns in the alignment (each run is counted as one gap)."/>
            <param name="filter_cgap" type="integer" min="0" optional="true" argument="--filter=cgap:0.." label="Filter alignments by the number of gap columns" help="Allow no more than this number of gap columns. Gap column count, or cgap, is the number of gapped columns in the alignment (each column is counted as one gap)."/>
            <!-- No explicit name: Galaxy derives the parameter name from `argument` (leading dashes stripped). -->
            <param type="boolean" truevalue="--notrivial" argument="--notrivial" label="Do not output a trivial self-alignment block if the target and query sequences are identical" help="Note that using ‑‑self automatically enables this option."/>
        </section>
        <!-- Interpolation: a single optional integer threshold; the parameter name is derived from `argument`. -->
        <section title="Interpolation" name="interpolation" expanded="false">
            <param argument="--inner"
                   type="integer"
                   optional="true"
                   label="Perform additional alignment between the gapped alignment blocks, using (presumably) more sensitive alignment parameters"
                   help="This value is used as the threshold for both the gap-free and gapped extension sub-stages; see the discussion of interpolation for more details. This option is only valid if gapped extension is performed."/>
        </section>
        <!--
            Output: selects the lastz output format and, optionally, a dot plot.
            The `out` conditional switches on `format`; each <when> branch holds the
            sub-options for one format, and branches marked "Do nothing" have none.
        -->
        <section name="output_format" expanded="false" title="Output">
            <conditional name="out">
                <param name="format" type="select" display="radio" label="Specify the output format">
                    <option value="bam" selected="true">BAM (--format=sam)</option>
                    <option value="general_def">General default (--format=general)</option>
                    <option value="general_full">Customized general (--format=general[:fields])</option>
                    <option value="maf">MAF (--format=maf)</option>
                    <option value="blastn">blastn (--format=BLASTN)</option>
                    <option value="differences">Differences (--format=differences)</option>
                </param>
                <when value="bam">
                    <param name="bam_options" type="select" display="radio" argument="--format=sam, --format=softsam" label="Select a BAM flavor to output" help="Lastz actually outputs SAM data but Galaxy converts it into BAM to save space. For alignments that don't reach the end of a query, ‑‑format=sam uses 'hard clipping', while ‑‑format=softsam uses 'soft clipping'. See the section on 'clipped alignment' in the SAM specification for an explanation of what this means. The options ‑‑format=sam- and ‑‑format=softsam- suppress the SAM header lines. This makes them suitable for concatenating output from multiple runs. If you need to specify readgroup information: use AddOrReplaceReadGroups from the Picard package.">
                        <option value="sam" selected="true">BAM</option>
                        <option value="softsam">soft-clipped BAM</option>
                        <option value="sam-">BAM without header</option>
                        <option value="softsam-">soft-clipped BAM without header</option>
                    </param>
                </when>
                <when value="general_def">
                    <!-- Do nothing -->
                </when>
                <when value="general_full">
                    <!-- Columns for the customized "general" format; option values are the lastz field names. -->
                    <param name="fields" type="select" display="checkboxes" multiple="true" label="Select which fields to include" argument="--format=general-[:fields]">
                        <option value="score" selected="true">score: Score of the alignment block</option>
                        <option value="name1" selected="true">name1: Name of the target sequence</option>
                        <option value="number1">number1: Number of the target sequence within the target file</option>
                        <option value="strand1" selected="true">strand1: Target sequence strand</option>
                        <option value="size1" selected="true">size1: Size of the entire target sequence</option>
                        <option value="start1">start1: Starting position of the alignment block in the target, origin-one</option>
                        <option value="zstart1" selected="true">zstart1: Starting position of the alignment block in the target, origin-zero</option>
                        <option value="end1" selected="true">end1: Ending position of the alignment block in the target</option>
                        <option value="length1">length1: Length of the alignment block in the target (excluding gaps)</option>
                        <option value="text1">text1: Aligned characters in the target, including gap characters</option>
                        <option value="qalign1">qalign1: The target quality sequence (if there is one) corresponding to aligned characters</option>
                        <option value="nucs1">nucs1: The entire target sequence</option>
                        <option value="name2" selected="true">name2: Name of the query sequence</option>
                        <option value="number2">number2: Number of the query sequence within the query file</option>
                        <option value="strand2" selected="true">strand2: Query sequence strand</option>
                        <option value="size2" selected="true">size2: Size of the entire query sequence</option>
                        <option value="start2">start2: Starting position of the alignment block in the query, origin-one</option>
                        <option value="zstart2" selected="true">zstart2: Starting position of the alignment block in the query, origin-zero</option>
                        <option value="end2" selected="true">end2: Ending position of the alignment block in the query</option>
                        <option value="length2">length2: Length of the alignment block in the query (excluding gaps)</option>
                        <option value="text2">text2: Aligned characters in the query, including gap characters</option>
                        <option value="qalign2">qalign2: The query quality sequence (if there is one) corresponding to aligned characters</option>
                        <option value="nucs2">nucs2: The entire query sequence</option>
                        <option value="nmatch">nmatch: Match count</option>
                        <option value="nmismatch">nmismatch: Mismatch count</option>
                        <option value="ncolumn">ncolumn: Number of columns in the block. This includes matches, mismatches (substitutions), and gaps</option>
                        <option value="npair">npair: Number of aligned bases in the block that are matches or mismatches (substitutions)</option>
                        <option value="ngap">ngap: Gap count, the number of gaps in the block, counting each run of gapped columns as a single gap</option>
                        <option value="cgap">cgap: Gap column count, the number of gaps in the block, counting each gapped column as a separate gap</option>
                        <option value="diff">diff: Differences between what would be written for text1 and text2</option>
                        <option value="cigar">cigar: A CIGAR-like representation of the alignment’s path</option>
                        <option value="cigarx">cigarx: Same as cigar, but uses a newer syntax that distinguishes matches from substitutions</option>
                        <option value="identity" selected="true">identity: Fraction of aligned bases in the block that are matches</option>
                        <option value="idfrac">idfrac: Fraction of aligned bases in the block that are matches</option>
                        <option value="id%" selected="true">id%: Fraction of aligned bases in the block that are matches (as %)</option>
                        <option value="blastid%">blastid%: Fraction of the alignment block that is matches, as would be reported by NCBI BLAST</option>
                        <option value="continuity">continuity: Rate of non-gaps (non-indels) in the alignment block</option>
                        <option value="confrac">confrac: Rate of non-gaps (non-indels) in the alignment block (as fraction)</option>
                        <option value="con%">con%: Rate of non-gaps (non-indels) in the alignment block (as %)</option>
                        <option value="coverage" selected="true">coverage: Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block</option>
                        <option value="covfrac">covfrac: Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block (as fraction)</option>
                        <option value="cov%" selected="true">cov%: Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block (as %)</option>
                        <option value="diagonal">diagonal: The diagonal of the start of the alignment block in the DP matrix, expressed as an identifying number start1-start2</option>
                        <option value="shingle">shingle: A measurement of the shingle overlap between the target and the query</option>
                        <option value="number">number: The alignment number, counted as alignments are written to output (1-base)</option>
                        <option value="znumber">znumber: The alignment number, counted as alignments are written to output (0-base)</option>
                        <!-- Whitelist letters, digits and "%" (needed by id%, con%, cov%, blastid%); invalid_char is empty. -->
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits"><add value="%" /> </valid>
                        </sanitizer>
                    </param>
                </when>
                <when value="maf">
                    <param name="maf_type" type="select" display="radio" argument="--format=maf" label="Select MAF flavor" help="MAF is a multiple alignment format developed at UCSC">
                        <option value="maf">MAF</option>
                        <option value="maf+">MAF with additional stats</option>
                        <option value="maf-">MAF without header and comments</option>
                    </param>
                </when>
                <when value="blastn">
                    <!-- Do nothing -->
                </when>
                <when value="differences">
                    <!-- Do nothing -->
                </when>
            </conditional>
            <!-- When checked, the value is the full lastz flag writing plot.r; unchecked yields an empty string. -->
            <param name="rplot" type="boolean" truevalue="--rdotplot=plot.r" falsevalue="" checked="false" argument="--rdotplot" label="Create a dotplot representation of alignments?" help="The dotplot is only useful if query and target contain exactly one sequence each"/>
        </section>
    </inputs>
    <!--
        Outputs: the alignment dataset plus an optional dot plot.
        The alignment defaults to tabular and is re-typed according to the selected
        output format; the plot is only declared when `rplot` is enabled.
    -->
    <outputs>
        <data name="output" format="tabular" label="${tool.name} on ${on_string}: mapped reads">
            <change_format>
                <when input="output_format.out.format" value="bam" format="bam"/>
                <when input="output_format.out.format" value="maf" format="maf"/>
                <when input="output_format.out.format" value="differences" format="interval"/>
            </change_format>
        </data>
        <data name="out_plot" format="png" label="${tool.name} on ${on_string}: dot plot">
            <filter>output_format['rplot']</filter>
        </data>
    </outputs>
    <!-- Functional tests; each one expects exactly one output dataset (no dot plot). -->
    <tests>
        <!-- 1: cached target (phiX174), general output -->
        <test expect_num_outputs="1">
            <param name="ref_source" value="cached"/>
            <param name="target_2bit" value="phiX174"/>
            <param name="query" value="phiX_split.fasta"/>
            <param name="strand" value="--strand=both"/>
            <param name="format" value="general_def"/>
            <output name="output" value="test1.out"/>
        </test>
        <!-- 2: target from history, general output -->
        <test expect_num_outputs="1">
            <param name="ref_source" value="history"/>
            <param name="target" value="phiX_split.fasta"/>
            <param name="query" value="phiX.fasta"/>
            <param name="strand" value="--strand=both"/>
            <param name="format" value="general_def"/>
            <output name="output" value="test2.out"/>
        </test>
        <!-- 3: same inputs as test 2 plus a score file -->
        <test expect_num_outputs="1">
            <param name="ref_source" value="history"/>
            <param name="target" value="phiX_split.fasta"/>
            <param name="query" value="phiX.fasta"/>
            <param name="strand" value="--strand=both"/>
            <param name="score_file" value="score_file.txt"/>
            <param name="format" value="general_def"/>
            <output name="output" value="test3.out"/>
        </test>
        <!-- 4: gzipped FASTA target and query, blastn output -->
        <test expect_num_outputs="1">
            <param name="ref_source" value="history"/>
            <param name="target" value="chrM_mouse.fa.gz"/>
            <param name="query" value="chrM_human.fa.gz"/>
            <param name="strand" value="--strand=both"/>
            <param name="format" value="blastn"/>
            <output name="output" value="test4.out"/>
        </test>
        <!-- 5: gzipped FASTA target, bzip2-compressed FASTQ query -->
        <test expect_num_outputs="1">
            <param name="ref_source" value="history"/>
            <param name="target" ftype="fasta.gz" value="chrM_human.fa.gz"/>
            <param name="query" ftype="fastq.bz2" value="chrM_mouse.fq.bz2"/>
            <param name="strand" value="--strand=both"/>
            <param name="format" value="blastn"/>
            <output name="output" value="test5.out"/>
        </test>
        <!-- 6: as test 5 with a gzipped FASTQ query; same expected output -->
        <test expect_num_outputs="1">
            <param name="ref_source" value="history"/>
            <param name="target" ftype="fasta.gz" value="chrM_human.fa.gz"/>
            <param name="query" ftype="fastq.gz" value="chrM_mouse.fq.gz"/>
            <param name="strand" value="--strand=both"/>
            <param name="format" value="blastn"/>
            <output name="output" value="test5.out"/>
        </test>
        <!-- 7: as test 5 with an uncompressed FASTQ query and lastz_32 enabled; same expected output -->
        <test expect_num_outputs="1">
            <param name="ref_source" value="history"/>
            <param name="target" ftype="fasta.gz" value="chrM_human.fa.gz"/>
            <param name="query" ftype="fastq" value="chrM_mouse.fq"/>
            <param name="lastz_32" value="true"/>
            <param name="strand" value="--strand=both"/>
            <param name="format" value="blastn"/>
            <output name="output" value="test5.out"/>
        </test>
        <!-- 8: cached target (phiX174), differences output -->
        <test expect_num_outputs="1">
            <param name="ref_source" value="cached"/>
            <param name="target_2bit" value="phiX174"/>
            <param name="query" value="phiX_split.fasta"/>
            <param name="strand" value="--strand=both"/>
            <param name="format" value="differences"/>
            <output name="output" value="test6.out"/>
        </test>
    </tests>

    <help><![CDATA[

**What it does**

LASTZ is designed to preprocess one sequence or set of sequences (which we collectively call the *TARGET*) and then align several *QUERY* sequences to it. It was developed by `Bob Harris <http://www.bx.psu.edu/~rsharris/>`_ in the lab of Webb Miller at Penn State.

.. class:: infomark

**Read documentation** before proceeding. LASTZ is a complex tool with many parameter options. Fortunately, there is a `great manual <https://lastz.github.io/lastz/>`_ maintained by its author. Default parameters may be sufficient to obtain the initial idea about how similar your sequences are, but to produce reliable alignments you may need to tweak the parameters. Read the manual.

.. class:: warningmark

The Galaxy version of LASTZ sets **--ambiguous=iupac** as default (see **Scoring** section). This prevents LASTZ from erroring out if one of the DNA inputs contains "non-standard" nucleotides.

**About LASTZ parameters**

Galaxy's version of LASTZ has nine parameter sections (*Where to look*, *Scoring*, *Seeding*, *HSPs*, *Chaining*, *Gapped extension*, *Filtering*, *Interpolation*, and *Output*). These sections closely follow parameter description in the `manual <https://lastz.github.io/lastz/#syntax>`_.

**Defaults**

Here are the defaults for some of the most important parameters::

    --seed=<pattern>       set seed pattern (12of19, 14of22, or general pattern)
                           (default is 1110100110010101111)
                           SEE "Seeding" SECTION -> "Select seed type"

    --[no]transition       allow (or don't) one transition in a seed hit
                           (by default a transition is allowed)
                           SEE "Seeding" SECTION -> "Allow transitions"

    --[no]chain            perform chaining
                           (by default no chaining is performed)
                           SEE "Chaining" SECTION

    --[no]gapped           perform gapped alignment (instead of gap-free)
                           (by default gapped alignment is performed)
                           SEE "Gapped extension" SECTION

    --strand=both          search both strands
    --strand=plus          search + strand only (matching strand of query spec)
                           (by default both strands are searched)
                           SEE "Where to look" SECTION

    --scores=<file>        read substitution and gap scores from a file
                           SEE "Scoring" SECTION

    --xdrop=<score>        set x-drop threshold (default is 10sub[A][A])
                           SEE "HSPs" SECTION

    --ydrop=<score>        set y-drop threshold (default is open+300extend)
                           SEE "Gapped extension" SECTION

    --hspthresh=<score>    set threshold for high scoring pairs (default is 3000)
                           ungapped extensions scoring lower are discarded
                           <score> can also be a percentage or base count
                           SEE "HSPs" SECTION

    --gappedthresh=<score> set threshold for gapped alignments
                           gapped extensions scoring lower are discarded
                           <score> can also be a percentage or base count
                           (default is to use same value as --hspthresh)
                           SEE "Gapped extension" SECTION


**Substitution matrix**

By default the HOXD70 substitution scores are used (from `Chiaromonte et al. 2002 <https://www.ncbi.nlm.nih.gov/pubmed/11928468>`_)::

    bad_score          = X:-1000  # used for sub['X'][*] and sub[*]['X']
    fill_score         = -100     # used when sub[*][*] is not defined
    gap_open_penalty   =  400
    gap_extend_penalty =   30

         A     C     G     T
    A   91  -114   -31  -123
    C -114   100  -125   -31
    G  -31  -125   100  -114
    T -123   -31  -114    91

Matrix can be supplied as an input to **Read the substitution scores** parameter in *Scoring* section. Substitution matrix can be inferred from your data using another LASTZ-based tool (LASTZ_D: Infer substitution scores).

**Output**

This version of LASTZ produces one output by default: a BAM alignment file. Other formats as well as a Dot Plot can be configured in the *Output* section. This incarnation of LASTZ produces outputs without the comment line starting with '#'. To learn the identity of each column, consult the `formats section of the LASTZ manual <https://lastz.github.io/lastz/#formats>`_.

        ]]>
    </help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Sun, 02 Jul 2023 12:10:29 +0000
parents	86f2c3695694
children