Mercurial > repos > devteam > lastz
comparison lastz.xml @ 4:0acd9701676b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lastz commit a7e9d5b3906b7ebb35b1c29c3a8e8203b2cefccd
author | iuc |
---|---|
date | Fri, 18 May 2018 16:58:24 -0400 |
parents | c3767eaae954 |
children | ec4affe27298 |
comparison
equal
deleted
inserted
replaced
3:c3767eaae954 | 4:0acd9701676b |
---|---|
1 <tool id="lastz_wrapper_2" name="LASTZ" version="1.3.1"> | 1 <tool id="lastz_wrapper_2" name="LASTZ" version="1.3.2"> |
2 <description>: align long sequences</description> | 2 <description>: align long sequences</description> |
3 <macros> | 3 <macros> |
4 <import>lastz_macros.xml</import> | 4 <import>lastz_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <requirements> | 6 <requirements> |
236 | 236 |
237 #if $interpolation.inner: | 237 #if $interpolation.inner: |
238 '--inner=${interpolation.inner}' | 238 '--inner=${interpolation.inner}' |
239 #end if | 239 #end if |
240 | 240 |
241 ## HOUSEKEEPING ---------------------------------- | |
242 | |
243 --traceback=160M | |
244 | |
245 | |
241 ## OUTPUT FORMATS -------------------------------- | 246 ## OUTPUT FORMATS -------------------------------- |
242 | 247 |
243 #if str( $output_format.out.format ) == "bam": | 248 #if str( $output_format.out.format ) == "bam": |
244 '--format=${output_format.out.bam_options}' | 249 '--format=${output_format.out.bam_options}' |
245 #elif str( $output_format.out.format ) == "general_def": | 250 #elif str( $output_format.out.format ) == "general_def": |
250 --format=BLASTN- | 255 --format=BLASTN- |
251 #elif str( $output_format.out.format ) == "general_full": | 256 #elif str( $output_format.out.format ) == "general_full": |
252 '--format=general-:${output_format.out.fields}' | 257 '--format=general-:${output_format.out.fields}' |
253 #end if | 258 #end if |
254 --action:target=multiple | 259 --action:target=multiple |
255 --rdotplot=plot.r | 260 $output_format.rplot |
256 #if str( $output_format.out.format ) == "bam": | 261 #if str( $output_format.out.format ) == "bam": |
257 | samtools sort -@\${GALAXY_SLOTS:-2} -O bam -o '${output}' && | 262 | samtools sort -@\${GALAXY_SLOTS:-2} -O bam -o '${output}' |
258 #else: | 263 #else: |
259 > '${output}' && | 264 > '${output}' |
260 #end if | 265 #end if |
261 Rscript $r_plot > /dev/null 2>&1 | 266 #if $output_format.rplot: |
267 && | |
268 Rscript $r_plot > /dev/null 2>&1 | |
269 #end if | |
262 | 270 |
263 ]]> | 271 ]]> |
264 </command> | 272 </command> |
265 <configfiles> | 273 <configfiles> |
266 <configfile name="r_plot"> | 274 <configfile name="r_plot"> |
347 <when value="no"> | 355 <when value="no"> |
348 <!-- Do nothing --> | 356 <!-- Do nothing --> |
349 </when> | 357 </when> |
350 </conditional> | 358 </conditional> |
351 <param name="ambigN" type="boolean" truevalue="--ambiguous=n" checked="false" label="Treat each N in the input sequences as an ambiguous nucleotide" argument="--ambiguous=n" help="Substitutions with N are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/> | 359 <param name="ambigN" type="boolean" truevalue="--ambiguous=n" checked="false" label="Treat each N in the input sequences as an ambiguous nucleotide" argument="--ambiguous=n" help="Substitutions with N are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/> |
352 <param name="ambigIUPAC" type="boolean" truevalue="--ambiguous=iupac" checked="false" label="Treat each of the IUPAC-IUB ambiguity codes (B, D, H, K, M, R, S, V, W, and Y, as well as N) in the input sequences as a completely ambiguous nucleotide." argument="--ambiguous=iupac" help="Substitutions with these characters are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/> | 360 <param name="ambigIUPAC" type="boolean" truevalue="--ambiguous=iupac" checked="true" label="Treat each of the IUPAC-IUB ambiguity codes (B, D, H, K, M, R, S, V, W, and Y, as well as N) in the input sequences as a completely ambiguous nucleotide." argument="--ambiguous=iupac" help="Substitutions with these characters are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/> |
353 </section> | 361 </section> |
354 <section name="seeding" expanded="false" title="Seeding"> | 362 <section name="seeding" expanded="false" title="Seeding"> |
355 <conditional name="seed"> | 363 <conditional name="seed"> |
356 <param name="seed_selector" type="select" display="radio" label="Select seed type"> | 364 <param name="seed_selector" type="select" display="radio" label="Select seed type"> |
357 <option value="defaults" selected="true">Use defaults</option> | 365 <option value="defaults" selected="true">Use defaults</option> |
558 </when> | 566 </when> |
559 <when value="blastn"> | 567 <when value="blastn"> |
560 <!-- Do nothing --> | 568 <!-- Do nothing --> |
561 </when> | 569 </when> |
562 </conditional> | 570 </conditional> |
571 <param name="rplot" type="boolean" truevalue="--rdotplot=plot.r" falsevalue="" checked="false" argument="--rdotplot" label="Create a dotplot representation of alignments?" help="The dotplot is only useful if query and target contain exactly one sequence each"/> | |
563 </section> | 572 </section> |
564 </inputs> | 573 </inputs> |
565 <outputs> | 574 <outputs> |
566 <data format="tabular" name="output" label="${tool.name} on ${on_string}: mapped reads"> | 575 <data format="tabular" name="output" label="${tool.name} on ${on_string}: mapped reads"> |
567 <change_format> | 576 <change_format> |
568 <when input="output_format.out.format" value="bam" format="bam" /> | 577 <when input="output_format.out.format" value="bam" format="bam" /> |
569 <when input="output_format.out.format" value="maf" format="maf" /> | 578 <when input="output_format.out.format" value="maf" format="maf" /> |
570 </change_format> | 579 </change_format> |
571 </data> | 580 </data> |
572 <data format="png" name="out_plot" label="${tool.name} on ${on_string}: dot plot"/> | 581 <data format="png" name="out_plot" label="${tool.name} on ${on_string}: dot plot"> |
582 <filter>output_format['rplot']</filter> | |
583 </data> | |
573 </outputs> | 584 </outputs> |
574 <tests> | 585 <tests> |
575 <test> | 586 <test> |
576 <param name="ref_source" value="cached" /> | 587 <param name="ref_source" value="cached" /> |
577 <param name="target_2bit" value="phiX174" /> | 588 <param name="target_2bit" value="phiX174" /> |
607 </test> | 618 </test> |
608 <test> | 619 <test> |
609 <param name="ref_source" value="history" /> | 620 <param name="ref_source" value="history" /> |
610 <param name="target" ftype="fasta.gz" value="chrM_human.fa.gz" /> | 621 <param name="target" ftype="fasta.gz" value="chrM_human.fa.gz" /> |
611 <param name="query" ftype="fastq.bz2" value="chrM_mouse.fq.bz2" /> | 622 <param name="query" ftype="fastq.bz2" value="chrM_mouse.fq.bz2" /> |
623 <param name="traceback" value="83886080" /> | |
624 <param name="word" value="28" /> | |
612 <param name="strand" value="--strand=both" /> | 625 <param name="strand" value="--strand=both" /> |
613 <param name="format" value="blastn" /> | 626 <param name="format" value="blastn" /> |
614 <output name="output" value="test5.out" /> | 627 <output name="output" value="test5.out" /> |
615 </test> | 628 </test> |
616 <test> | 629 <test> |
631 </test> | 644 </test> |
632 </tests> | 645 </tests> |
633 | 646 |
634 <help><![CDATA[ | 647 <help><![CDATA[ |
635 | 648 |
636 **What it does** | 649 **What it does** |
637 | 650 |
638 LASTZ is designed to preprocess one sequence or set of sequences (which we collectively call the *TARGET*) and then align several *QUERY* sequences to it. It was developed by `Bob Harris <http://www.bx.psu.edu/~rsharris/>`_ in the lab of Webb Miller at Penn State. | 651 LASTZ is designed to preprocess one sequence or set of sequences (which we collectively call the *TARGET*) and then align several *QUERY* sequences to it. It was developed by `Bob Harris <http://www.bx.psu.edu/~rsharris/>`_ in the lab of Webb Miller at Penn State. |
639 | 652 |
640 .. class:: warningmark | 653 .. class:: infomark |
641 | 654 |
642 **Read documentation** before proceeding. LASTZ is a complex tool with many parameter options. Fortunately, there is a `great manual <https://lastz.github.io/lastz/>`_ maintained by its author. Default parameters may be sufficient to obtain the initial idea about how similar your sequences are, but to produce reliable alignments you may need to tweak the parameters. So RTFM! | 655 **Read documentation** before proceeding. LASTZ is a complex tool with many parameter options. Fortunately, there is a `great manual <https://lastz.github.io/lastz/>`_ maintained by its author. Default parameters may be sufficient to obtain the initial idea about how similar your sequences are, but to produce reliable alignments you may need to tweak the parameters. So RTFM! |
643 | 656 |
644 **About LASTZ parameters** | 657 .. class:: warningmark |
645 | 658 |
646 Galaxy's version of LASTZ has nine parameter sections (*Where to look*, *Scoring*, *Seeding*, *HSPs*, *Chaining*, *Gapped extension*, *Filtering*, *Interpolation*, and *Output*). These sections closely follow parameter description in the `manual <https://lastz.github.io/lastz/#syntax>`_. | 659 Galaxy version of LASTZ sets **--ambiguous=iupac** as default (see **Scoring** section). This prevents LASTZ from erroring out if one of the DNA inputs contains "non-standard" nucleotides. |
647 | 660 |
648 **Defaults** | 661 **About LASTZ parameters** |
649 | 662 |
650 here are defaults for some of the most important parameters:: | 663 Galaxy's version of LASTZ has nine parameter sections (*Where to look*, *Scoring*, *Seeding*, *HSPs*, *Chaining*, *Gapped extension*, *Filtering*, *Interpolation*, and *Output*). These sections closely follow parameter description in the `manual <https://lastz.github.io/lastz/#syntax>`_. |
651 | 664 |
652 --seed=<pattern> set seed pattern (12of19, 14of22, or general pattern) | 665 **Defaults** |
653 (default is 1110100110010101111) | 666 |
654 SEE "Seeding" SECTION -> "Select seed type" | 667 here are defaults for some of the most important parameters:: |
655 | 668 |
656 --[no]transition allow (or don't) one transition in a seed hit | 669 --seed=<pattern> set seed pattern (12of19, 14of22, or general pattern) |
657 (by default a transition is allowed) | 670 (default is 1110100110010101111) |
658 SEE "Seeding" SECTION -> "Allow transitions" | 671 SEE "Seeding" SECTION -> "Select seed type" |
659 | 672 |
660 --[no]chain perform chaining | 673 --[no]transition allow (or don't) one transition in a seed hit |
661 (by default no chaining is performed) | 674 (by default a transition is allowed) |
662 SEE "Chaining" SECTION | 675 SEE "Seeding" SECTION -> "Allow transitions" |
663 | 676 |
664 --[no]gapped perform gapped alignment (instead of gap-free) | 677 --[no]chain perform chaining |
665 (by default gapped alignment is performed) | 678 (by default no chaining is performed) |
666 SEE "Gapped extension" SECTION | 679 SEE "Chaining" SECTION |
667 | 680 |
668 --strand=both search both strands | 681 --[no]gapped perform gapped alignment (instead of gap-free) |
669 --strand=plus search + strand only (matching strand of query spec) | 682 (by default gapped alignment is performed) |
670 (by default both strands are searched) | 683 SEE "Gapped extension" SECTION |
671 SEE "Where to look" SECTION | 684 |
672 | 685 --strand=both search both strands |
673 --scores=<file> read substitution and gap scores from a file | 686 --strand=plus search + strand only (matching strand of query spec) |
674 SEE "Scoring" SECTION | 687 (by default both strands are searched) |
675 | 688 SEE "Where to look" SECTION |
676 --xdrop=<score> set x-drop threshold (default is 10sub[A][A]) | 689 |
677 SEE "HSPs" SECTION | 690 --scores=<file> read substitution and gap scores from a file |
678 | 691 SEE "Scoring" SECTION |
679 --ydrop=<score> set y-drop threshold (default is open+300extend) | 692 |
680 SEE "Gapped extension" SECTION | 693 --xdrop=<score> set x-drop threshold (default is 10sub[A][A]) |
681 | 694 SEE "HSPs" SECTION |
682 --hspthresh=<score> set threshold for high scoring pairs (default is 3000) | 695 |
683 ungapped extensions scoring lower are discarded | 696 --ydrop=<score> set y-drop threshold (default is open+300extend) |
684 <score> can also be a percentage or base count | 697 SEE "Gapped extension" SECTION |
685 SEE "HSPs" SECTION | 698 |
686 | 699 --hspthresh=<score> set threshold for high scoring pairs (default is 3000) |
687 --gappedthresh=<score> set threshold for gapped alignments | 700 ungapped extensions scoring lower are discarded |
688 gapped extensions scoring lower are discarded | 701 <score> can also be a percentage or base count |
689 <score> can also be a percentage or base count | 702 SEE "HSPs" SECTION |
690 (default is to use same value as --hspthresh) | 703 |
691 SEE "Gapped extension" SECTION | 704 --gappedthresh=<score> set threshold for gapped alignments |
692 | 705 gapped extensions scoring lower are discarded |
693 | 706 <score> can also be a percentage or base count |
694 **Substitution matrix** | 707 (default is to use same value as --hspthresh) |
695 | 708 SEE "Gapped extension" SECTION |
696 By default the HOXD70 substitution scores are used (from `Chiaromonte et al. 2002 <https://www.ncbi.nlm.nih.gov/pubmed/11928468>`_):: | 709 |
697 | 710 |
698 bad_score = X:-1000 # used for sub['X'][*] and sub[*]['X'] | 711 **Substitution matrix** |
699 fill_score = -100 # used when sub[*][*] is not defined | 712 |
700 gap_open_penalty = 400 | 713 By default the HOXD70 substitution scores are used (from `Chiaromonte et al. 2002 <https://www.ncbi.nlm.nih.gov/pubmed/11928468>`_):: |
701 gap_extend_penalty = 30 | 714 |
702 | 715 bad_score = X:-1000 # used for sub['X'][*] and sub[*]['X'] |
703 A C G T | 716 fill_score = -100 # used when sub[*][*] is not defined |
704 A 91 -114 -31 -123 | 717 gap_open_penalty = 400 |
705 C -114 100 -125 -31 | 718 gap_extend_penalty = 30 |
706 G -31 -125 100 -114 | 719 |
707 T -123 -31 -114 91 | 720 A C G T |
708 | 721 A 91 -114 -31 -123 |
709 Matrix can be supplied as an input to **Read the substitution scores** parameter in *Scoring* section. Substitution matrix can be inferred from your data using another LASTZ-based tool (LASTZ_D: Infer substitution scores). | 722 C -114 100 -125 -31 |
710 | 723 G -31 -125 100 -114 |
711 **Output** | 724 T -123 -31 -114 91 |
712 | 725 |
713 This version of LASTZ produces two outputs by default: a BAM alignment file and a dot-plot in PNG format. Other formats can be configured in *Output* section. This incarnation of LASTZ produces outputs without comment line starting with '#'. To learn identity of each column, consult `formats section of LASTZ manual <https://lastz.github.io/lastz/#formats>`_. | 726 Matrix can be supplied as an input to **Read the substitution scores** parameter in *Scoring* section. Substitution matrix can be inferred from your data using another LASTZ-based tool (LASTZ_D: Infer substitution scores). |
727 | |
728 **Output** | |
729 | |
730 This version of LASTZ produces one output by default: a BAM alignment file. Other formats as well as a Dot Plot can be configured in *Output* section. This incarnation of LASTZ produces outputs without comment line starting with '#'. To learn identity of each column, consult `formats section of LASTZ manual <https://lastz.github.io/lastz/#formats>`_. | |
714 | 731 |
715 ]]> | 732 ]]> |
716 </help> | 733 </help> |
717 <expand macro="citations"/> | 734 <expand macro="citations"/> |
718 </tool> | 735 </tool> |