comparison bwa-mem.xml @ 17:23e88ff6c494 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bwa commit e953b3b7dac6cbe9509fdc673907a7c2c7183180
author iuc
date Wed, 19 Mar 2025 17:24:58 +0000
parents 22b497739c9c
children 52f5f04041f2
comparison
equal deleted inserted replaced
16:47c6967dc4e0 17:23e88ff6c494
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 <tool id="bwa_mem" name="Map with BWA-MEM" version="@VERSION@.2"> 2 <tool id="bwa_mem" name="Map with BWA-MEM" version="@TOOL_VERSION@" profile="22.05">
3 <description>- map medium and long reads (&gt; 100 bp) against reference genome</description> 3 <description>- map medium and long reads (&gt; 100 bp) against reference genome</description>
4 <xrefs>
5 <xref type="bio.tools">bwa</xref>
6 </xrefs>
7 <macros> 4 <macros>
8 <import>read_group_macros.xml</import> 5 <import>read_group_macros.xml</import>
9 <import>bwa_macros.xml</import> 6 <import>bwa_macros.xml</import>
10 </macros> 7 </macros>
11 <expand macro="requirements"/> 8 <expand macro="bio_tools"/>
9 <expand macro="requirements">
10 <requirement type="package" version="1.13">samtools</requirement>
11 </expand>
12 <expand macro="stdio"/> 12 <expand macro="stdio"/>
13 <command><![CDATA[ 13 <command><![CDATA[
14 @pipefail@ 14 @pipefail@
15 @set_reference_fasta_filename@ 15 @set_reference_fasta_filename@
16 16
98 #if str( $fastq_input.iset_stats ): 98 #if str( $fastq_input.iset_stats ):
99 -I '${fastq_input.iset_stats}' 99 -I '${fastq_input.iset_stats}'
100 #end if 100 #end if
101 101
102 '${reference_fasta_filename}' 102 '${reference_fasta_filename}'
103 '${fastq_input.fastq_input1}' '${fastq_input.fastq_input2}' 103 '${fastq_input.fastq_input1}'
104 '${fastq_input.fastq_input2}'
104 #elif str( $fastq_input.fastq_input_selector ) == "paired_collection": 105 #elif str( $fastq_input.fastq_input_selector ) == "paired_collection":
105 ## check that insert statistics is used 106 ## check that insert statistics is used
106 #if str( $fastq_input.iset_stats ): 107 #if str( $fastq_input.iset_stats ):
107 -I '${fastq_input.iset_stats}' 108 -I '${fastq_input.iset_stats}'
108 #end if 109 #end if
109 110
110 '${reference_fasta_filename}' 111 '${reference_fasta_filename}'
111 '${fastq_input.fastq_input1.forward}' '${fastq_input.fastq_input1.reverse}' 112 '${fastq_input.fastq_input1.forward}'
113 '${fastq_input.fastq_input1.reverse}'
112 #else: 114 #else:
113 '${reference_fasta_filename}' 115 '${reference_fasta_filename}'
114 '${fastq_input.fastq_input1}' 116 '${fastq_input.fastq_input1}'
115 #end if 117 #end if
116 118
165 </conditional> 167 </conditional>
166 168
167 <expand macro="read_group_conditional" /> 169 <expand macro="read_group_conditional" />
168 170
169 <conditional name="analysis_type"> 171 <conditional name="analysis_type">
170 <param name="analysis_type_selector" type="select" label="Select analysis mode"> 172 <param name="analysis_type_selector" type="select" label="Select analysis mode" help="Please note that minimap2 is recommended over BWA as the aligner for long-read or contig data, for which it outperforms BWA in speed and typically in accuracy (see tool help below).">
171 <option value="illumina">1.Simple Illumina mode</option> 173 <option value="illumina">1.Simple Illumina mode</option>
172 <option value="pacbio">2.PacBio mode (-x pacbio)</option> 174 <option value="pacbio">2.PacBio mode (-x pacbio)</option>
173 <option value="ont2d">3.Nanopore 2D-reads mode (-x ont2d)</option> 175 <option value="ont2d">3.Nanopore 2D-reads mode (-x ont2d)</option>
174 <option value="intractg">4.Intra-species contigs mode (-x intractg)</option> 176 <option value="intractg">4.Intra-species contigs mode (-x intractg)</option>
175 <option value="full">5.Full list of options</option> 177 <option value="full">5.Full list of options</option>
277 <when input="output_sort" value="name" format="qname_sorted.bam" /> 279 <when input="output_sort" value="name" format="qname_sorted.bam" />
278 <when input="output_sort" value="unsorted" format="qname_input_sorted.bam" /> 280 <when input="output_sort" value="unsorted" format="qname_input_sorted.bam" />
279 </change_format> 281 </change_format>
280 </data> 282 </data>
281 </outputs> 283 </outputs>
282
283 <tests> 284 <tests>
284 <test> 285 <!-- `samtools sort` in the new update adds PG lines to the output so the lines_diff is changed from "2" to "4" -->
286 <test expect_num_outputs="1">
285 <param name="reference_source_selector" value="history" /> 287 <param name="reference_source_selector" value="history" />
286 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> 288 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
287 <param name="fastq_input_selector" value="paired"/> 289 <param name="fastq_input_selector" value="paired"/>
288 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> 290 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
289 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> 291 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
290 <param name="analysis_type_selector" value="illumina"/> 292 <param name="analysis_type_selector" value="illumina"/>
291 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" /> 293 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="4" />
292 </test> 294 </test>
293 <test> 295 <test expect_num_outputs="1">
294 <param name="reference_source_selector" value="history" /> 296 <param name="reference_source_selector" value="history" />
295 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> 297 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
296 <param name="fastq_input_selector" value="single"/> 298 <param name="fastq_input_selector" value="single"/>
297 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fasta1.fa"/> 299 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fasta1.fa"/>
298 <param name="analysis_type_selector" value="illumina"/> 300 <param name="analysis_type_selector" value="illumina"/>
299 <output name="bam_output" ftype="bam" file="bwa-mem-test1-fasta.bam" lines_diff="2" /> 301 <output name="bam_output" ftype="bam" file="bwa-mem-test1-fasta.bam" lines_diff="4" />
300 </test> 302 </test>
301 <test> 303 <test expect_num_outputs="1">
302 <param name="reference_source_selector" value="history" /> 304 <param name="reference_source_selector" value="history" />
303 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> 305 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
304 <param name="fastq_input_selector" value="paired"/> 306 <param name="fastq_input_selector" value="paired"/>
305 <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/> 307 <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/>
306 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> 308 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
307 <param name="analysis_type_selector" value="illumina"/> 309 <param name="analysis_type_selector" value="illumina"/>
308 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" /> 310 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="4" />
309 </test> 311 </test>
310 <test> 312 <test expect_num_outputs="1">
311 <param name="reference_source_selector" value="history" /> 313 <param name="reference_source_selector" value="history" />
312 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> 314 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
313 <param name="index_a" value="is"/> 315 <param name="index_a" value="is"/>
314 <param name="fastq_input_selector" value="paired"/> 316 <param name="fastq_input_selector" value="paired"/>
315 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> 317 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
317 <param name="rg_selector" value="set"/> 319 <param name="rg_selector" value="set"/>
318 <param name="ID" value="rg1"/> 320 <param name="ID" value="rg1"/>
319 <param name="PL" value="CAPILLARY"/> 321 <param name="PL" value="CAPILLARY"/>
320 <param name="LB" value="AARDVARK-1" /> 322 <param name="LB" value="AARDVARK-1" />
321 <param name="analysis_type_selector" value="illumina"/> 323 <param name="analysis_type_selector" value="illumina"/>
322 <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="2" /> 324 <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="4" />
323 </test> 325 </test>
324 <test> 326 <test expect_num_outputs="1">
325 <param name="reference_source_selector" value="history" /> 327 <param name="reference_source_selector" value="history" />
326 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> 328 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
327 <param name="fastq_input_selector" value="paired"/> 329 <param name="fastq_input_selector" value="paired"/>
328 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> 330 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
329 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> 331 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
330 <param name="analysis_type_selector" value="illumina"/> 332 <param name="analysis_type_selector" value="illumina"/>
331 <param name="output_sort" value="unsorted"/> 333 <param name="output_sort" value="unsorted"/>
332 <output name="bam_output" ftype="qname_input_sorted.bam" file="bwa-mem-test3.bam" lines_diff="2" /> 334 <output name="bam_output" ftype="qname_input_sorted.bam" file="bwa-mem-test3.bam" lines_diff="4" />
333 </test> 335 </test>
334 <test> 336 <test expect_num_outputs="1">
335 <param name="reference_source_selector" value="history" /> 337 <param name="reference_source_selector" value="history" />
336 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> 338 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
337 <param name="fastq_input_selector" value="paired"/> 339 <param name="fastq_input_selector" value="paired"/>
338 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> 340 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
339 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> 341 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
340 <param name="analysis_type_selector" value="illumina"/> 342 <param name="analysis_type_selector" value="illumina"/>
341 <param name="output_sort" value="name"/> 343 <param name="output_sort" value="name"/>
342 <output name="bam_output" ftype="qname_sorted.bam" file="bwa-mem-test4.bam" lines_diff="2" /> 344 <output name="bam_output" ftype="qname_sorted.bam" file="bwa-mem-test4.bam" lines_diff="4" />
345 </test>
346 <test expect_num_outputs="1">
347 <param name="reference_source_selector" value="history" />
348 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
349 <conditional name="fastq_input">
350 <param name="fastq_input_selector" value="paired_collection"/>
351 <param name="fastq_input1">
352 <collection type="paired">
353 <element name="forward" value="bwa-mem-fastq1.fq" />
354 <element name="reverse" value="bwa-mem-fastq2.fq" />
355 </collection>
356 </param>
357 </conditional>
358 <conditional name="analysis_type">
359 <param name="analysis_type_selector" value="illumina"/>
360 </conditional>
361 <param name="output_sort" value="name"/>
362 <output name="bam_output" ftype="qname_sorted.bam" file="bwa-mem-test4.bam" lines_diff="4" />
343 </test> 363 </test>
344 </tests> 364 </tests>
345 <help><![CDATA[ 365 <help><![CDATA[
346 **What is does** 366
367 **What it does**
368
369 This Galaxy tool wraps the bwa-mem module of the BWA_ read mapping tool. For more details about the different modules of the BWA package see the `BWA manual`_.
370
371 The Galaxy implementation takes fastq files as input and produces output in BAM format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard).
347 372
348 From http://arxiv.org/abs/1303.3997: 373 From http://arxiv.org/abs/1303.3997:
349 374
350 BWA-MEM is an alignment algorithm for aligning sequence reads or long query sequences against a large reference genome such as human. 375 BWA-MEM is an alignment algorithm for aligning sequence reads or long query sequences against a large reference genome such as human.
351 It automatically chooses between local and end-to-end alignments, supports paired-end reads and performs chimeric alignment. 376 It automatically chooses between local and end-to-end alignments, supports paired-end reads and performs chimeric alignment.
352 The algorithm is robust to sequencing errors and applicable to a wide range of sequence lengths from 70bp to a few megabases. 377 The algorithm is robust to sequencing errors and applicable to a wide range of sequence lengths from 70bp to a few megabases.
353 378
354 This Galaxy tool wraps bwa-mem module of bwa read mapping tool. The Galaxy implementation takes fastq files as input and produces output in BAM format, which can be further processed using various BAM utilities exiting in Galaxy (BAMTools, SAMTools, Picard).
355
356 ----- 379 -----
357 380
358 **Indices: Selecting reference genomes for BWA** 381 @ref_genomes@
359
360 Galaxy wrapper for BWA allows you select between precomputed and user-defined indices for reference genomes using **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options:
361
362 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against.
363 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run mapping with `bwa mem`.
364
365 If your genome of interest is not listed here you have two choices:
366
367 1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index needs to be added
368 2. Upload your genome of interest as a FASTA file to Galaxy history and selected **Use a genome from the history and build index** option.
369 382
370 ----- 383 -----
371 384
372 **Galaxy-specific option** 385 **Analysis modes**
373 386
374 Galaxy allows four levels of control over bwa-mem options provided by **Select analysis mode** menu option. These are: 387 The tool supports different preconfigured analysis modes optimized for different types of input data. Alternatively, it allows you to take full control over all available options.
375 388
376 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it alignes single or paired-end data to reference using default parameters. It is equivalent to the following command: bwa mem <reference index> <fastq dataset1> [fastq dataset2] 389 The preconfigured modes are:
377 2. *PacBio mode*: The mode adjusted specifically for mapping of long PacBio subreads. Equivalent to the following command: bwa mem -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 <reference index> <PacBio dataset in fastq format> 390
378 3. *Full list of options*: Allows access to all options through Galaxy interface. 391 1. *Simple Illumina mode*
379 392
380 ----- 393 This corresponds to the simplest possible and standard bwa mem application in which it aligns single or paired-end data to a reference using default parameters. It is equivalent to the following command: bwa mem <reference index> <fastq dataset1> [fastq dataset2]
394 2. *PacBio mode*
395
396 This mode is adjusted specifically for mapping of long PacBio subreads. It is running bwa mem with the `-x pacbio` option.
397 3. *Nanopore 2D-reads mode*
398
399 This mode is running bwa mem with the `-x ont2d` option.
400 4. *Intra-species contigs mode*
401
402 This mode is running bwa mem with the `-x intractg` option.
403
404 .. class:: infomark
405
406 Please note: minimap2_ is recommended over and outperforms BWA-MEM for most types of input data except for Illumina short reads. For Illumina short-read mapping you may also consider using `BWA-MEM2`_, which is about twice as fast as BWA-MEM.
407
408 -----
381 409
382 **Bam sorting mode** 410 **Bam sorting mode**
383 411
384 The generated bam files can be sorted according to three criteria: coordinates, names and input order. 412 The generated bam files can be sorted according to three criteria: coordinates, names and input order.
385 413
390 Finally, the *No sorted (sorted as input)* option yield a BAM file in which the records are sorted in an order corresponding to the order of the reads in the original input file. This option requires using a single thread to perform the conversion from SAM to BAM format, so the runtime is extended. 418 Finally, the *No sorted (sorted as input)* option yield a BAM file in which the records are sorted in an order corresponding to the order of the reads in the original input file. This option requires using a single thread to perform the conversion from SAM to BAM format, so the runtime is extended.
391 419
392 420
393 @RG@ 421 @RG@
394 422
395 @info@ 423 @links@
424 .. _minimap2: https://github.com/lh3/minimap2
425 .. _`BWA-MEM2`: https://github.com/bwa-mem2/bwa-mem2
396 ]]></help> 426 ]]></help>
397 <citations> 427 <citations>
398 <citation type="doi">10.1093/bioinformatics/btp324</citation> 428 <citation type="doi">10.1093/bioinformatics/btp324</citation>
399 <citation type="doi">10.1093/bioinformatics/btp698</citation> 429 <citation type="doi">10.1093/bioinformatics/btp698</citation>
400 <citation type="bibtex">@misc{1303.3997, 430 <citation type="bibtex">@misc{1303.3997,