comparison data_manager/kraken2_build_database.xml @ 14:1663e897c832 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_build_kraken2_database commit 797b5fc5c1b1eb7903c2ab681f399777b77501e3
author iuc
date Thu, 19 Feb 2026 12:13:34 +0000
parents 150ea60a4ebc
children
comparison
equal deleted inserted replaced
13:150ea60a4ebc 14:1663e897c832
1 <tool id="kraken2_build_database" name="Kraken2" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> 1 <tool id="kraken2_build_database" name="Kraken2" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>database builder</description> 2 <description>database builder</description>
3 <macros> 3 <macros>
4 <token name="@TOOL_VERSION@">2.1.6</token> 4 <token name="@TOOL_VERSION@">2.17.1</token>
5 <token name="@VERSION_SUFFIX@">0</token> 5 <token name="@VERSION_SUFFIX@">0</token>
6 <token name="@PROFILE@">24.0</token> 6 <token name="@PROFILE@">24.0</token>
7 <xml name="common_params"> 7 <xml name="common_params">
8 <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" /> 8 <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" />
9 <param name="minimizer_len" type="integer" value="31" label="Minimizer length" /> 9 <param name="minimizer_len" type="integer" value="31" label="Minimizer length" />
54 <version_command>kraken2 -version | head -n 1 | awk '{print $NF}'</version_command> 54 <version_command>kraken2 -version | head -n 1 | awk '{print $NF}'</version_command>
55 <command detect_errors="exit_code"><![CDATA[ 55 <command detect_errors="exit_code"><![CDATA[
56 #import datetime 56 #import datetime
57 #import re 57 #import re
58 58
59 #set now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") 59 #set now = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H%M%SZ")
60 #set commands = [] 60 #set commands = []
61 mkdir '$out_file.extra_files_path' && 61 mkdir '$out_file.extra_files_path' &&
62 62
63 #if $database_type.database_type == "standard_prebuilt" or $database_type.database_type == "special_prebuilt" or $database_type.database_type == "amplicon_prebuilt" 63 #if $database_type.database_type == "standard_prebuilt" or $database_type.database_type == "special_prebuilt" or $database_type.database_type == "amplicon_prebuilt"
64 #set prebuilt_name = { 64 #set prebuilt_name = {
73 'pluspfp': "PlusPFP (Standard plus protozoa, fungi and plant)", 73 'pluspfp': "PlusPFP (Standard plus protozoa, fungi and plant)",
74 'pluspfp_08gb': "PlusPFP-8 (PlusPFP with DB capped at 8 GB)", 74 'pluspfp_08gb': "PlusPFP-8 (PlusPFP with DB capped at 8 GB)",
75 'pluspfp_16gb': "PlusPFP-16 (PlusPFP with DB capped at 16 GB)", 75 'pluspfp_16gb': "PlusPFP-16 (PlusPFP with DB capped at 16 GB)",
76 } 76 }
77 #set special_name = { 77 #set special_name = {
78 "core_nt_20251015": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (October, 2025)",
78 "core_nt_20250609": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (July, 2025)", 79 "core_nt_20250609": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (July, 2025)",
79 "core_nt_20241228": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (December, 2024)", 80 "core_nt_20241228": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (December, 2024)",
80 "core_nt_20240904": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (September, 2024)", 81 "core_nt_20240904": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (September, 2024)",
81 "gtdb_genome_reps_20250609": "GTDB v226 (Bacterial and archaeal)", 82 "gtdb_genome_reps_20250609": "GTDB v226 (Bacterial and archaeal)",
82 "gtdb_genome_reps_20241109": "GTDB v220 (Bacterial and archaeal)", 83 "gtdb_genome_reps_20241109": "GTDB v220 (Bacterial and archaeal)",
142 #else 143 #else
143 >&2 echo "invalid database_type: $database_type.database_type" 144 >&2 echo "invalid database_type: $database_type.database_type"
144 #end if 145 #end if
145 146
146 #if $database_type.database_type == "custom" 147 #if $database_type.database_type == "custom"
147 #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"', 148 #silent command = ["k2", "download-taxonomy",
148 "--download-taxonomy",
149 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'", 149 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'",
150 str($database_type.skip_maps)] 150 str($database_type.skip_maps)]
151 #silent commands.append(" ".join(command)) 151 #silent commands.append(" ".join(command))
152 #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"', 152 #silent command = ["k2", "add-to-library", "--threads", '"${GALAXY_SLOTS:-1}"',
153 "--add-to-library", "'" + str($database_type.custom_fasta) + "'", 153 "--file", "'" + str($database_type.custom_fasta) + "'",
154 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"] 154 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"]
155 #silent commands.append(" ".join(command)) 155 #silent commands.append(" ".join(command))
156 #end if 156 #end if
157 157
158 #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"'] 158 #silent command = ["k2", "build", "--threads", '"${GALAXY_SLOTS:-1}"']
159 #if $database_type.database_type == "standard_local_build" 159 #if $database_type.database_type == "standard_local_build"
160 #silent command.append("--standard") 160 #silent command.append("--standard")
161 #else if $database_type.database_type == "special" 161 #else if $database_type.database_type == "special"
162 #silent command.extend(["--special", str($database_type.special_database_type)]) 162 #silent command.extend(["--special", str($database_type.special_database_type)])
163 #else if $database_type.database_type == "custom"
164 #silent command.append("--build")
165 #end if 163 #end if
166 #silent command.extend([ 164 #silent command.extend([
167 "--kmer-len", str($database_type.kmer_len), 165 "--kmer-len", str($database_type.kmer_len),
168 "--minimizer-len", str($database_type.minimizer_len), 166 "--minimizer-len", str($database_type.minimizer_len),
169 "--minimizer-spaces", str($database_type.minimizer_spaces), 167 "--minimizer-spaces", str($database_type.minimizer_spaces),
170 "--load-factor", str($database_type.load_factor), 168 "--load-factor", str($database_type.load_factor),
171 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"]) 169 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"])
172 #silent commands.append(" ".join(command)) 170 #silent commands.append(" ".join(command))
173 171
174 #if $database_type.clean 172 #if $database_type.clean
175 #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"', 173 #silent command = ["k2", "clean",
176 "--clean",
177 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"] 174 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"]
178 #silent commands.append(" ".join(command)) 175 #silent commands.append(" ".join(command))
179 #end if 176 #end if
180 #end if 177 #end if
181 178
206 <expand macro="common_params" /> 203 <expand macro="common_params" />
207 </when> 204 </when>
208 <when value="standard_prebuilt"> 205 <when value="standard_prebuilt">
209 <conditional name="prebuilt"> 206 <conditional name="prebuilt">
210 <param name="prebuilt_date" type="select" label="Select index build date"> 207 <param name="prebuilt_date" type="select" label="Select index build date">
208 <option value="2025-10-15">October 15, 2025</option>
211 <option value="2025-07-14">July 14, 2025</option> 209 <option value="2025-07-14">July 14, 2025</option>
212 <option value="2024-12-28">December 28, 2024</option> 210 <option value="2024-12-28">December 28, 2024</option>
213 <option value="2024-09-04">September 4, 2024</option> 211 <option value="2024-09-04">September 4, 2024</option>
214 <option value="2024-06-05">June 5, 2024</option> 212 <option value="2024-06-05">June 5, 2024</option>
215 <option value="2024-01-12">January 12, 2024</option> 213 <option value="2024-01-12">January 12, 2024</option>
218 <option value="2021-05-17">May 17, 2021</option> 216 <option value="2021-05-17">May 17, 2021</option>
219 <option value="2021-01-27">January 27, 2021</option> 217 <option value="2021-01-27">January 27, 2021</option>
220 <option value="2020-12-02">December 2, 2020</option> 218 <option value="2020-12-02">December 2, 2020</option>
221 <option value="2020-09-19">September 19, 2020</option> 219 <option value="2020-09-19">September 19, 2020</option>
222 </param> 220 </param>
221 <when value="2025-10-15">
222 <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download">
223 <expand macro="viral"/>
224 <expand macro="minusb"/>
225 <expand macro="standard"/>
226 <expand macro="standard_08gb"/>
227 <expand macro="standard_16gb"/>
228 <expand macro="pluspf"/>
229 <expand macro="pluspf_08gb"/>
230 <expand macro="pluspf_16gb"/>
231 <expand macro="pluspfp"/>
232 <expand macro="pluspfp_08gb"/>
233 <expand macro="pluspfp_16gb"/>
234 </param>
235 </when>
223 <when value="2025-07-14"> 236 <when value="2025-07-14">
224 <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download"> 237 <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download">
225 <expand macro="viral"/> 238 <expand macro="viral"/>
226 <expand macro="minusb"/> 239 <expand macro="minusb"/>
227 <expand macro="standard"/> 240 <expand macro="standard"/>
372 </conditional> 385 </conditional>
373 </when> 386 </when>
374 <when value="special_prebuilt"> 387 <when value="special_prebuilt">
375 <conditional name="prebuilt"> 388 <conditional name="prebuilt">
376 <param name="xyz" type="select" multiple="false" label="Select pre-built database to download"> 389 <param name="xyz" type="select" multiple="false" label="Select pre-built database to download">
390 <option value="core_nt_20251015">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (October, 2025)</option>
377 <option value="core_nt_20250609">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (July, 2025)</option> 391 <option value="core_nt_20250609">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (July, 2025)</option>
378 <option value="core_nt_20241228">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (December, 2024)</option> 392 <option value="core_nt_20241228">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (December, 2024)</option>
379 <option value="core_nt_20240904">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (September, 2024)</option> 393 <option value="core_nt_20240904">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (September, 2024)</option>
380 <option value="gtdb_genome_reps_20250609">GTDB v226 (Bacterial and archaeal; ~497 GB) (July, 2025)</option> 394 <option value="gtdb_genome_reps_20250609">GTDB v226 (Bacterial and archaeal; ~497 GB) (July, 2025)</option>
381 <option value="gtdb_genome_reps_20241109">GTDB v220 (Bacterial and archaeal; ~644 GB) (December 13, 2024)</option> 395 <option value="gtdb_genome_reps_20241109">GTDB v220 (Bacterial and archaeal; ~644 GB) (December 13, 2024)</option>
382 <option value="eupathdb48_20230407">EuPathDB-46 (April 18, 2023)</option> 396 <option value="eupathdb48_20230407">EuPathDB-46 (April 18, 2023)</option>
383 <option value="eupathdb48_20201113">EuPathDB-46 (November 13, 2020)</option> 397 <option value="eupathdb48_20201113">EuPathDB-46 (November 13, 2020)</option>
384 </param> 398 </param>
399 <when value="core_nt_20251015">
400 <param name="prebuilt_db" type="hidden" value="core_nt"/>
401 <param name="prebuilt_date" type="hidden" value="20251015"/>
402 </when>
385 <when value="core_nt_20250609"> 403 <when value="core_nt_20250609">
386 <param name="prebuilt_db" type="hidden" value="core_nt"/> 404 <param name="prebuilt_db" type="hidden" value="core_nt"/>
387 <param name="prebuilt_date" type="hidden" value="20250609"/> 405 <param name="prebuilt_date" type="hidden" value="20250609"/>
388 </when> 406 </when>
389 <when value="core_nt_20241228"> 407 <when value="core_nt_20241228">
458 <param name="custom_source_info" type="text" label="Database source info" help="Concise description of how this build has been sourced. This description will be appended (in parentheses) to the user-facing name of the build. Example: https://doi.org/10.5281/zenodo.8339822, from v1 assembly_summary.txt sequences" /> 476 <param name="custom_source_info" type="text" label="Database source info" help="Concise description of how this build has been sourced. This description will be appended (in parentheses) to the user-facing name of the build. Example: https://doi.org/10.5281/zenodo.8339822, from v1 assembly_summary.txt sequences" />
459 <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." /> 477 <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." />
460 <expand macro="common_params" /> 478 <expand macro="common_params" />
461 </when> 479 </when>
462 </conditional> 480 </conditional>
463 <param name="run_test_command" type="hidden"/> 481 <param name="run_test_command" type="hidden" />
464 </inputs> 482 </inputs>
465 <outputs> 483 <outputs>
466 <data name="out_file" format="data_manager_json" /> 484 <data name="out_file" format="data_manager_json" />
467 </outputs> 485 </outputs>
468 <tests> 486 <tests>
469 <!-- standard_local_build --> 487 <!-- standard_local_build - test 1 -->
470 488
471 <test expect_num_outputs="1"> 489 <test expect_num_outputs="1">
472 <conditional name="database_type"> 490 <conditional name="database_type">
473 <param name="database_type" value="standard_local_build" /> 491 <param name="database_type" value="standard_local_build" />
474 <param name="kmer_len" value="35" /> 492 <param name="kmer_len" value="35" />
485 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/> 503 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/>
486 </assert_contents> 504 </assert_contents>
487 </output> 505 </output>
488 </test> 506 </test>
489 507
490 <!-- standard_prebuilt --> 508 <!-- standard_prebuilt - test 2 -->
491 <test> 509 <test>
492 <conditional name="database_type"> 510 <conditional name="database_type">
493 <param name="database_type" value="standard_prebuilt" /> 511 <param name="database_type" value="standard_prebuilt" />
494 <conditional name="prebuilt"> 512 <conditional name="prebuilt">
495 <param name="prebuilt_date" value="2022-06-07"/> 513 <param name="prebuilt_date" value="2022-06-07"/>
506 <has_text text="Version"/> 524 <has_text text="Version"/>
507 <has_text text="Downloaded"/> 525 <has_text text="Downloaded"/>
508 </assert_contents> 526 </assert_contents>
509 </output> 527 </output>
510 </test> 528 </test>
511 <!-- test with latest version --> 529 <!-- test with latest version - test 3 -->
512 <test> 530 <test>
513 <conditional name="database_type"> 531 <conditional name="database_type">
514 <param name="database_type" value="standard_prebuilt" /> 532 <param name="database_type" value="standard_prebuilt" />
515 <conditional name="prebuilt"> 533 <conditional name="prebuilt">
516 <param name="prebuilt_date" value="2024-01-12"/> 534 <param name="prebuilt_date" value="2024-01-12"/>
527 <has_text text="Version"/> 545 <has_text text="Version"/>
528 <has_text text="Downloaded"/> 546 <has_text text="Downloaded"/>
529 </assert_contents> 547 </assert_contents>
530 </output> 548 </output>
531 </test> 549 </test>
532 <!-- test with first 2024 version --> 550 <!-- test with first 2024 version - test 4 -->
533 <test> 551 <test>
534 <conditional name="database_type"> 552 <conditional name="database_type">
535 <param name="database_type" value="standard_prebuilt" /> 553 <param name="database_type" value="standard_prebuilt" />
536 <conditional name="prebuilt"> 554 <conditional name="prebuilt">
537 <param name="prebuilt_date" value="2024-06-05"/> 555 <param name="prebuilt_date" value="2024-06-05"/>
549 <has_text text="Downloaded"/> 567 <has_text text="Downloaded"/>
550 </assert_contents> 568 </assert_contents>
551 </output> 569 </output>
552 </test> 570 </test>
553 571
554 <!-- minikraken --> 572 <!-- minikraken - test 5 -->
555 573
556 <test> 574 <test>
557 <conditional name="database_type"> 575 <conditional name="database_type">
558 <param name="database_type" value="minikraken" /> 576 <param name="database_type" value="minikraken" />
559 <param name="minikraken2_version" value="v1"/> 577 <param name="minikraken2_version" value="v1"/>
568 <has_text text="Created"/> 586 <has_text text="Created"/>
569 </assert_contents> 587 </assert_contents>
570 </output> 588 </output>
571 </test> 589 </test>
572 590
573 <!-- special_prebuilt --> 591 <!-- special_prebuilt - test 6 -->
574 592
575 <test> 593 <test>
576 <conditional name="database_type"> 594 <conditional name="database_type">
577 <param name="database_type" value="special_prebuilt" /> 595 <param name="database_type" value="special_prebuilt" />
578 <conditional name="prebuilt"> 596 <conditional name="prebuilt">
592 <has_text text="Downloaded"/> 610 <has_text text="Downloaded"/>
593 </assert_contents> 611 </assert_contents>
594 </output> 612 </output>
595 </test> 613 </test>
596 614
597 <!-- amplicon_prebuilt --> 615 <!-- amplicon_prebuilt - test 7 -->
598 616
599 <test> 617 <test>
600 <conditional name="database_type"> 618 <conditional name="database_type">
601 <param name="database_type" value="amplicon_prebuilt" /> 619 <param name="database_type" value="amplicon_prebuilt" />
602 <conditional name="prebuilt"> 620 <conditional name="prebuilt">
613 <has_text text="16S_Greengenes13.5"/> 631 <has_text text="16S_Greengenes13.5"/>
614 </assert_contents> 632 </assert_contents>
615 </output> 633 </output>
616 </test> 634 </test>
617 635
618 <!-- special --> 636 <!-- special - test 8 -->
619 637
620 <test expect_num_outputs="1"> 638 <test expect_num_outputs="1">
621 <conditional name="database_type"> 639 <conditional name="database_type">
622 <param name="database_type" value="special" /> 640 <param name="database_type" value="special" />
623 <param name="special_database_type" value="greengenes" /> 641 <param name="special_database_type" value="greengenes" />
634 <has_text text="Greengenes"/> 652 <has_text text="Greengenes"/>
635 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/> 653 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/>
636 </assert_contents> 654 </assert_contents>
637 </output> 655 </output>
638 </test> 656 </test>
657
658 <!-- special - test 9 -->
639 <test expect_num_outputs="1"> 659 <test expect_num_outputs="1">
640 <conditional name="database_type"> 660 <conditional name="database_type">
641 <param name="database_type" value="special" /> 661 <param name="database_type" value="special" />
642 <param name="special_database_type" value="silva" /> 662 <param name="special_database_type" value="silva" />
643 <param name="kmer_len" value="35" /> 663 <param name="kmer_len" value="35" />
654 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/> 674 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/>
655 </assert_contents> 675 </assert_contents>
656 </output> 676 </output>
657 </test> 677 </test>
658 678
659 <!-- custom --> 679 <!-- custom - test 10 -->
660 680
661 <test expect_num_outputs="1"> 681 <test expect_num_outputs="1">
662 <conditional name="database_type"> 682 <conditional name="database_type">
663 <param name="database_type" value="custom" /> 683 <param name="database_type" value="custom" />
664 <param name="custom_fasta" value="adapter.fa" /> 684 <param name="custom_fasta" value="adapter.fa" />