Mercurial > repos > iuc > data_manager_build_kraken2_database
comparison data_manager/kraken2_build_database.xml @ 14:1663e897c832 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_build_kraken2_database commit 797b5fc5c1b1eb7903c2ab681f399777b77501e3
| author | iuc |
|---|---|
| date | Thu, 19 Feb 2026 12:13:34 +0000 |
| parents | 150ea60a4ebc |
| children |
comparison
equal
deleted
inserted
replaced
| 13:150ea60a4ebc | 14:1663e897c832 |
|---|---|
| 1 <tool id="kraken2_build_database" name="Kraken2" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | 1 <tool id="kraken2_build_database" name="Kraken2" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> |
| 2 <description>database builder</description> | 2 <description>database builder</description> |
| 3 <macros> | 3 <macros> |
| 4 <token name="@TOOL_VERSION@">2.1.6</token> | 4 <token name="@TOOL_VERSION@">2.17.1</token> |
| 5 <token name="@VERSION_SUFFIX@">0</token> | 5 <token name="@VERSION_SUFFIX@">0</token> |
| 6 <token name="@PROFILE@">24.0</token> | 6 <token name="@PROFILE@">24.0</token> |
| 7 <xml name="common_params"> | 7 <xml name="common_params"> |
| 8 <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" /> | 8 <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" /> |
| 9 <param name="minimizer_len" type="integer" value="31" label="Minimizer length" /> | 9 <param name="minimizer_len" type="integer" value="31" label="Minimizer length" /> |
| 54 <version_command>kraken2 -version | head -n 1 | awk '{print $NF}'</version_command> | 54 <version_command>kraken2 -version | head -n 1 | awk '{print $NF}'</version_command> |
| 55 <command detect_errors="exit_code"><![CDATA[ | 55 <command detect_errors="exit_code"><![CDATA[ |
| 56 #import datetime | 56 #import datetime |
| 57 #import re | 57 #import re |
| 58 | 58 |
| 59 #set now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") | 59 #set now = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H%M%SZ") |
| 60 #set commands = [] | 60 #set commands = [] |
| 61 mkdir '$out_file.extra_files_path' && | 61 mkdir '$out_file.extra_files_path' && |
| 62 | 62 |
| 63 #if $database_type.database_type == "standard_prebuilt" or $database_type.database_type == "special_prebuilt" or $database_type.database_type == "amplicon_prebuilt" | 63 #if $database_type.database_type == "standard_prebuilt" or $database_type.database_type == "special_prebuilt" or $database_type.database_type == "amplicon_prebuilt" |
| 64 #set prebuilt_name = { | 64 #set prebuilt_name = { |
| 73 'pluspfp': "PlusPFP (Standard plus protozoa, fungi and plant)", | 73 'pluspfp': "PlusPFP (Standard plus protozoa, fungi and plant)", |
| 74 'pluspfp_08gb': "PlusPFP-8 (PlusPFP with DB capped at 8 GB)", | 74 'pluspfp_08gb': "PlusPFP-8 (PlusPFP with DB capped at 8 GB)", |
| 75 'pluspfp_16gb': "PlusPFP-16 (PlusPFP with DB capped at 16 GB)", | 75 'pluspfp_16gb': "PlusPFP-16 (PlusPFP with DB capped at 16 GB)", |
| 76 } | 76 } |
| 77 #set special_name = { | 77 #set special_name = { |
| 78 "core_nt_20251015": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (October, 2025)", | |
| 78 "core_nt_20250609": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (July, 2025)", | 79 "core_nt_20250609": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (July, 2025)", |
| 79 "core_nt_20241228": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (December, 2024)", | 80 "core_nt_20241228": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (December, 2024)", |
| 80 "core_nt_20240904": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (September, 2024)", | 81 "core_nt_20240904": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (September, 2024)", |
| 81 "gtdb_genome_reps_20250609": "GTDB v226 (Bacterial and archaeal)", | 82 "gtdb_genome_reps_20250609": "GTDB v226 (Bacterial and archaeal)", |
| 82 "gtdb_genome_reps_20241109": "GTDB v220 (Bacterial and archaeal)", | 83 "gtdb_genome_reps_20241109": "GTDB v220 (Bacterial and archaeal)", |
| 142 #else | 143 #else |
| 143 >&2 echo "invalid database_type: $database_type.database_type" | 144 >&2 echo "invalid database_type: $database_type.database_type" |
| 144 #end if | 145 #end if |
| 145 | 146 |
| 146 #if $database_type.database_type == "custom" | 147 #if $database_type.database_type == "custom" |
| 147 #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"', | 148 #silent command = ["k2", "download-taxonomy", |
| 148 "--download-taxonomy", | |
| 149 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'", | 149 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'", |
| 150 str($database_type.skip_maps)] | 150 str($database_type.skip_maps)] |
| 151 #silent commands.append(" ".join(command)) | 151 #silent commands.append(" ".join(command)) |
| 152 #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"', | 152 #silent command = ["k2", "add-to-library", "--threads", '"${GALAXY_SLOTS:-1}"', |
| 153 "--add-to-library", "'" + str($database_type.custom_fasta) + "'", | 153 "--file", "'" + str($database_type.custom_fasta) + "'", |
| 154 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"] | 154 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"] |
| 155 #silent commands.append(" ".join(command)) | 155 #silent commands.append(" ".join(command)) |
| 156 #end if | 156 #end if |
| 157 | 157 |
| 158 #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"'] | 158 #silent command = ["k2", "build", "--threads", '"${GALAXY_SLOTS:-1}"'] |
| 159 #if $database_type.database_type == "standard_local_build" | 159 #if $database_type.database_type == "standard_local_build" |
| 160 #silent command.append("--standard") | 160 #silent command.append("--standard") |
| 161 #else if $database_type.database_type == "special" | 161 #else if $database_type.database_type == "special" |
| 162 #silent command.extend(["--special", str($database_type.special_database_type)]) | 162 #silent command.extend(["--special", str($database_type.special_database_type)]) |
| 163 #else if $database_type.database_type == "custom" | |
| 164 #silent command.append("--build") | |
| 165 #end if | 163 #end if |
| 166 #silent command.extend([ | 164 #silent command.extend([ |
| 167 "--kmer-len", str($database_type.kmer_len), | 165 "--kmer-len", str($database_type.kmer_len), |
| 168 "--minimizer-len", str($database_type.minimizer_len), | 166 "--minimizer-len", str($database_type.minimizer_len), |
| 169 "--minimizer-spaces", str($database_type.minimizer_spaces), | 167 "--minimizer-spaces", str($database_type.minimizer_spaces), |
| 170 "--load-factor", str($database_type.load_factor), | 168 "--load-factor", str($database_type.load_factor), |
| 171 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"]) | 169 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"]) |
| 172 #silent commands.append(" ".join(command)) | 170 #silent commands.append(" ".join(command)) |
| 173 | 171 |
| 174 #if $database_type.clean | 172 #if $database_type.clean |
| 175 #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"', | 173 #silent command = ["k2", "clean", |
| 176 "--clean", | |
| 177 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"] | 174 "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"] |
| 178 #silent commands.append(" ".join(command)) | 175 #silent commands.append(" ".join(command)) |
| 179 #end if | 176 #end if |
| 180 #end if | 177 #end if |
| 181 | 178 |
| 206 <expand macro="common_params" /> | 203 <expand macro="common_params" /> |
| 207 </when> | 204 </when> |
| 208 <when value="standard_prebuilt"> | 205 <when value="standard_prebuilt"> |
| 209 <conditional name="prebuilt"> | 206 <conditional name="prebuilt"> |
| 210 <param name="prebuilt_date" type="select" label="Select index build date"> | 207 <param name="prebuilt_date" type="select" label="Select index build date"> |
| 208 <option value="2025-10-15">October 15, 2025</option> | |
| 211 <option value="2025-07-14">July 14, 2025</option> | 209 <option value="2025-07-14">July 14, 2025</option> |
| 212 <option value="2024-12-28">December 28, 2024</option> | 210 <option value="2024-12-28">December 28, 2024</option> |
| 213 <option value="2024-09-04">September 4, 2024</option> | 211 <option value="2024-09-04">September 4, 2024</option> |
| 214 <option value="2024-06-05">June 5, 2024</option> | 212 <option value="2024-06-05">June 5, 2024</option> |
| 215 <option value="2024-01-12">January 12, 2024</option> | 213 <option value="2024-01-12">January 12, 2024</option> |
| 218 <option value="2021-05-17">May 17, 2021</option> | 216 <option value="2021-05-17">May 17, 2021</option> |
| 219 <option value="2021-01-27">January 27, 2021</option> | 217 <option value="2021-01-27">January 27, 2021</option> |
| 220 <option value="2020-12-02">December 2, 2020</option> | 218 <option value="2020-12-02">December 2, 2020</option> |
| 221 <option value="2020-09-19">September 19, 2020</option> | 219 <option value="2020-09-19">September 19, 2020</option> |
| 222 </param> | 220 </param> |
| 221 <when value="2025-10-15"> | |
| 222 <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download"> | |
| 223 <expand macro="viral"/> | |
| 224 <expand macro="minusb"/> | |
| 225 <expand macro="standard"/> | |
| 226 <expand macro="standard_08gb"/> | |
| 227 <expand macro="standard_16gb"/> | |
| 228 <expand macro="pluspf"/> | |
| 229 <expand macro="pluspf_08gb"/> | |
| 230 <expand macro="pluspf_16gb"/> | |
| 231 <expand macro="pluspfp"/> | |
| 232 <expand macro="pluspfp_08gb"/> | |
| 233 <expand macro="pluspfp_16gb"/> | |
| 234 </param> | |
| 235 </when> | |
| 223 <when value="2025-07-14"> | 236 <when value="2025-07-14"> |
| 224 <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download"> | 237 <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download"> |
| 225 <expand macro="viral"/> | 238 <expand macro="viral"/> |
| 226 <expand macro="minusb"/> | 239 <expand macro="minusb"/> |
| 227 <expand macro="standard"/> | 240 <expand macro="standard"/> |
| 372 </conditional> | 385 </conditional> |
| 373 </when> | 386 </when> |
| 374 <when value="special_prebuilt"> | 387 <when value="special_prebuilt"> |
| 375 <conditional name="prebuilt"> | 388 <conditional name="prebuilt"> |
| 376 <param name="xyz" type="select" multiple="false" label="Select pre-built database to download"> | 389 <param name="xyz" type="select" multiple="false" label="Select pre-built database to download"> |
| 390 <option value="core_nt_20251015">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (October, 2025)</option> | |
| 377 <option value="core_nt_20250609">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (July, 2025)</option> | 391 <option value="core_nt_20250609">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (July, 2025)</option> |
| 378 <option value="core_nt_20241228">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (December, 2024)</option> | 392 <option value="core_nt_20241228">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (December, 2024)</option> |
| 379 <option value="core_nt_20240904">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (September, 2024)</option> | 393 <option value="core_nt_20240904">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (September, 2024)</option> |
| 380 <option value="gtdb_genome_reps_20250609">GTDB v226 (Bacterial and archaeal; ~497 GB) (July, 2025)</option> | 394 <option value="gtdb_genome_reps_20250609">GTDB v226 (Bacterial and archaeal; ~497 GB) (July, 2025)</option> |
| 381 <option value="gtdb_genome_reps_20241109">GTDB v220 (Bacterial and archaeal; ~644 GB) (December 13, 2024)</option> | 395 <option value="gtdb_genome_reps_20241109">GTDB v220 (Bacterial and archaeal; ~644 GB) (December 13, 2024)</option> |
| 382 <option value="eupathdb48_20230407">EuPathDB-46 (April 18, 2023)</option> | 396 <option value="eupathdb48_20230407">EuPathDB-46 (April 18, 2023)</option> |
| 383 <option value="eupathdb48_20201113">EuPathDB-46 (November 13, 2020)</option> | 397 <option value="eupathdb48_20201113">EuPathDB-46 (November 13, 2020)</option> |
| 384 </param> | 398 </param> |
| 399 <when value="core_nt_20251015"> | |
| 400 <param name="prebuilt_db" type="hidden" value="core_nt"/> | |
| 401 <param name="prebuilt_date" type="hidden" value="20251015"/> | |
| 402 </when> | |
| 385 <when value="core_nt_20250609"> | 403 <when value="core_nt_20250609"> |
| 386 <param name="prebuilt_db" type="hidden" value="core_nt"/> | 404 <param name="prebuilt_db" type="hidden" value="core_nt"/> |
| 387 <param name="prebuilt_date" type="hidden" value="20250609"/> | 405 <param name="prebuilt_date" type="hidden" value="20250609"/> |
| 388 </when> | 406 </when> |
| 389 <when value="core_nt_20241228"> | 407 <when value="core_nt_20241228"> |
| 458 <param name="custom_source_info" type="text" label="Database source info" help="Concise description of how this build has been sourced. This description will be appended (in parentheses) to the user-facing name of the build. Example: https://doi.org/10.5281/zenodo.8339822, from v1 assembly_summary.txt sequences" /> | 476 <param name="custom_source_info" type="text" label="Database source info" help="Concise description of how this build has been sourced. This description will be appended (in parentheses) to the user-facing name of the build. Example: https://doi.org/10.5281/zenodo.8339822, from v1 assembly_summary.txt sequences" /> |
| 459 <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." /> | 477 <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." /> |
| 460 <expand macro="common_params" /> | 478 <expand macro="common_params" /> |
| 461 </when> | 479 </when> |
| 462 </conditional> | 480 </conditional> |
| 463 <param name="run_test_command" type="hidden"/> | 481 <param name="run_test_command" type="hidden" /> |
| 464 </inputs> | 482 </inputs> |
| 465 <outputs> | 483 <outputs> |
| 466 <data name="out_file" format="data_manager_json" /> | 484 <data name="out_file" format="data_manager_json" /> |
| 467 </outputs> | 485 </outputs> |
| 468 <tests> | 486 <tests> |
| 469 <!-- standard_local_build --> | 487 <!-- standard_local_build - test 1 --> |
| 470 | 488 |
| 471 <test expect_num_outputs="1"> | 489 <test expect_num_outputs="1"> |
| 472 <conditional name="database_type"> | 490 <conditional name="database_type"> |
| 473 <param name="database_type" value="standard_local_build" /> | 491 <param name="database_type" value="standard_local_build" /> |
| 474 <param name="kmer_len" value="35" /> | 492 <param name="kmer_len" value="35" /> |
| 485 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/> | 503 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/> |
| 486 </assert_contents> | 504 </assert_contents> |
| 487 </output> | 505 </output> |
| 488 </test> | 506 </test> |
| 489 | 507 |
| 490 <!-- standard_prebuilt --> | 508 <!-- standard_prebuilt - test 2 --> |
| 491 <test> | 509 <test> |
| 492 <conditional name="database_type"> | 510 <conditional name="database_type"> |
| 493 <param name="database_type" value="standard_prebuilt" /> | 511 <param name="database_type" value="standard_prebuilt" /> |
| 494 <conditional name="prebuilt"> | 512 <conditional name="prebuilt"> |
| 495 <param name="prebuilt_date" value="2022-06-07"/> | 513 <param name="prebuilt_date" value="2022-06-07"/> |
| 506 <has_text text="Version"/> | 524 <has_text text="Version"/> |
| 507 <has_text text="Downloaded"/> | 525 <has_text text="Downloaded"/> |
| 508 </assert_contents> | 526 </assert_contents> |
| 509 </output> | 527 </output> |
| 510 </test> | 528 </test> |
| 511 <!-- test with latest version --> | 529 <!-- test with latest version - test 3 --> |
| 512 <test> | 530 <test> |
| 513 <conditional name="database_type"> | 531 <conditional name="database_type"> |
| 514 <param name="database_type" value="standard_prebuilt" /> | 532 <param name="database_type" value="standard_prebuilt" /> |
| 515 <conditional name="prebuilt"> | 533 <conditional name="prebuilt"> |
| 516 <param name="prebuilt_date" value="2024-01-12"/> | 534 <param name="prebuilt_date" value="2024-01-12"/> |
| 527 <has_text text="Version"/> | 545 <has_text text="Version"/> |
| 528 <has_text text="Downloaded"/> | 546 <has_text text="Downloaded"/> |
| 529 </assert_contents> | 547 </assert_contents> |
| 530 </output> | 548 </output> |
| 531 </test> | 549 </test> |
| 532 <!-- test with first 2024 version --> | 550 <!-- test with first 2024 version - test 4 --> |
| 533 <test> | 551 <test> |
| 534 <conditional name="database_type"> | 552 <conditional name="database_type"> |
| 535 <param name="database_type" value="standard_prebuilt" /> | 553 <param name="database_type" value="standard_prebuilt" /> |
| 536 <conditional name="prebuilt"> | 554 <conditional name="prebuilt"> |
| 537 <param name="prebuilt_date" value="2024-06-05"/> | 555 <param name="prebuilt_date" value="2024-06-05"/> |
| 549 <has_text text="Downloaded"/> | 567 <has_text text="Downloaded"/> |
| 550 </assert_contents> | 568 </assert_contents> |
| 551 </output> | 569 </output> |
| 552 </test> | 570 </test> |
| 553 | 571 |
| 554 <!-- minikraken --> | 572 <!-- minikraken - test 5 --> |
| 555 | 573 |
| 556 <test> | 574 <test> |
| 557 <conditional name="database_type"> | 575 <conditional name="database_type"> |
| 558 <param name="database_type" value="minikraken" /> | 576 <param name="database_type" value="minikraken" /> |
| 559 <param name="minikraken2_version" value="v1"/> | 577 <param name="minikraken2_version" value="v1"/> |
| 568 <has_text text="Created"/> | 586 <has_text text="Created"/> |
| 569 </assert_contents> | 587 </assert_contents> |
| 570 </output> | 588 </output> |
| 571 </test> | 589 </test> |
| 572 | 590 |
| 573 <!-- special_prebuilt --> | 591 <!-- special_prebuilt - test 6 --> |
| 574 | 592 |
| 575 <test> | 593 <test> |
| 576 <conditional name="database_type"> | 594 <conditional name="database_type"> |
| 577 <param name="database_type" value="special_prebuilt" /> | 595 <param name="database_type" value="special_prebuilt" /> |
| 578 <conditional name="prebuilt"> | 596 <conditional name="prebuilt"> |
| 592 <has_text text="Downloaded"/> | 610 <has_text text="Downloaded"/> |
| 593 </assert_contents> | 611 </assert_contents> |
| 594 </output> | 612 </output> |
| 595 </test> | 613 </test> |
| 596 | 614 |
| 597 <!-- amplicon_prebuilt --> | 615 <!-- amplicon_prebuilt - test 7 --> |
| 598 | 616 |
| 599 <test> | 617 <test> |
| 600 <conditional name="database_type"> | 618 <conditional name="database_type"> |
| 601 <param name="database_type" value="amplicon_prebuilt" /> | 619 <param name="database_type" value="amplicon_prebuilt" /> |
| 602 <conditional name="prebuilt"> | 620 <conditional name="prebuilt"> |
| 613 <has_text text="16S_Greengenes13.5"/> | 631 <has_text text="16S_Greengenes13.5"/> |
| 614 </assert_contents> | 632 </assert_contents> |
| 615 </output> | 633 </output> |
| 616 </test> | 634 </test> |
| 617 | 635 |
| 618 <!-- special --> | 636 <!-- special - test 8 --> |
| 619 | 637 |
| 620 <test expect_num_outputs="1"> | 638 <test expect_num_outputs="1"> |
| 621 <conditional name="database_type"> | 639 <conditional name="database_type"> |
| 622 <param name="database_type" value="special" /> | 640 <param name="database_type" value="special" /> |
| 623 <param name="special_database_type" value="greengenes" /> | 641 <param name="special_database_type" value="greengenes" /> |
| 634 <has_text text="Greengenes"/> | 652 <has_text text="Greengenes"/> |
| 635 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/> | 653 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/> |
| 636 </assert_contents> | 654 </assert_contents> |
| 637 </output> | 655 </output> |
| 638 </test> | 656 </test> |
| 657 | |
| 658 <!-- special - test 9 --> | |
| 639 <test expect_num_outputs="1"> | 659 <test expect_num_outputs="1"> |
| 640 <conditional name="database_type"> | 660 <conditional name="database_type"> |
| 641 <param name="database_type" value="special" /> | 661 <param name="database_type" value="special" /> |
| 642 <param name="special_database_type" value="silva" /> | 662 <param name="special_database_type" value="silva" /> |
| 643 <param name="kmer_len" value="35" /> | 663 <param name="kmer_len" value="35" /> |
| 654 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/> | 674 <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/> |
| 655 </assert_contents> | 675 </assert_contents> |
| 656 </output> | 676 </output> |
| 657 </test> | 677 </test> |
| 658 | 678 |
| 659 <!-- custom --> | 679 <!-- custom - test 10 --> |
| 660 | 680 |
| 661 <test expect_num_outputs="1"> | 681 <test expect_num_outputs="1"> |
| 662 <conditional name="database_type"> | 682 <conditional name="database_type"> |
| 663 <param name="database_type" value="custom" /> | 683 <param name="database_type" value="custom" /> |
| 664 <param name="custom_fasta" value="adapter.fa" /> | 684 <param name="custom_fasta" value="adapter.fa" /> |
