Mercurial > repos > dfornika > data_manager_build_bracken_database
changeset 10:92f9975f08e2 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
line wrap: on
line diff
--- a/data_manager/bracken_build_database.py Tue Oct 15 17:10:29 2019 -0400 +++ b/data_manager/bracken_build_database.py Thu Nov 04 21:33:16 2021 +0000 @@ -3,14 +3,10 @@ from __future__ import print_function import argparse -import datetime import errno import json import os -import shutil -import string import subprocess -import sys import uuid @@ -19,14 +15,11 @@ def bracken_build_database(target_directory, bracken_build_args, database_name, data_table_name=DATA_TABLE_NAME): - now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") - database_value = str(uuid.uuid4()) database_name = database_name - database_path = os.path.join(bracken_build_args['kraken_database'], 'database' + str(bracken_build_args['read_len']) + 'mers.kmer_distrib') - + database_path = os.path.join(bracken_build_args['kraken_database'], 'database' + str(bracken_build_args['read_len']) + 'mers.kmer_distrib') bracken_build_args_list = [ '-t', bracken_build_args['threads'], @@ -37,7 +30,6 @@ subprocess.check_call(['bracken-build'] + bracken_build_args_list) - data_table_entry = { "data_tables": { data_table_name: [ @@ -56,14 +48,15 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('data_manager_json') - parser.add_argument('--threads', dest='threads', default=1, help='threads' ) - parser.add_argument('--kmer-len', dest='kmer_len', help='K-mer length' ) - parser.add_argument('--read-len', dest='read_len', help='Read length' ) - parser.add_argument('--kraken-db', dest='kraken_database', help='Kraken Database' ) + parser.add_argument('--threads', dest='threads', default=1, help='threads') + parser.add_argument('--kmer-len', dest='kmer_len', help='K-mer length') + parser.add_argument('--read-len', dest='read_len', help='Read length') + parser.add_argument('--kraken-db', dest='kraken_database', help='Kraken Database') parser.add_argument('--database-name', dest='database_name', help='Database Name') args = parser.parse_args() - data_manager_input = 
json.loads(open(args.data_manager_json).read()) + with open(args.data_manager_json) as fh: + data_manager_input = json.load(fh) target_directory = data_manager_input['output_data'][0]['extra_files_path'] @@ -75,9 +68,9 @@ } try: - os.mkdir( target_directory ) + os.mkdir(target_directory) except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir( target_directory ): + if exc.errno == errno.EEXIST and os.path.isdir(target_directory): pass else: raise @@ -90,7 +83,8 @@ args.database_name, ) - open(args.data_manager_json, 'wb').write(json.dumps(data_manager_output)) + with open(args.data_manager_json, 'w') as fh: + json.dump(data_manager_output, fh, sort_keys=True) if __name__ == "__main__":
--- a/data_manager/bracken_build_database.xml Tue Oct 15 17:10:29 2019 -0400 +++ b/data_manager/bracken_build_database.xml Thu Nov 04 21:33:16 2021 +0000 @@ -1,16 +1,20 @@ <?xml version="1.0"?> -<tool id="bracken_build_database" name="Bracken Database Builder" tool_type="manage_data" version="2.5+galaxy0"> +<tool id="bracken_build_database" name="Bracken Database Builder" tool_type="manage_data" version="2.5+galaxy1" profile="19.01"> <description>bracken database builder</description> <requirements> <requirement type="package" version="2.5">bracken</requirement> - <requirement type="package" version="2.0.8_beta">kraken2</requirement> + <requirement type="package" version="2.1.1">kraken2</requirement> </requirements> - <version_command>bracken --version</version_command> - <command detect_errors="exit_code"> + <command> <![CDATA[ + #import os + #set db_dir = os.path.basename($kraken_db.fields.path) + + mkdir '$db_dir' && + ln -s '${kraken_db.fields.path}'/* '$db_dir/' && python '$__tool_directory__/bracken_build_database.py' '${out_file}' - --kraken-db ${kraken_db.fields.path} + --kraken-db '$db_dir' --threads \${GALAXY_SLOTS:-1} --kmer-len ${kmer_len} --read-len ${read_len} @@ -23,13 +27,29 @@ <validator type="no_options" message="No Kraken2 databases are available" /> </options> </param> - <param name="kmer_len" type="integer" min="8" max="256" value="35" label="K-mer length" /> - <param name="read_len" type="integer" min="8" max="1000" value="100" label="Read length" /> - <param name="database_name" type="text" label="Database Name" /> + <conditional name="prebuilt"> + <param name="prebuilt" type="boolean" checked="false" truevalue="--prebuilt" falsevalue="" label="Use Pre-built DB" /> + <when value=""> + <param name="kmer_len" type="integer" min="8" max="256" value="35" label="K-mer length" /> + <param name="read_len" type="integer" min="8" max="1000" value="100" label="Read length" /> + </when> + </conditional> + <param name="database_name" type="text" 
label="Database Name" /> </inputs> <outputs> <data name="out_file" format="data_manager_json" /> </outputs> + <tests> + <test> + <param name="kraken_db" value="test_entry" /> + <param name="database_name" value="database" /> + <output name="out_file"> + <assert_contents> + <has_text text="test_db/database100mers.kmer_distrib" /> + </assert_contents> + </output> + </test> + </tests> <help> </help> <citations>
--- a/data_manager_conf.xml Tue Oct 15 17:10:29 2019 -0400 +++ b/data_manager_conf.xml Thu Nov 04 21:33:16 2021 +0000 @@ -1,5 +1,5 @@ <data_managers> - <data_manager tool_file="data_manager/bracken_build_database.xml" id="bracken_build_database" version="2.5+galaxy0"> + <data_manager tool_file="data_manager/bracken_build_database.xml" id="bracken_build_database" version="2.6+galaxy0"> <data_table name="bracken_databases"> <output> <column name="value"/>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kraken2_databases.loc Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,6 @@ +# Tab separated with three columns: +# - value (Galaxy records this in the Galaxy DB) +# - name (Galaxy shows this in the UI) +# - path (folder name containing the Kraken DB) +# +test_entry "Test Database" ${__HERE__}/test_db
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/nodes_patterns.txt Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,15 @@ +^220341\s +^90370\s +^59201\s +^28901\s +^590\s +^543\s +^91347\s +^1236\s +^1224\s +^2\s +^131567\s +^1\s +^585057\s +^562\s +^561\s
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reproduce_test_dataset.sh Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,18 @@ +#!/bin/bash + +# This script produces a small kraken2 database containing only a ~1kb portion each of a Salmonella and an E. coli genome +# It requires kraken2 and entrez-direct (both available on bioconda) +kraken2-build --db test_db --download_taxonomy +mv test_db/taxonomy/nucl_gb.accession2taxid test_db/taxonomy/nucl_gb.accession2taxid_full +grep -e 'NC_003198.1' -e 'NC_011750.1' test_db/taxonomy/nucl_gb.accession2taxid_full > test_db/taxonomy/nucl_gb.accession2taxid +mv test_db/taxonomy/nodes.dmp test_db/taxonomy/nodes.dmp_full +grep -f nodes_patterns.txt test_db/taxonomy/nodes.dmp_full > test_db/taxonomy/nodes.dmp +mv test_db/taxonomy/names.dmp test_db/taxonomy/names.dmp_full +grep -e '^220341\s' -e '^585057\s' test_db/taxonomy/names.dmp_full > test_db/taxonomy/names.dmp +esearch -db nucleotide -query "NC_003198.1" | efetch -format fasta > NC_003198.1.fasta +esearch -db nucleotide -query "NC_011750.1" | efetch -format fasta > NC_011750.1.fasta +head -n 14 NC_003198.1.fasta > NC_003198.1_1kb.fasta +head -n 14 NC_011750.1.fasta > NC_011750.1_1kb.fasta +kraken2-build --db test_db --add-to-library NC_003198.1_1kb.fasta +kraken2-build --db test_db --add-to-library NC_011750.1_1kb.fasta +kraken2-build --db test_db --build
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/library/added/9C7DdW7GAD.fna Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,17 @@ +>NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome +AGAGATTACGTCTGGTTGCAAGAGATCATAACAGGGGAAATTGATTGAAAATAAATATAT +CGCCAGCAGCACATGAACAAGTTTCGGAATGTGATCAATTTAAAAATTTATTGACTTAGG +CGGGCAGATACTTTAACCAATATAGGAATACAAGACAGACAAATAAAAATGACAGAGTAC +ACAACATCCATGAACCGCATCAGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxAGGT +AACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGAACAGTGCGG +GCxxxxxxxxCGACCAGAGATCACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGT +ACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATTCC +AGGCAAGGGCAGGTAGCGACCGTACTTTCCGCCCCCGCGAAAATTACCAACCATCTGGTG +GCGATGATTGAAAAAACTATCGGCGGCCAGGATGCTTTGCCGAATATCAGCGATGCCGAA +CGTATTTTTTCTGACCTGCTCGCAGGACTTGCCAGCGCGCAGCCGGGATTCCCGCTTGCA +CGGTTGAAAATGGTTGTCGAACAAGAATTCGCTCAGATCAAACATGTTTTGCATGGTATC +AGCCTGCTGGGTCAGTGCCCGGATAGCATCAACGCCGCGCTGATTTGCCGTGGCGAAAAA +ATGTCGATCGCGATTATGGCGGGACTCCTGGAGGCGCGTGGACATCGCGTCACGGTGATC +GATCCGGTAGAAAAACTGCTGGCGGTGGGCCATTACCTTGAATCTACCGTCGATATCGCG +GAATCGACTCGCCGTATCGCCGCCAGCCAGATCCCGGCCGATCACATGATCCTGATGGCG +GGCTTTACTG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/library/added/cWk1IBlK73.fna Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,17 @@ +>NC_011750.1 Escherichia coli IAI39 chromosome, complete genome +GCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTxxxxxxxGAGTGTCT +GATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGT +CACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACA +CAACATCCATGAAACGCATTAGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxAGGTA +ACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGC +xxxxxxxxCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTAC +ATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAG +GCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGC +GATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACG +TATTTTTGCCGAACTTCTGACGGGACTCGCCGCTGCCCAACCGGGATTCCCGCTGGCGCA +ACTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATTAG +TTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAAT +GTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACCGTTATCGA +TCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCTGA +GTCCACCCGCCGTATTGCGGCAAGTCGTATTCCGGCTGATCACATGGTGCTGATGGCAGG +TTTCACCGCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/library/added/prelim_map.txt Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,2 @@ +ACCNUM NC_011750.1 NC_011750 +ACCNUM NC_003198.1 NC_003198
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/library/added/prelim_map_QXr8C5PiOX.txt Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,1 @@ +ACCNUM NC_003198.1 NC_003198
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/library/added/prelim_map_l8ftMYsZv0.txt Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,1 @@ +ACCNUM NC_011750.1 NC_011750
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/seqid2taxid.map Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,1 @@ +NC_011750.1 585057
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/taxonomy/names.dmp Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,5 @@ +220341 | Salmonella enterica subsp. enterica serovar Typhi CT18 | | equivalent name | +220341 | Salmonella enterica subsp. enterica serovar Typhi str. CT18 | | scientific name | +220341 | Salmonella enterica subsp. enterica serovar Typhi strain CT18 | | equivalent name | +220341 | Salmonella typhi CT18 | | equivalent name | +585057 | Escherichia coli IAI39 | | scientific name |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/taxonomy/nodes.dmp Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,15 @@ +1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | +2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | +543 | 91347 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +561 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +562 | 561 | species | EC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +590 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +28901 | 590 | species | SE | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +59201 | 28901 | subspecies | SE | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +90370 | 59201 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +91347 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | +220341 | 90370 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +585057 | 562 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/taxonomy/nucl_gb.accession2taxid Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,2 @@ +NC_003198 NC_003198.1 220341 16758993 +NC_011750 NC_011750.1 585057 218698419
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/taxonomy/prelim_map.txt Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,2 @@ +ACCNUM NC_011750.1 NC_011750 +ACCNUM NC_003198.1 NC_003198
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/unmapped.txt Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,1 @@ +NC_003198
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Nov 04 21:33:16 2021 +0000 @@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of Kraken database in the required format --> + <table name="kraken2_databases" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/kraken2_databases.loc" /> + </table> + <!-- Locations of bracken databases in the required format --> + <table name="bracken_databases" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/bracken_databases.loc" /> + </table> +</tables>