Previous changeset 33:eb123ede1812 (2019-04-30) Next changeset 35:40b2c0a4abd5 (2019-05-01) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 5629d9d7d9836d32b1e501277e213ef213de3e76-dirty |
modified:
data_manager/kraken2_build_database.py |
b |
diff -r eb123ede1812 -r 8d8ebf8367c8 data_manager/kraken2_build_database.py
--- a/data_manager/kraken2_build_database.py Tue Apr 30 15:04:10 2019 -0400
+++ b/data_manager/kraken2_build_database.py Wed May 01 16:46:36 2019 -0400 |
[ |
@@ -9,9 +9,25 @@ import os import subprocess +from enum import Enum DATA_TABLE_NAME = "kraken2_databases" +class KrakenDatabaseTypes(Enum): + standard = 'standard' + minikraken = 'minikraken' + special = 'special' + custom = 'custom' + + def __str__(self): + return self.value + +class Minikraken2Versions(Enum): + v1 = 'v1' + v2 = 'v2' + + def __str__(self): + return self.value def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") @@ -63,6 +79,43 @@ _add_data_table_entry(data_manager_dict, data_table_entry) +def kraken2_build_minikraken(data_manager_dict, minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): + + now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") + + database_value = "_".join([ + now, + "minikraken2", + minikraken2_version, + "8GB", + ]) + + database_name = " ".join([ + "Minikraken2", + minikraken2_version, + "(Created:", + now + ")" + ]) + + # download the minikraken2 data + src = urlopen( + 'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_%s_8GB_201904_UPDATE.tgz' + % minikraken2_version + ) + with open('tmp_data.tar.gz', 'wb') as dst: + shutil.copyfileobj(src, dst) + # unpack the downloaded archive to the target directory + with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh: + fh.extractall(target_directory) + + data_table_entry = { + "value": database_value, + "name": database_name, + "path": database_value, + } + + _add_data_table_entry(data_manager_dict, data_table_entry) + def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name=DATA_TABLE_NAME): data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] ) @@ -73,18 +126,14 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('data_manager_json') - parser.add_argument( '-k', 
'--kmer-len', dest='kmer_len', type=int, default=35, help='kmer length' ) - parser.add_argument( '-m', '--minimizer-len', dest='minimizer_len', type=int, default=31, help='minimizer length' ) - parser.add_argument( '-s', '--minimizer-spaces', dest='minimizer_spaces', default=6, help='minimizer spaces' ) - parser.add_argument( '-t', '--threads', dest='threads', default=1, help='threads' ) + parser.add_argument('--kmer-len', dest='kmer_len', type=int, default=35, help='kmer length') + parser.add_argument('--minimizer-len', dest='minimizer_len', type=int, default=31, help='minimizer length') + parser.add_argument('--minimizer-spaces', dest='minimizer_spaces', default=6, help='minimizer spaces') + parser.add_argument('--threads', dest='threads', default=1, help='threads') + parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build') + parser.add_argument( '--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version' ) args = parser.parse_args() - kraken2_args = { - "kmer_len": args.kmer_len, - "minimizer_len": args.minimizer_len, - "minimizer_spaces": args.minimizer_spaces, - "threads": args.threads, - } data_manager_input = json.loads(open(args.data_manager_json).read()) @@ -100,11 +149,26 @@ data_manager_output = {} - kraken2_build_standard( - data_manager_output, - kraken2_args, - target_directory, - ) + if args.database_type == 'standard': + kraken2_args = { + "kmer_len": args.kmer_len, + "minimizer_len": args.minimizer_len, + "minimizer_spaces": args.minimizer_spaces, + "threads": args.threads, + } + kraken2_build_standard( + data_manager_output, + kraken2_args, + target_directory, + ) + elif args.database_type == 'minikraken': + kraken2_build_minikraken( + data_manager_output, + args.minikraken2_version, + target_directory + ) + else: + sys.exit("Invalid database type") 
open(args.data_manager_json, 'w').write(json.dumps(data_manager_output)) |