Mercurial > repos > dfornika > data_manager_build_kraken2_database
changeset 64:e5eba596cc1b draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit a19fdd5d56ca4583441911d36554648130d84033-dirty"
author | dfornika |
---|---|
date | Wed, 03 Nov 2021 23:58:21 +0000 |
parents | 80e778af6c6c |
children | 1381471bb6e5 |
files | data_manager/kraken2_build_database.py data_manager/kraken2_build_database.xml |
diffstat | 2 files changed, 80 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/kraken2_build_database.py Wed Nov 03 22:34:28 2021 +0000
+++ b/data_manager/kraken2_build_database.py Wed Nov 03 23:58:21 2021 +0000
@@ -24,7 +24,8 @@
 
 
 class KrakenDatabaseTypes(Enum):
-    standard = 'standard'
+    standard_local_build = 'standard_local_build'
+    standard_prebuilt = 'standard_prebuilt'
     minikraken = 'minikraken'
     special = 'special'
     custom = 'custom'
@@ -50,6 +51,15 @@
         return self.value
 
 
+class StandardPrebuiltSizes(Enum):
+    full = 'full'
+    gb_16 = '16'
+    gb_8 = '8'
+
+    def __str__(self):
+        return self.value
+
+
 def kraken2_build_standard(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
     now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
 
@@ -63,7 +73,7 @@
     ])
 
     database_name = " ".join([
-        "Standard",
+        "Standard (Local Build)",
         "(Created:",
         now + ",",
         "kmer-len=" + str(kraken2_args["kmer_len"]) + ",",
@@ -109,6 +119,60 @@
 
     return data_table_entry
 
+def kraken2_build_standard_prebuilt(standard_prebuilt_size, target_directory, data_table_name=DATA_TABLE_NAME):
+
+    now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
+
+    database_value = "_".join([
+        now,
+        "standard_prebuilt",
+        standard_prebuilt_size
+    ])
+
+    database_name = " ".join([
+        "Standard (Prebuilt)",
+        standard_prebuilt_size,
+        "(Downloaded:",
+        now + ")"
+    ])
+
+    database_path = database_value
+
+    size_to_url_str = {
+        'full': '',
+        '16': '_16gb',
+        '8': '_8gb',
+    }
+    date_url_str = '20210517'
+    standard_prebuilt_size_url = size_to_url_str[standard_prebuilt_size]
+    # download the pre-built database
+    src = urlopen(
+        's3://genome-idx/kraken/k2_standard%s_%s.tar.gz'
+        % standard_prebuilt_size_url, date_url_str
+    )
+    with open('tmp_data.tar.gz', 'wb') as dst:
+        shutil.copyfileobj(src, dst)
+    # unpack the downloaded archive to the target directory
+    with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh:
+        for member in fh.getmembers():
+            if member.isreg():
+                member.name = os.path.basename(member.name)
+                fh.extract(member, os.path.join(target_directory, database_path))
+
+    data_table_entry = {
+        'data_tables': {
+            data_table_name: [
+                {
+                    "value": database_value,
+                    "name": database_name,
+                    "path": database_path,
+                }
+            ]
+        }
+    }
+
+    return data_table_entry
+
 
 def kraken2_build_minikraken(minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME):
 
@@ -293,6 +357,7 @@
     parser.add_argument('--threads', dest='threads', default=1, help='threads')
     parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build')
     parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)')
+    parser.add_argument('--standard-prebuilt-size', dest='standard_prebuilt_size', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Size of standard prebuilt database to download (only applies to --database-type standard_prebuilt)')
     parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)')
     parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)')
     parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)')
--- a/data_manager/kraken2_build_database.xml Wed Nov 03 22:34:28 2021 +0000
+++ b/data_manager/kraken2_build_database.xml Wed Nov 03 23:58:21 2021 +0000
@@ -23,7 +23,9 @@
         --database-type ${database_type.database_type}
         #if $database_type.database_type == "minikraken"
             --minikraken2-version ${database_type.minikraken2_version}
-        #else if $database_type.database_type == "standard"
+        #else if $database_type.database_type == "standard_prebuilt"
+            --standard-prebuilt-size ${database_type.standard_prebuilt_size}
+        #else if $database_type.database_type == "standard_local_build"
             --threads \${GALAXY_SLOTS:-1}
             --kmer-len ${database_type.kmer_len}
             --minimizer-len ${database_type.minimizer_len}
@@ -54,14 +56,22 @@
     <inputs>
         <conditional name="database_type">
             <param name="database_type" type="select" multiple="false" label="Database Type">
-                <option value="standard">Standard</option>
+                <option value="standard_local_build">Standard, Local Build</option>
+                <option value="standard_prebuilt">Standard, Pre-Built</option>
                 <option value="minikraken">MiniKraken</option>
                 <option value="special">Special</option>
                 <option value="custom">Custom</option>
             </param>
-            <when value="standard">
+            <when value="standard_local_build">
                 <expand macro="common_params" />
             </when>
+            <when value="standard_prebuilt">
+                <param name="standard_prebuilt_size" type="select" multiple="false" label="Select size of prebuilt database to download">
+                    <option value="full">Full (~50 GB)</option>
+                    <option value="16">Standard-16 (~16 GB)</option>
+                    <option value="8">Standard-8 (~8 GB)</option>
+                </param>
+            </when>
             <when value="minikraken">
                 <param name="minikraken2_version" type="select" multiple="false" label="Select MiniKraken2 database version to download">
                     <option value="v2">Version 2</option>