# HG changeset patch
# User iuc
# Date 1624613796 0
# Node ID 2004bb84568580b8b440b4d6bd2388592ff8ab1e
# Parent 2ffd2cdc508934df056a60852db3e7aa0c87e6fa
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mothur_toolsuite/ commit f845716f6ac93500f143a30abef97eaba406344e"

diff -r 2ffd2cdc5089 -r 2004bb845685 data_manager/data_manager_fetch_mothur_reference_data.xml
--- a/data_manager/data_manager_fetch_mothur_reference_data.xml Thu Sep 17 09:37:20 2020 +0000
+++ b/data_manager/data_manager_fetch_mothur_reference_data.xml Fri Jun 25 09:36:36 2021 +0000
@@ -1,8 +1,8 @@
-
+
     Fetch and install reference data for Mothur
-    python
+    python
     GS FLX Titanium lookup files
+
+
@@ -93,6 +95,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 2ffd2cdc5089 -r 2004bb845685 data_manager/fetch_mothur_reference_data.py
--- a/data_manager/fetch_mothur_reference_data.py Thu Sep 17 09:37:20 2020 +0000
+++ b/data_manager/fetch_mothur_reference_data.py Fri Jun 25 09:36:36 2021 +0000
@@ -1,6 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Data manager for reference data for the 'mothur_toolsuite' Galaxy tools
+import io
 import json
 import optparse
 import os
@@ -8,7 +9,9 @@
 import sys
 import tarfile
 import tempfile
-import urllib2
+import urllib.error
+import urllib.parse
+import urllib.request
 import zipfile
 from functools import reduce
 
@@ -38,6 +41,14 @@
     },
     # RDP reference files
     # http://www.mothur.org/wiki/RDP_reference_files
+    "RDP_v18": {
+        "16S rRNA RDP training set 18":
+        [
+            "https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset18_062020.rdp.tgz", ],
+        "16S rRNA PDS training set 18":
+        [
+            "https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset18_062020.pds.tgz", ],
+    },
     "RDP_v16": {
         "16S rRNA RDP training set 16":
         ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset16_022016.rdp.tgz", ],
@@ -76,6 +87,12 @@
     },
     # Silva reference files
     # http://www.mothur.org/wiki/Silva_reference_files
+    "silva_release_138.1": {
+        "SILVA release 138.1":
+        [
+            "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v138_1.tgz",
+            "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v138_1.tgz", ],
+    },
     "silva_release_128": {
         "SILVA release 128":
         ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v128.tgz",
@@ -160,7 +177,8 @@
     to create it if necessary.
 
     """
-    params = json.loads(open(jsonfile).read())
+    with open(jsonfile) as fh:
+        params = json.load(fh)
     return (params['param_dict'],
             params['output_data'][0]['extra_files_path'])
 
@@ -172,7 +190,7 @@
 # >>> add_data_table(d,'my_data')
 # >>> add_data_table_entry(dict(dbkey='hg19',value='human'))
 # >>> add_data_table_entry(dict(dbkey='mm9',value='mouse'))
-# >>> print str(json.dumps(d))
+# >>> print(json.dumps(d))
 def create_data_tables_dict():
     """Return a dictionary for storing data table information
 
@@ -229,13 +247,19 @@
     Returns the name that the file is saved with.
 
""" - print("Downloading %s" % url) + print(f"Downloading {url}") if not target: target = os.path.basename(url) if wd: target = os.path.join(wd, target) - print("Saving to %s" % target) - open(target, 'wb').write(urllib2.urlopen(url).read()) + print(f"Saving to {target}") + with open(target, 'wb') as fh: + url_h = urllib.request.urlopen(url) + while True: + buffer = url_h.read(io.DEFAULT_BUFFER_SIZE) + if buffer == b"": + break + fh.write(buffer) return target @@ -255,35 +279,36 @@ """ if not zipfile.is_zipfile(filen): - print("%s: not ZIP formatted file") + print(f"{filen}: not ZIP formatted file") return [filen] file_list = [] - z = zipfile.ZipFile(filen) - for name in z.namelist(): - if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): - print("Ignoring %s" % name) - continue - if wd: - target = os.path.join(wd, name) - else: - target = name - if name.endswith('/'): - # Make directory - print("Creating dir %s" % target) - try: - os.makedirs(target) - except OSError: - pass - else: - # Extract file - print("Extracting %s" % name) - try: - os.makedirs(os.path.dirname(target)) - except OSError: - pass - open(target, 'wb').write(z.read(name)) - file_list.append(target) - print("Removing %s" % filen) + with zipfile.ZipFile(filen) as z: + for name in z.namelist(): + if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): + print(f"Ignoring {name}") + continue + if wd: + target = os.path.join(wd, name) + else: + target = name + if name.endswith('/'): + # Make directory + print(f"Creating dir {target}") + try: + os.makedirs(target) + except OSError: + pass + else: + # Extract file + print("Extracting {target}") + try: + os.makedirs(os.path.dirname(target)) + except OSError: + pass + with open(target, 'wb') as fh: + fh.write(z.read(name)) + file_list.append(target) + print(f"Removing {filen}") os.remove(filen) return file_list @@ -306,23 +331,23 @@ """ file_list = [] if not tarfile.is_tarfile(filen): - print("%s: not TAR file") + print(f"{filen}: not TAR file") return [filen] - t = tarfile.open(filen) - for name in t.getnames(): - # Check for unwanted files - if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): - print("Ignoring %s" % name) - continue - # Extract file - print("Extracting %s" % name) - t.extract(name, wd) - if wd: - target = os.path.join(wd, name) - else: - target = name - file_list.append(target) - print("Removing %s" % filen) + with tarfile.open(filen) as t: + for name in t.getnames(): + # Check for unwanted files + if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): + print(f"Ignoring {name}") + continue + # Extract file + print(f"Extracting {name}") + t.extract(name, wd) + if wd: + target = os.path.join(wd, name) + else: + target = name + file_list.append(target) + print(f"Removing {filen}") os.remove(filen) return file_list @@ -340,9 +365,9 @@ current working directory. 
""" - print("Unpack %s" % filen) + print(f"Unpack {filen}") ext = os.path.splitext(filen)[1] - print("Extension: %s" % ext) + print(f"Extension: {ext}") if ext == ".zip": return unpack_zip_archive(filen, wd=wd) elif ext == ".tgz": @@ -383,7 +408,7 @@ try: return MOTHUR_FILE_TYPES[ext] except KeyError: - print("WARNING: unknown file type for " + filen + ", skipping") + print(f"WARNING: unknown file type for {filen}, skipping") return None @@ -416,26 +441,27 @@ """ # Make working dir wd = tempfile.mkdtemp(suffix=".mothur", dir=os.getcwd()) - print("Working dir %s" % wd) + print(f"Working dir {wd}") # Iterate over all requested reference data URLs for dataset in datasets: - print("Handling dataset '%s'" % dataset) + print(f"Handling dataset '{dataset}'") for name in MOTHUR_REFERENCE_DATA[dataset]: for f in fetch_files(MOTHUR_REFERENCE_DATA[dataset][name], wd=wd): type_ = identify_type(f) - entry_name = "%s (%s)" % (os.path.splitext(os.path.basename(f))[0], name) - print("%s\t\'%s'\t.../%s" % (type_, entry_name, os.path.basename(f))) + name_from_file = os.path.splitext(os.path.basename(f))[0] + entry_name = f"{name_from_file} ({name})" + print(f"{type_}\t\'{entry_name}'\t.../{os.path.basename(f)}") if type_ is not None: # Move to target dir ref_data_file = os.path.basename(f) f1 = os.path.join(target_dir, ref_data_file) - print("Moving %s to %s" % (f, f1)) - os.rename(f, f1) + print(f"Moving {f} to {f1}") + shutil.move(f, f1) # Add entry to data table - table_name = "mothur_%s" % type_ + table_name = f"mothur_{type_}" add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) # Remove working dir - print("Removing %s" % wd) + print(f"Removing {wd}") shutil.rmtree(wd) @@ -451,7 +477,7 @@ files = [] for path in paths: path = os.path.abspath(path) - print("Examining '%s'..." 
% path) + print(f"Examining '{path}'...") if os.path.isfile(path): # Store full path for file files.append(path) @@ -490,21 +516,21 @@ for f in files: type_ = identify_type(f) if type_ is None: - print("%s: unrecognised type, skipped" % f) + print(f"{f}: unrecognised type, skipped") continue ref_data_file = os.path.basename(f) target_file = os.path.join(target_dir, ref_data_file) entry_name = "%s" % os.path.splitext(ref_data_file)[0] if description: entry_name += " (%s)" % description - print("%s\t\'%s'\t.../%s" % (type_, entry_name, ref_data_file)) + print(f"{type_}\t\'{entry_name}'\t.../{ref_data_file}") # Link to or copy the data if link_to_data: os.symlink(f, target_file) else: shutil.copyfile(f, target_file) # Add entry to data table - table_name = "mothur_%s" % type_ + table_name = f"mothur_{type_}" add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) @@ -519,8 +545,8 @@ parser.add_option('--description', action='store', dest='description', default='') parser.add_option('--link', action='store_true', dest='link_to_data') options, args = parser.parse_args() - print("options: %s" % options) - print("args : %s" % args) + print(f"options: {options}") + print(f"args : {args}") # Check for JSON file if len(args) != 1: @@ -533,7 +559,7 @@ params, target_dir = read_input_json(jsonfile) # Make the target directory - print("Making %s" % target_dir) + print(f"Making {target_dir}") os.mkdir(target_dir) # Set up data tables dictionary @@ -556,6 +582,6 @@ import_from_server(data_tables, target_dir, paths, description, link_to_data=options.link_to_data) # Write output JSON print("Outputting JSON") - print(json.dumps(data_tables)) - open(jsonfile, 'w').write(json.dumps(data_tables, sort_keys=True)) + with open(jsonfile, 'w') as fh: + json.dump(data_tables, fh, sort_keys=True) print("Done.")