Mercurial > repos > iuc > data_manager_mothur_toolsuite
changeset 3:2004bb845685 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mothur_toolsuite/ commit f845716f6ac93500f143a30abef97eaba406344e"
author | iuc |
---|---|
date | Fri, 25 Jun 2021 09:36:36 +0000 |
parents | 2ffd2cdc5089 |
children | |
files | data_manager/data_manager_fetch_mothur_reference_data.xml data_manager/fetch_mothur_reference_data.py |
diffstat | 2 files changed, 112 insertions(+), 72 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager_fetch_mothur_reference_data.xml Thu Sep 17 09:37:20 2020 +0000 +++ b/data_manager/data_manager_fetch_mothur_reference_data.xml Fri Jun 25 09:36:36 2021 +0000 @@ -1,8 +1,8 @@ <?xml version="1.0"?> -<tool id="data_manager_fetch_mothur_reference_data" name="Fetch Mothur toolsuite reference data" version="0.1.5" tool_type="manage_data" profile="19.05"> +<tool id="data_manager_fetch_mothur_reference_data" name="Fetch Mothur toolsuite reference data" version="0.2.1" tool_type="manage_data" profile="19.05"> <description>Fetch and install reference data for Mothur</description> <requirements> - <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="3.8">python</requirement> </requirements> <command><![CDATA[ python '$__tool_directory__/fetch_mothur_reference_data.py' @@ -31,12 +31,14 @@ <option value="lookup_titanium">GS FLX Titanium lookup files</option> <option value="lookup_gsflx">GSFLX lookup files</option> <option value="lookup_gs20">GS20 lookup files</option> + <option value="RDP_v18">RDP reference files (training set version 18)</option> <option value="RDP_v16">RDP reference files (training set version 16)</option> <option value="RDP_v14">RDP reference files (training set version 14)</option> <option value="RDP_v10">RDP reference files (training set version 10)</option> <option value="RDP_v9">RDP reference files (training set version 9)</option> <option value="RDP_v7">RDP reference files (training set version 7)</option> <option value="RDP_v6">RDP reference files (training set version 6)</option> + <option value="silva_release_138.1">SILVA reference files (release 138.1)</option> <option value="silva_release_128">SILVA reference files (release 128)</option> <option value="silva_release_123">SILVA reference files (release 123)</option> <option value="silva_release_119">SILVA reference files (release 119)</option> @@ -93,6 +95,18 @@ </output> </test> <test> + <param 
name="data_source|ref_data" value="RDP_v18"/> + <output name="out_file"> + <assert_contents> + <has_text text="16S rRNA RDP training set 18" /> + <has_text text="trainset18_062020.rdp.fasta" /> + <has_text text="trainset18_062020.rdp.tax" /> + <has_text text="trainset18_062020.pds.fasta" /> + <has_text text="trainset18_062020.pds.tax" /> + </assert_contents> + </output> + </test> + <test> <param name="data_source|ref_data" value="RDP_v16"/> <output name="out_file"> <assert_contents>
--- a/data_manager/fetch_mothur_reference_data.py Thu Sep 17 09:37:20 2020 +0000 +++ b/data_manager/fetch_mothur_reference_data.py Fri Jun 25 09:36:36 2021 +0000 @@ -1,6 +1,7 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Data manager for reference data for the 'mothur_toolsuite' Galaxy tools +import io import json import optparse import os @@ -8,7 +9,9 @@ import sys import tarfile import tempfile -import urllib2 +import urllib.error +import urllib.parse +import urllib.request import zipfile from functools import reduce @@ -38,6 +41,14 @@ }, # RDP reference files # http://www.mothur.org/wiki/RDP_reference_files + "RDP_v18": { + "16S rRNA RDP training set 18": + [ + "https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset18_062020.rdp.tgz", ], + "16S rRNA PDS training set 18": + [ + "https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset18_062020.pds.tgz", ], + }, "RDP_v16": { "16S rRNA RDP training set 16": ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset16_022016.rdp.tgz", ], @@ -76,6 +87,12 @@ }, # Silva reference files # http://www.mothur.org/wiki/Silva_reference_files + "silva_release_138.1": { + "SILVA release 138.1": + [ + "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v138_1.tgz", + "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v138_1.tgz", ], + }, "silva_release_128": { "SILVA release 128": ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v128.tgz", @@ -160,7 +177,8 @@ to create it if necessary. 
""" - params = json.loads(open(jsonfile).read()) + with open(jsonfile) as fh: + params = json.load(fh) return (params['param_dict'], params['output_data'][0]['extra_files_path']) @@ -172,7 +190,7 @@ # >>> add_data_table(d,'my_data') # >>> add_data_table_entry(dict(dbkey='hg19',value='human')) # >>> add_data_table_entry(dict(dbkey='mm9',value='mouse')) -# >>> print str(json.dumps(d)) +# >>> print(json.dumps(d)) def create_data_tables_dict(): """Return a dictionary for storing data table information @@ -229,13 +247,19 @@ Returns the name that the file is saved with. """ - print("Downloading %s" % url) + print(f"Downloading {url}") if not target: target = os.path.basename(url) if wd: target = os.path.join(wd, target) - print("Saving to %s" % target) - open(target, 'wb').write(urllib2.urlopen(url).read()) + print(f"Saving to {target}") + with open(target, 'wb') as fh: + url_h = urllib.request.urlopen(url) + while True: + buffer = url_h.read(io.DEFAULT_BUFFER_SIZE) + if buffer == b"": + break + fh.write(buffer) return target @@ -255,35 +279,36 @@ """ if not zipfile.is_zipfile(filen): - print("%s: not ZIP formatted file") + print(f"{filen}: not ZIP formatted file") return [filen] file_list = [] - z = zipfile.ZipFile(filen) - for name in z.namelist(): - if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): - print("Ignoring %s" % name) - continue - if wd: - target = os.path.join(wd, name) - else: - target = name - if name.endswith('/'): - # Make directory - print("Creating dir %s" % target) - try: - os.makedirs(target) - except OSError: - pass - else: - # Extract file - print("Extracting %s" % name) - try: - os.makedirs(os.path.dirname(target)) - except OSError: - pass - open(target, 'wb').write(z.read(name)) - file_list.append(target) - print("Removing %s" % filen) + with zipfile.ZipFile(filen) as z: + for name in z.namelist(): + if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): + print(f"Ignoring {name}") + continue + if wd: + target 
= os.path.join(wd, name) + else: + target = name + if name.endswith('/'): + # Make directory + print(f"Creating dir {target}") + try: + os.makedirs(target) + except OSError: + pass + else: + # Extract file + print(f"Extracting {target}") + try: + os.makedirs(os.path.dirname(target)) + except OSError: + pass + with open(target, 'wb') as fh: + fh.write(z.read(name)) + file_list.append(target) + print(f"Removing {filen}") + os.remove(filen) return file_list @@ -306,23 +331,23 @@ """ file_list = [] if not tarfile.is_tarfile(filen): - print("%s: not TAR file") + print(f"{filen}: not TAR file") return [filen] - t = tarfile.open(filen) - for name in t.getnames(): - # Check for unwanted files - if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): - print("Ignoring %s" % name) - continue - # Extract file - print("Extracting %s" % name) - t.extract(name, wd) - if wd: - target = os.path.join(wd, name) - else: - target = name - file_list.append(target) - print("Removing %s" % filen) + with tarfile.open(filen) as t: + for name in t.getnames(): + # Check for unwanted files + if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): + print(f"Ignoring {name}") + continue + # Extract file + print(f"Extracting {name}") + t.extract(name, wd) + if wd: + target = os.path.join(wd, name) + else: + target = name + file_list.append(target) + print(f"Removing {filen}") + os.remove(filen) return file_list @@ -340,9 +365,9 @@ current working directory.
""" - print("Unpack %s" % filen) + print(f"Unpack {filen}") ext = os.path.splitext(filen)[1] - print("Extension: %s" % ext) + print(f"Extension: {ext}") if ext == ".zip": return unpack_zip_archive(filen, wd=wd) elif ext == ".tgz": @@ -383,7 +408,7 @@ try: return MOTHUR_FILE_TYPES[ext] except KeyError: - print("WARNING: unknown file type for " + filen + ", skipping") + print(f"WARNING: unknown file type for {filen}, skipping") return None @@ -416,26 +441,27 @@ """ # Make working dir wd = tempfile.mkdtemp(suffix=".mothur", dir=os.getcwd()) - print("Working dir %s" % wd) + print(f"Working dir {wd}") # Iterate over all requested reference data URLs for dataset in datasets: - print("Handling dataset '%s'" % dataset) + print(f"Handling dataset '{dataset}'") for name in MOTHUR_REFERENCE_DATA[dataset]: for f in fetch_files(MOTHUR_REFERENCE_DATA[dataset][name], wd=wd): type_ = identify_type(f) - entry_name = "%s (%s)" % (os.path.splitext(os.path.basename(f))[0], name) - print("%s\t\'%s'\t.../%s" % (type_, entry_name, os.path.basename(f))) + name_from_file = os.path.splitext(os.path.basename(f))[0] + entry_name = f"{name_from_file} ({name})" + print(f"{type_}\t\'{entry_name}'\t.../{os.path.basename(f)}") if type_ is not None: # Move to target dir ref_data_file = os.path.basename(f) f1 = os.path.join(target_dir, ref_data_file) - print("Moving %s to %s" % (f, f1)) - os.rename(f, f1) + print(f"Moving {f} to {f1}") + shutil.move(f, f1) # Add entry to data table - table_name = "mothur_%s" % type_ + table_name = f"mothur_{type_}" add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) # Remove working dir - print("Removing %s" % wd) + print(f"Removing {wd}") shutil.rmtree(wd) @@ -451,7 +477,7 @@ files = [] for path in paths: path = os.path.abspath(path) - print("Examining '%s'..." 
% path) + print(f"Examining '{path}'...") if os.path.isfile(path): # Store full path for file files.append(path) @@ -490,21 +516,21 @@ for f in files: type_ = identify_type(f) if type_ is None: - print("%s: unrecognised type, skipped" % f) + print(f"{f}: unrecognised type, skipped") continue ref_data_file = os.path.basename(f) target_file = os.path.join(target_dir, ref_data_file) entry_name = "%s" % os.path.splitext(ref_data_file)[0] if description: entry_name += " (%s)" % description - print("%s\t\'%s'\t.../%s" % (type_, entry_name, ref_data_file)) + print(f"{type_}\t\'{entry_name}'\t.../{ref_data_file}") # Link to or copy the data if link_to_data: os.symlink(f, target_file) else: shutil.copyfile(f, target_file) # Add entry to data table - table_name = "mothur_%s" % type_ + table_name = f"mothur_{type_}" add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) @@ -519,8 +545,8 @@ parser.add_option('--description', action='store', dest='description', default='') parser.add_option('--link', action='store_true', dest='link_to_data') options, args = parser.parse_args() - print("options: %s" % options) - print("args : %s" % args) + print(f"options: {options}") + print(f"args : {args}") # Check for JSON file if len(args) != 1: @@ -533,7 +559,7 @@ params, target_dir = read_input_json(jsonfile) # Make the target directory - print("Making %s" % target_dir) + print(f"Making {target_dir}") os.mkdir(target_dir) # Set up data tables dictionary @@ -556,6 +582,6 @@ import_from_server(data_tables, target_dir, paths, description, link_to_data=options.link_to_data) # Write output JSON print("Outputting JSON") - print(json.dumps(data_tables)) - open(jsonfile, 'w').write(json.dumps(data_tables, sort_keys=True)) + with open(jsonfile, 'w') as fh: + json.dump(data_tables, fh, sort_keys=True) print("Done.")