# HG changeset patch
# User matthias
# Date 1551969223 18000
# Node ID 419037fe115043be1c3da1b39dcf7bb5bc1e4e52
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
diff -r 000000000000 -r 419037fe1150 data_manager/.dada_fetcher.xml.swp
Binary file data_manager/.dada_fetcher.xml.swp has changed
diff -r 000000000000 -r 419037fe1150 data_manager/.data_manager.py.swp
Binary file data_manager/.data_manager.py.swp has changed
diff -r 000000000000 -r 419037fe1150 data_manager/.megan_tools_fetcher.xml.swp
Binary file data_manager/.megan_tools_fetcher.xml.swp has changed
diff -r 000000000000 -r 419037fe1150 data_manager/dada_fetcher.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/dada_fetcher.xml Thu Mar 07 09:33:43 2019 -0500
@@ -0,0 +1,39 @@
+
+
+ Download reference data sets
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+http://www.arb-silva.de/silva-license-information
+
+
+
diff -r 000000000000 -r 419037fe1150 data_manager/data_manager.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager.py Thu Mar 07 09:33:43 2019 -0500
@@ -0,0 +1,106 @@
+import argparse
+import json
+import os
+import shutil
+import sys
+import zipfile
+try:
+ # For Python 3.0 and later
+ from urllib.request import Request, urlopen
+except ImportError:
+ # Fall back to Python 2 imports
+ from urllib2 import Request, urlopen
+
# Taxonomic ranks assigned by default when a data set has no
# entry in FILE2TAXLEVELS.
DEFAULT_TAXLEVELS = "Kingdom,Phylum,Class,Order,Family,Genus,Species"

# Map of data set key -> human readable name shown in Galaxy.
# NOTE(review): the key "gg13.84" looks like a typo for "gg13.8"
# (the display name says "version 13.8"), but it is kept verbatim
# because callers select data sets by this key.
FILE2NAME = {
    "silva132": "Silva version 132",
    "silva128": "Silva version 128",
    "rdp16": "RDP trainset 16",
    "rdp14": "RDP trainset 14",
    "gg13.84": "GreenGenes version 13.8",
}

# Map of data set key -> Zenodo download URL of the taxonomy
# training set (dada2 assignTaxonomy input).
FILE2TAXURL = {
    "silva132": "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
    "silva128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
    "rdp16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
    "rdp14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
    "gg13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
}

# Map of data set key -> Zenodo download URL of the species
# assignment set (dada2 addSpecies input). GreenGenes has none.
FILE2SPECIESURL = {
    "silva132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
    "silva128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
    "rdp16": "https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1",
    "rdp14": "https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1",
}

# Per-data-set override of the taxonomy levels; empty means every
# data set falls back to DEFAULT_TAXLEVELS.
FILE2TAXLEVELS = {
}
+
def url_download(url, fname, workdir):
    """Download ``url`` to ``workdir/fname``.

    Creates ``workdir`` (including parents) if it does not exist.

    :param url: source URL to fetch
    :param fname: file name for the download inside ``workdir``
    :param workdir: destination directory
    :return: path of the downloaded file (``workdir/fname``)
    """
    file_path = os.path.join(workdir, fname)
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    src = None
    try:
        req = Request(url)
        src = urlopen(req)
        with open(file_path, 'wb') as dst:
            # Stream in chunks so large reference files are never
            # held in memory completely.
            shutil.copyfileobj(src, dst)
    finally:
        # urlopen handles are not context managers on Python 2,
        # so close explicitly in a finally block.
        if src:
            src.close()
    return file_path
+
def main(dataset, outjson):
    """Download the reference data for ``dataset`` and rewrite the
    Galaxy data manager JSON file ``outjson``.

    Reads the Galaxy-supplied parameters from ``outjson`` to learn the
    target directory, downloads the taxonomy training set (and, when
    available, the species assignment set), moves the files into the
    target directory and writes the data table entries back to
    ``outjson``.

    :param dataset: key into FILE2NAME / FILE2TAXURL / FILE2SPECIESURL
    :param outjson: path of the data manager JSON file (read and rewritten)
    """
    with open(outjson) as fh:
        params = json.load(fh)
    target_directory = params['output_data'][0]['extra_files_path']
    os.mkdir(target_directory)
    workdir = os.path.join(os.getcwd(), 'dada2')

    # Taxonomy training set: available for every data set.
    # BUG FIX: original referenced an undefined name `taxdataset`.
    path = url_download(FILE2TAXURL[dataset], dataset + ".taxonomy", workdir)

    data_manager_json = {"data_tables": {}}
    data_manager_entry = {
        'value': dataset,
        'name': FILE2NAME[dataset],
        'path': path,
        'taxlevels': FILE2TAXLEVELS.get(dataset, DEFAULT_TAXLEVELS),
    }
    data_manager_json["data_tables"]["dada2_taxonomy"] = data_manager_entry

    # Species assignment set: not available for every data set
    # (e.g. GreenGenes has none).
    # BUG FIX: original referenced undefined `FILE2SPECIES`; the
    # dict is named FILE2SPECIESURL.
    if FILE2SPECIESURL.get(dataset, False):
        path = url_download(FILE2SPECIESURL[dataset], dataset + ".species", workdir)
        data_manager_entry = {
            'value': dataset,
            'name': FILE2NAME[dataset],
            'path': path,
        }
        data_manager_json["data_tables"]["dada2_species"] = data_manager_entry

    # Move the downloads into the Galaxy-managed target directory.
    # BUG FIX: original listed `workdir` but moved from a separate
    # `output_path` variable holding the same path; unified on workdir.
    for filename in os.listdir(workdir):
        shutil.move(os.path.join(workdir, filename), target_directory)

    # BUG FIX: original used the Python-2-only builtin `file()`,
    # although the import block explicitly supports Python 3.
    with open(outjson, 'w') as fh:
        fh.write(json.dumps(data_manager_json))
+
if __name__ == '__main__':
    # Command line interface: --dataset selects the reference data set,
    # --out names the data manager JSON file (read and rewritten by main).
    arg_parser = argparse.ArgumentParser(description='Create data manager json.')
    arg_parser.add_argument('--out', action='store', help='JSON filename')
    arg_parser.add_argument('--dataset', action='store', help='Download data set name')
    cli_args = arg_parser.parse_args()
    main(cli_args.dataset, cli_args.out)
diff -r 000000000000 -r 419037fe1150 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Thu Mar 07 09:33:43 2019 -0500
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 419037fe1150 test-data/SSURef_Nr99_132_tax_silva_to_NCBI_synonyms_json
diff -r 000000000000 -r 419037fe1150 tool-data/.dada2_species.loc.sample.swp
Binary file tool-data/.dada2_species.loc.sample.swp has changed
diff -r 000000000000 -r 419037fe1150 tool-data/.dada2_taxonomy.loc.sample.swp
Binary file tool-data/.dada2_taxonomy.loc.sample.swp has changed
diff -r 000000000000 -r 419037fe1150 tool-data/dada2_species.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/dada2_species.loc.sample Thu Mar 07 09:33:43 2019 -0500
@@ -0,0 +1,9 @@
+# This is a sample file distributed with Galaxy that is used to define a
+# list of dada2 reference data sets for species assignment, using three
+# tab separated columns:
+#
+#
+#
+#
+# Datasets can be retrieved from https://benjjneb.github.io/dada2/training.html
diff -r 000000000000 -r 419037fe1150 tool-data/dada2_taxonomy.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/dada2_taxonomy.loc.sample Thu Mar 07 09:33:43 2019 -0500
@@ -0,0 +1,9 @@
+# This is a sample file distributed with Galaxy that is used to define a
+# list of dada2 reference data sets for taxonomy assignment, using three
+# tab separated columns:
+#
+#
+#
+# Datasets can be retrieved from https://benjjneb.github.io/dada2/training.html
+#
+# taxlevels is a comma separated list of taxonomy levels
diff -r 000000000000 -r 419037fe1150 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Mar 07 09:33:43 2019 -0500
@@ -0,0 +1,11 @@
+
+
+
+
+ value, name, path, taxlevels
+
+
+