# HG changeset patch
# User matthias
# Date 1560946453 14400
# Node ID 42bbbc4b4739c2931ce5428d647463d93719c344
# Parent e374299894b069c2dcf45008bc2fad01ea8d7f90
planemo upload commit 8d8a5c80b61682382aaad45f82948a9850a71e9c
diff -r e374299894b0 -r 42bbbc4b4739 data_manager/.dada2_fetcher.xml.swp
Binary file data_manager/.dada2_fetcher.xml.swp has changed
diff -r e374299894b0 -r 42bbbc4b4739 data_manager/.data_manager.py.swp
Binary file data_manager/.data_manager.py.swp has changed
diff -r e374299894b0 -r 42bbbc4b4739 data_manager/dada2_fetcher.xml
--- a/data_manager/dada2_fetcher.xml Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,180 +0,0 @@
-
-
- Download reference databases
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 10.1093/nar/gks1219
- >
- 10.1093/nar/gkt1244
-
- 10.1128/AEM.03006-05
-
- 10.15156/BIO/786343
-
-
- 10.1186/s12864-015-2265-y
-
- 10.1093/nar/gks1160
-
-
-
diff -r e374299894b0 -r 42bbbc4b4739 data_manager/data_manager.py
--- a/data_manager/data_manager.py Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-import argparse
-import json
-import os
-import shutil
-import sys
-import zipfile
-try:
- # For Python 3.0 and later
- from urllib.request import Request, urlopen
-except ImportError:
- # Fall back to Python 2 imports
- from urllib2 import Request, urlopen
-
-DEFAULT_TAXLEVELS="Kingdom,Phylum,Class,Order,Family,Genus,Species"
-
-FILE2NAME = {
- "silva_132":"Silva version 132",
- "silva_128":"Silva version 128",
- "rdp_16":"RDP trainset 16",
- "rdp_14":"RDP trainset 14",
- "greengenes_13.84":"GreenGenes version 13.84",
- "unite_8.0_fungi": "UNITE: General Fasta release 8.0 for Fungi",
- "unite_8.0_fungi_singletons": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons",
- "RefSeq_RDP_2018_05": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)",
- "gtdb_2018_11": "GTDB: Genome Taxonomy Database (Bacteria & Archaea) (11/2018)",
- "hitdb_1": "HitDB version 1 (Human InTestinal 16S rRNA)",
- "silva_euk_18S_132": "Silva version 132 Eukaryotic 18S",
- "PR2_4.11.1": "Protist Ribosomal Reference database (PR2) 4.11.1"
-}
-
-FILE2TAXURL = {
- "silva_132":"https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
- "silva_128":"https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
- "rdp_16":"https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
- "rdp_14":"https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
- "unite_8.0_fungi": "https://files.plutof.ut.ee/public/orig/EB/0C/EB0CCB3A871B77EA75E472D13926271076904A588D2E1C1EA5AFCF7397D48378.zip",
- "unite_8.0_fungi_singletons": "https://files.plutof.ut.ee/doi/06/A2/06A2C86256EED64085670EB0C54B7115F6DAC8F311C656A9CB33E386CFABA0D0.zip",
- "greengenes_13.84":"https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
- "RefSeq_RDP_2018_05": "https://zenodo.org/record/2541239/files/RefSeq-RDP16S_v2_May2018.fa.gz?download=1",
- "gtdb_2018_11": "https://zenodo.org/record/2541239/files/GTDB_bac-arc_ssu_r86.fa.gz?download=1",
- "hitdb_1": "https://zenodo.org/record/159205/files/hitdb_v1.00.fa.gz?download=1",
- "silva_euk_18S_132": "https://zenodo.org/record/1447330/files/silva_132.18s.99_rep_set.dada2.fa.gz?download=1",
- "PR2_4.11.1": "https://github.com/pr2database/pr2database/releases/download/4.11.1/pr2_version_4.11.1_dada2.fasta.gz"
-}
-
-FILE2SPECIESURL = {
- "silva_132":"https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
- "silva_128":"https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
- "rdp_16":"https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1",
- "rdp_14":"https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1"
-}
-
-FILE2TAXLEVELS = {
- "PR2_4.11.1": "Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species"
-}
-
-def url_download(url, fname, workdir):
- """
- download url to workdir/fname
- """
- file_path = os.path.join(workdir, fname)
- if not os.path.exists(workdir):
- os.makedirs(workdir)
- src = None
- dst = None
- try:
- req = Request(url)
- src = urlopen(req)
- with open(file_path, 'wb') as dst:
- while True:
- chunk = src.read(2**10)
- if chunk:
- dst.write(chunk)
- else:
- break
- finally:
- if src:
- src.close()
-
- #special treatment of UNITE DBs: they are zip files containing two fasta (xyz.fasta and developer/xyz.fasta)
- if fname.startswith("unite"):
- import glob
- import gzip
- import shutil
- import zipfile
- # unzip download
- zip_ref = zipfile.ZipFile(file_path, 'r')
- zip_ref.extractall(workdir)
- zip_ref.close()
- # gzip top level fasta file
- fastas = glob.glob("%s/*fasta"%workdir)
- if len(fastas) != 1:
- msg = "UNITE download %s contained %d fasta file(s): %s"%(url, len(fastas), " ".join(fastas))
- raise Exception(msg)
- with open(fastas[0], 'rb') as f_in:
- with gzip.open(file_path, 'wb') as f_out:
- shutil.copyfileobj(f_in, f_out)
-
-
-def remote_dataset(dataset, outjson):
-
- with open(outjson) as jf:
- params = json.loads(jf.read())
-
- workdir = params['output_data'][0]['extra_files_path']
- os.mkdir(workdir)
- url_download( FILE2TAXURL[dataset], dataset+".taxonomy", workdir)
-
- data_manager_json = {"data_tables":{}}
- data_manager_entry = {}
- data_manager_entry['value'] = dataset
- data_manager_entry['name'] = FILE2NAME[dataset]
- data_manager_entry['path'] = dataset+".taxonomy"
- data_manager_entry['taxlevels'] = FILE2TAXLEVELS.get(dataset, DEFAULT_TAXLEVELS)
- data_manager_json["data_tables"]["dada2_taxonomy"] = data_manager_entry
-
- if FILE2SPECIESURL.get(dataset, False ):
- url_download( FILE2SPECIESURL[dataset], dataset+".species", workdir)
- data_manager_entry = {}
- data_manager_entry['value'] = dataset
- data_manager_entry['name'] = FILE2NAME[dataset]
- data_manager_entry['path'] = dataset+".species"
- data_manager_json["data_tables"]["dada2_species"] = data_manager_entry
-
- with file(outjson, 'w') as jf:
- jf.write(json.dumps(data_manager_json))
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='Create data manager json.')
- parser.add_argument('--out', action='store', help='JSON filename')
- parser.add_argument('--dataset', action='store', help='Download data set name')
- args = parser.parse_args()
-
- remote_dataset(args.dataset, args.out)
diff -r e374299894b0 -r 42bbbc4b4739 data_manager_conf.xml
--- a/data_manager_conf.xml Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,34 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
diff -r e374299894b0 -r 42bbbc4b4739 test-data/PR24.11.1_json
--- a/test-data/PR24.11.1_json Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"dada2_taxonomy": {"path": "PR2_4.11.1.taxonomy", "name": "Protist Ribosomal Reference database (PR2) 4.11.1", "value": "PR2_4.11.1", "taxlevels": "Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
diff -r e374299894b0 -r 42bbbc4b4739 test-data/RefSeq_RDP2018_json
--- a/test-data/RefSeq_RDP2018_json Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"dada2_taxonomy": {"path": "RefSeq_RDP_2018_05.taxonomy", "name": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)", "value": "RefSeq_RDP_2018_05", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
diff -r e374299894b0 -r 42bbbc4b4739 test-data/dada2_species.loc
--- a/test-data/dada2_species.loc Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-# This is a sample file distributed with Galaxy that is used to define a
-# list of dada2 reference data sets for species assignment, using three
-# tab separated columns:
-#
-#
-#
-# Datasets can be retrieved from http://busco.ezlab.org/frame_wget.html
-#
-# Datasets can be retrieved from https://benjjneb.github.io/dada2/training.html
diff -r e374299894b0 -r 42bbbc4b4739 test-data/dada2_taxonomy.loc
--- a/test-data/dada2_taxonomy.loc Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-# This is a sample file distributed with Galaxy that is used to define a
-# list of dada2 reference data sets for taxonomy assignment, using three
-# tab separated columns:
-#
-#
-#
-# Datasets can be retrieved from https://benjjneb.github.io/dada2/training.html
-#
-# taxlevels is a comma separated list of taxonomy levels
diff -r e374299894b0 -r 42bbbc4b4739 test-data/greengenes13.84_json
--- a/test-data/greengenes13.84_json Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"dada2_taxonomy": {"path": "greengenes_13.84.taxonomy", "name": "GreenGenes version 13.84", "value": "greengenes_13.84", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
diff -r e374299894b0 -r 42bbbc4b4739 test-data/gtdb2018_json
--- a/test-data/gtdb2018_json Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"dada2_taxonomy": {"path": "gtdb_2018_11.taxonomy", "name": "GTDB: Genome Taxonomy Database (Bacteria & Archaea) (11/2018)", "value": "gtdb_2018_11", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
diff -r e374299894b0 -r 42bbbc4b4739 test-data/hitdb1_json
--- a/test-data/hitdb1_json Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"dada2_taxonomy": {"path": "hitdb_1.taxonomy", "name": "HitDB version 1 (Human InTestinal 16S rRNA)", "value": "hitdb_1", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
diff -r e374299894b0 -r 42bbbc4b4739 test-data/rdp16_json
--- a/test-data/rdp16_json Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"dada2_species": {"path": "rdp_16.species", "name": "RDP trainset 16", "value": "rdp_16"}, "dada2_taxonomy": {"path": "rdp_16.taxonomy", "name": "RDP trainset 16", "value": "rdp_16", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
diff -r e374299894b0 -r 42bbbc4b4739 test-data/silva132_json
--- a/test-data/silva132_json Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"dada2_species": {"path": "silva_132.species", "name": "Silva version 132", "value": "silva_132"}, "dada2_taxonomy": {"path": "silva_132.taxonomy", "name": "Silva version 132", "value": "silva_132", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
diff -r e374299894b0 -r 42bbbc4b4739 test-data/silvaeuk132_json
--- a/test-data/silvaeuk132_json Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"dada2_taxonomy": {"path": "silva_euk_18S_132.taxonomy", "name": "Silva version 132 Eukaryotic 18S", "value": "silva_euk_18S_132", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
diff -r e374299894b0 -r 42bbbc4b4739 test-data/unite8fungi_json
--- a/test-data/unite8fungi_json Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"dada2_taxonomy": {"path": "unite_8.0_fungi.taxonomy", "name": "UNITE: General Fasta release 8.0 for Fungi", "value": "unite_8.0_fungi", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
diff -r e374299894b0 -r 42bbbc4b4739 test-data/unite8fungisingletons_json
--- a/test-data/unite8fungisingletons_json Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"dada2_taxonomy": {"path": "unite_8.0_fungi_singletons.taxonomy", "name": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons", "value": "unite_8.0_fungi_singletons", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
diff -r e374299894b0 -r 42bbbc4b4739 tool-data/dada2_species.loc.sample
--- a/tool-data/dada2_species.loc.sample Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-# This is a sample file distributed with Galaxy that is used to define a
-# list of dada2 reference data sets for species assignment, using three
-# tab separated columns:
-#
-#
-#
-# Datasets can be retrieved from http://busco.ezlab.org/frame_wget.html
-#
-# Datasets can be retrieved from https://benjjneb.github.io/dada2/training.html
diff -r e374299894b0 -r 42bbbc4b4739 tool-data/dada2_taxonomy.loc.sample
--- a/tool-data/dada2_taxonomy.loc.sample Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-# This is a sample file distributed with Galaxy that is used to define a
-# list of dada2 reference data sets for taxonomy assignment, using three
-# tab separated columns:
-#
-#
-#
-# Datasets can be retrieved from https://benjjneb.github.io/dada2/training.html
-#
-# taxlevels is a comma separated list of taxonomy levels
diff -r e374299894b0 -r 42bbbc4b4739 tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-
-
-
-
- value, name, path, taxlevels
-
-
-
diff -r e374299894b0 -r 42bbbc4b4739 tool_data_table_conf.xml.test
--- a/tool_data_table_conf.xml.test Thu Jun 06 07:59:19 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-
-
-
-
- value, name, path, taxlevels
-
-
-