comparison data_manager/data_manager.py @ 0:419037fe1150 draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
author matthias
date Thu, 07 Mar 2019 09:33:43 -0500
parents
children b4c303665291
comparison
equal deleted inserted replaced
-1:000000000000 0:419037fe1150
1 import argparse
2 import json
3 import os
4 import shutil
5 import sys
6 import zipfile
7 try:
8 # For Python 3.0 and later
9 from urllib.request import Request, urlopen
10 except ImportError:
11 # Fall back to Python 2 imports
12 from urllib2 import Request, urlopen
13
# Taxonomic ranks recorded for a data set that has no entry of its own in
# FILE2TAXLEVELS.
DEFAULT_TAXLEVELS = "Kingdom,Phylum,Class,Order,Family,Genus,Species"

# Data set key -> display name stored in the data table.
FILE2NAME = {
    "silva132": "Silva version 132",
    "silva128": "Silva version 128",
    "rdp16": "RDP trainset 16",
    "rdp14": "RDP trainset 14",
    "gg13.84": "GreenGenes version 13.8",
}

# Data set key -> download URL of the taxonomy training set.
FILE2TAXURL = {
    "silva132": "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
    "silva128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
    "rdp16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
    "rdp14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
    "gg13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
}

# Data set key -> download URL of the species assignment file.
# There is no entry for "gg13.84".
FILE2SPECIESURL = {
    "silva132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
    "silva128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
    "rdp16": "https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1",
    "rdp14": "https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1",
}

# Data set key -> comma separated taxonomic ranks; currently empty, so
# lookups fall back to whatever default the caller supplies.
FILE2TAXLEVELS = {}
41
def url_download(url, fname, workdir):
    """
    Download url to workdir/fname.

    url: address to fetch; anything urlopen() can open
    fname: name of the file to create inside workdir
    workdir: target directory, created (including parents) if missing

    Return the path to the resulting file.

    Errors raised by urlopen() or by the file system are not caught here.
    A transfer that fails half way may leave a partial file behind.
    """
    file_path = os.path.join(workdir, fname)
    # Create first and inspect afterwards: this avoids the window between
    # an "exists" check and the creation, and still works on Python 2,
    # which has no exist_ok argument.
    try:
        os.makedirs(workdir)
    except OSError:
        if not os.path.isdir(workdir):
            raise
    src = urlopen(Request(url))
    try:
        with open(file_path, 'wb') as dst:
            # Streams in buffered chunks, so the download is never held in
            # memory as a whole.
            shutil.copyfileobj(src, dst)
    finally:
        # Python 2 responses are not context managers, hence the explicit
        # close.
        src.close()
    return file_path
67
def main(dataset, outjson):
    """
    Download the reference files of a data set and write the data manager json.

    dataset: data set key, used to look up FILE2TAXURL, FILE2NAME,
        FILE2TAXLEVELS and FILE2SPECIESURL
    outjson: path of a JSON file; it is read for
        output_data[0]['extra_files_path'] and overwritten at the end with
        the generated data table entries

    The taxonomy file is always downloaded; the species file only if
    FILE2SPECIESURL has an entry for the data set. All downloaded files are
    moved into the extra files directory.

    Raises KeyError if dataset is not a key of FILE2TAXURL / FILE2NAME and
    OSError if the extra files directory cannot be created (os.mkdir also
    fails if it already exists).
    """
    with open(outjson) as handle:
        params = json.load(handle)
    target_directory = params['output_data'][0]['extra_files_path']
    os.mkdir(target_directory)

    # Files are downloaded into ./dada2 first and moved afterwards.
    workdir = os.path.abspath(os.path.join(os.getcwd(), 'dada2'))
    path = url_download(FILE2TAXURL[dataset], dataset + ".taxonomy", workdir)

    # NOTE(review): the recorded 'path' values point into workdir, but the
    # files are moved to target_directory below -- confirm that the data
    # manager configuration resolves these paths as intended.
    data_manager_json = {"data_tables": {}}
    data_manager_json["data_tables"]["dada2_taxonomy"] = {
        'value': dataset,
        'name': FILE2NAME[dataset],
        'path': path,
        'taxlevels': FILE2TAXLEVELS.get(dataset, DEFAULT_TAXLEVELS),
    }

    # Not every data set ships a species assignment file.
    species_url = FILE2SPECIESURL.get(dataset)
    if species_url:
        path = url_download(species_url, dataset + ".species", workdir)
        data_manager_json["data_tables"]["dada2_species"] = {
            'value': dataset,
            'name': FILE2NAME[dataset],
            'path': path,
        }

    for filename in os.listdir(workdir):
        shutil.move(os.path.join(workdir, filename), target_directory)
    # open() instead of the Python 2 only file() builtin
    with open(outjson, 'w') as handle:
        handle.write(json.dumps(data_manager_json))
99
if __name__ == '__main__':
    # Command line entry point. Both options are mandatory: without them
    # main() would receive None and fail with an unrelated error, so let
    # argparse report the missing option instead.
    parser = argparse.ArgumentParser(description='Create data manager json.')
    parser.add_argument('--out', action='store', required=True, help='JSON filename')
    parser.add_argument('--dataset', action='store', required=True, help='Download data set name')
    args = parser.parse_args()

    main(args.dataset, args.out)