annotate data_manager/data_manager.py @ 9:facf9e6c872c draft default tip

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit df2dfeb75f88b326f567cab8df4e6c4a7f2e548c
author matthias
date Tue, 15 Oct 2019 07:20:59 -0400
parents da93e6a3fe23
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
1 import argparse
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
2 import json
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
3 import os
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
4 try:
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
5 # For Python 3.0 and later
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
6 from urllib.request import Request, urlopen
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
7 except ImportError:
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
8 # Fall back to Python 2 imports
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
9 from urllib2 import Request, urlopen
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
10
9
facf9e6c872c planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit df2dfeb75f88b326f567cab8df4e6c4a7f2e548c
matthias
parents: 8
diff changeset
# Taxonomic ranks used for every data set without an entry in FILE2TAXLEVELS.
DEFAULT_TAXLEVELS = "Kingdom,Phylum,Class,Order,Family,Genus,Species"

# Data set identifier -> human readable name shown in the data table.
FILE2NAME = {
    "silva_132": "Silva version 132",
    "silva_128": "Silva version 128",
    "rdp_16": "RDP trainset 16",
    "rdp_14": "RDP trainset 14",
    "greengenes_13.84": "GreenGenes version 13.84",
    "unite_8.0_fungi": "UNITE: General Fasta release 8.0 for Fungi",
    "unite_8.0_fungi_singletons": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons",
    "RefSeq_RDP_2018_05": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)",
    "gtdb_2018_11": "GTDB: Genome Taxonomy Database (Bacteria & Archaea) (11/2018)",
    "hitdb_1": "HitDB version 1 (Human InTestinal 16S rRNA)",
    "silva_euk_18S_132": "Silva version 132 Eukaryotic 18S",
    "PR2_4.11.1": "Protist Ribosomal Reference database (PR2) 4.11.1",
}

# Data set identifier -> download URL of the taxonomy training set.
FILE2TAXURL = {
    "silva_132": "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
    "silva_128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
    "rdp_16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
    "rdp_14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
    "unite_8.0_fungi": "https://files.plutof.ut.ee/public/orig/EB/0C/EB0CCB3A871B77EA75E472D13926271076904A588D2E1C1EA5AFCF7397D48378.zip",
    "unite_8.0_fungi_singletons": "https://files.plutof.ut.ee/doi/06/A2/06A2C86256EED64085670EB0C54B7115F6DAC8F311C656A9CB33E386CFABA0D0.zip",
    "greengenes_13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
    "RefSeq_RDP_2018_05": "https://zenodo.org/record/2541239/files/RefSeq-RDP16S_v2_May2018.fa.gz?download=1",
    "gtdb_2018_11": "https://zenodo.org/record/2541239/files/GTDB_bac-arc_ssu_r86.fa.gz?download=1",
    "hitdb_1": "https://zenodo.org/record/159205/files/hitdb_v1.00.fa.gz?download=1",
    "silva_euk_18S_132": "https://zenodo.org/record/1447330/files/silva_132.18s.99_rep_set.dada2.fa.gz?download=1",
    "PR2_4.11.1": "https://github.com/pr2database/pr2database/releases/download/4.11.1/pr2_version_4.11.1_dada2.fasta.gz",
}

# Data set identifier -> download URL of the species assignment file.
# Only data sets listed here get a species entry.
FILE2SPECIESURL = {
    "silva_132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
    "silva_128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
    "rdp_16": "https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1",
    "rdp_14": "https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1",
}

# Data set identifier -> taxonomic ranks, for data sets that deviate from
# DEFAULT_TAXLEVELS.
FILE2TAXLEVELS = {
    "PR2_4.11.1": "Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species",
}
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
53
9
facf9e6c872c planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit df2dfeb75f88b326f567cab8df4e6c4a7f2e548c
matthias
parents: 8
diff changeset
54
8
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
def url_download(url, fname, workdir):
    """
    download url to workdir/fname

    workdir is created (including missing parents) if it does not exist.

    Special case: if fname starts with "unite" the download is treated as a
    zip archive. It is extracted into workdir and the single top level
    fasta file is gzip-compressed to workdir/fname, which overwrites the
    downloaded zip.

    url: location to fetch (anything urlopen can handle)
    fname: name of the file to create inside workdir
    workdir: target directory

    raises Exception if a UNITE archive does not contain exactly one
    top level file matching "*fasta"; errors from urlopen / file handling
    are passed through unchanged
    """
    # local imports, in line with the UNITE specific imports further down
    import shutil
    from contextlib import closing

    file_path = os.path.join(workdir, fname)
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    # closing() is used instead of a plain with-statement because the
    # response object of the Python 2 fallback (urllib2) is no context manager
    with closing(urlopen(Request(url))) as src:
        with open(file_path, 'wb') as dst:
            shutil.copyfileobj(src, dst)

    # special treatment of UNITE DBs: they are zip files containing two fasta (xyz.fasta and developer/xyz.fasta)
    if fname.startswith("unite"):
        import glob
        import gzip
        import zipfile
        # unzip download; the with-statement closes the archive even if
        # the extraction fails
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(workdir)
        # gzip top level fasta file
        fastas = glob.glob("%s/*fasta" % workdir)
        if len(fastas) != 1:
            msg = "UNITE download %s contained %d fasta file(s): %s" % (url, len(fastas), " ".join(fastas))
            raise Exception(msg)
        with open(fastas[0], 'rb') as f_in:
            with gzip.open(file_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
96
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
97
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
def remote_dataset(dataset, outjson):
    """
    download the files of a data set and write the data manager JSON

    outjson is read first: the value at
    params['output_data'][0]['extra_files_path'] is used as download
    directory. Afterwards outjson is overwritten with the data table
    entries: always one for "dada2_taxonomy" and, if the data set has an
    entry in FILE2SPECIESURL, one for "dada2_species".

    dataset: data set identifier (key of FILE2TAXURL and FILE2NAME)
    outjson: path of the JSON file that is read and then rewritten

    raises KeyError if dataset is not a key of FILE2TAXURL / FILE2NAME
    """
    with open(outjson) as jf:
        params = json.load(jf)

    workdir = params['output_data'][0]['extra_files_path']
    # tolerate an already existing directory and create missing parents
    if not os.path.isdir(workdir):
        os.makedirs(workdir)

    taxonomy_file = dataset + ".taxonomy"
    url_download(FILE2TAXURL[dataset], taxonomy_file, workdir)

    data_manager_json = {"data_tables": {}}
    data_manager_json["data_tables"]["dada2_taxonomy"] = {
        'value': dataset,
        'name': FILE2NAME[dataset],
        'path': taxonomy_file,
        'taxlevels': FILE2TAXLEVELS.get(dataset, DEFAULT_TAXLEVELS),
    }

    # species assignment files are only available for some data sets
    species_url = FILE2SPECIESURL.get(dataset)
    if species_url:
        species_file = dataset + ".species"
        url_download(species_url, species_file, workdir)
        data_manager_json["data_tables"]["dada2_species"] = {
            'value': dataset,
            'name': FILE2NAME[dataset],
            'path': species_file,
        }

    # open() instead of file(): the latter only exists in Python 2
    with open(outjson, 'w') as jf:
        jf.write(json.dumps(data_manager_json))
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
125
9
facf9e6c872c planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit df2dfeb75f88b326f567cab8df4e6c4a7f2e548c
matthias
parents: 8
diff changeset
126
8
da93e6a3fe23 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5d8314337211e041568578b0e85001af05b73c2a
matthias
parents:
diff changeset
if __name__ == '__main__':
    # command line entry point: parse the two options and hand them over
    # to remote_dataset
    cli = argparse.ArgumentParser(description='Create data manager json.')
    for flag, help_text in (
        ('--out', 'JSON filename'),
        ('--dataset', 'Download data set name'),
    ):
        cli.add_argument(flag, action='store', help=help_text)
    options = cli.parse_args()

    remote_dataset(dataset=options.dataset, outjson=options.out)