annotate data_manager/data_manager.py @ 2:b4c303665291 draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit eec95ccc2189355061112ea2785b82f13a0fa077-dirty
author matthias
date Fri, 08 Mar 2019 05:38:44 -0500
parents 419037fe1150
children 3a4ee8bf012a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
1 import argparse
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
2 import json
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
3 import os
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
4 import shutil
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
5 import sys
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
6 import zipfile
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
7 try:
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
8 # For Python 3.0 and later
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
9 from urllib.request import Request, urlopen
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
10 except ImportError:
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
11 # Fall back to Python 2 imports
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
12 from urllib2 import Request, urlopen
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
13
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
# Taxonomic ranks used for every data set that has no entry in FILE2TAXLEVELS.
DEFAULT_TAXLEVELS = "Kingdom,Phylum,Class,Order,Family,Genus,Species"

# Data set identifier -> human readable name (stored as 'name' of the
# data table entry).
FILE2NAME = {
    "silva132": "Silva version 132",
    "silva128": "Silva version 128",
    "rdp16": "RDP trainset 16",
    "rdp14": "RDP trainset 14",
    "gg13.84": "GreenGenes version 13.8",
    "unite8.0_fungi": "UNITE: General Fasta release 8.0 for Fungi",
    "unite8.0_fungi_singletons": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons",
    "unite8.0_euka": "UNITE: General Fasta release 8.0 for all Eukaryotes",
    "unite8.0_euka_singletons": "UNITE: General Fasta release 8.0 for all Eukaryotes including global and 97% singletons",
    "RefSeq_RDP_2018_05": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)",
    "gtdb_2018_11_20": "GTDB: Genome Taxonomy Database (Bacteria & Archaea) (11/2018)",
    "hitdb1": "HitDB version 1 (Human InTestinal 16S rRNA)",
    "silva132_euk_18S": "Silva version 132 Eukaryotic 18S",
    "PR2v4.11.1": "Protist Ribosomal Reference database (PR2) 4.11.1",
}

# Data set identifier -> download URL of the taxonomy training set.
# The UNITE entries point to zip archives, all others to gzipped fasta files.
FILE2TAXURL = {
    "silva132": "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
    "silva128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
    "rdp16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
    "rdp14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
    "unite8.0_fungi": "https://files.plutof.ut.ee/public/orig/EB/0C/EB0CCB3A871B77EA75E472D13926271076904A588D2E1C1EA5AFCF7397D48378.zip",
    "unite8.0_fungi_singletons": "https://files.plutof.ut.ee/doi/06/A2/06A2C86256EED64085670EB0C54B7115F6DAC8F311C656A9CB33E386CFABA0D0.zip",
    "unite8.0_euka": "https://files.plutof.ut.ee/public/orig/D6/96/D69658E99589D888A207805A744019DBA4EC0F603E67E53732767B3E03A5AA86.zip",
    "unite8.0_euka_singletons": "https://files.plutof.ut.ee/doi/C2/20/C22034350E32D6AD7E5D1AF3F8BC487E34DA0BE25602B0E748906005CE6ADA97.zip",
    "gg13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
    "RefSeq_RDP_2018_05": "https://zenodo.org/record/2541239/files/RefSeq-RDP16S_v2_May2018.fa.gz?download=1",
    "gtdb_2018_11_20": "https://zenodo.org/record/2541239/files/GTDB_bac-arc_ssu_r86.fa.gz?download=1",
    "hitdb1": "https://zenodo.org/record/159205/files/hitdb_v1.00.fa.gz?download=1",
    "silva132_euk_18S": "https://zenodo.org/record/1447330/files/silva_132.18s.99_rep_set.dada2.fa.gz?download=1",
    "PR2v4.11.1": "https://github.com/pr2database/pr2database/releases/download/4.11.1/pr2_version_4.11.1_dada2.fasta.gz",
}

# Data set identifier -> download URL of the species assignment file.
# Only the Silva and RDP data sets have an entry here.
FILE2SPECIESURL = {
    "silva132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
    "silva128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
    "rdp16": "https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1",
    "rdp14": "https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1",
}

# Data set identifier -> taxonomic ranks, for data sets that deviate from
# DEFAULT_TAXLEVELS.
FILE2TAXLEVELS = {
    "PR2v4.11.1": "Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species",
}
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
60
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
def url_download(url, fname, workdir):
    """
    download url to workdir/fname

    workdir is created if it does not exist yet.

    special treatment of UNITE DBs (fname starting with "unite"): they are
    zip files containing two fasta (xyz.fasta and developer/xyz.fasta).
    The archive is extracted into workdir and workdir/fname is then
    overwritten with a gzip compressed copy of the top level fasta file.

    url: location to download from
    fname: name of the file to create inside workdir
    workdir: directory receiving the download (and any extracted files)

    return fname (the file name relative to workdir, not the full path)
    raise Exception if a UNITE archive does not contain exactly one top
    level fasta file
    """
    # gzip is not imported at module level, so bring it in locally
    import gzip

    file_path = os.path.join(workdir, fname)
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    src = None
    try:
        src = urlopen(Request(url))
        with open(file_path, 'wb') as dst:
            # stream the response to disk without loading it into memory
            shutil.copyfileobj(src, dst)
    finally:
        # explicit close: the Python 2 urlopen result is no context manager
        if src is not None:
            src.close()

    if fname.startswith("unite"):
        # unzip download
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(workdir)
        # The archive was extracted into workdir, so the fasta has to be
        # searched there; the current working directory may be a different
        # directory. Only top level entries are considered, which skips
        # developer/xyz.fasta.
        fastas = sorted(
            os.path.join(workdir, entry)
            for entry in os.listdir(workdir)
            if entry.endswith("fasta") and not entry.startswith(".")
        )
        if len(fastas) != 1:
            msg = "UNITE download %s contained more than one or no fasta file" % url
            raise Exception(msg)
        # gzip top level fasta file (replaces the downloaded zip archive)
        with open(fastas[0], 'rb') as f_in:
            with gzip.open(file_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

    return fname
0
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
106
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
107 def main(dataset, outjson):
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
108
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
109 params = json.loads(open(outjson).read())
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
110 target_directory = params['output_data'][0]['extra_files_path']
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
111 os.mkdir(target_directory)
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
112 output_path = os.path.abspath(os.path.join(os.getcwd(), 'dada2'))
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
113
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
114 workdir = os.path.join(os.getcwd(), 'dada2')
2
b4c303665291 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit eec95ccc2189355061112ea2785b82f13a0fa077-dirty
matthias
parents: 0
diff changeset
115 path = url_download( FILE2TAXURL[dataset], dataset+".taxonomy", workdir)
0
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
116
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
117 data_manager_json = {"data_tables":{}}
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
118 data_manager_entry = {}
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
119 data_manager_entry['value'] = dataset
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
120 data_manager_entry['name'] = FILE2NAME[dataset]
2
b4c303665291 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit eec95ccc2189355061112ea2785b82f13a0fa077-dirty
matthias
parents: 0
diff changeset
121 data_manager_entry['path'] = dataset+".taxonomy"
0
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
122 data_manager_entry['taxlevels'] = FILE2TAXLEVELS.get(dataset, DEFAULT_TAXLEVELS)
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
123 data_manager_json["data_tables"]["dada2_taxonomy"] = data_manager_entry
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
124
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
125
2
b4c303665291 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit eec95ccc2189355061112ea2785b82f13a0fa077-dirty
matthias
parents: 0
diff changeset
126 if FILE2SPECIESURL.get(dataset, False ):
b4c303665291 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit eec95ccc2189355061112ea2785b82f13a0fa077-dirty
matthias
parents: 0
diff changeset
127 path = url_download( FILE2SPECIESURL[dataset], dataset+".species", workdir)
0
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
128
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
129 data_manager_entry = {}
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
130 data_manager_entry['value'] = dataset
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
131 data_manager_entry['name'] = FILE2NAME[dataset]
2
b4c303665291 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit eec95ccc2189355061112ea2785b82f13a0fa077-dirty
matthias
parents: 0
diff changeset
132 data_manager_entry['path'] = dataset+".species"
0
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
133 data_manager_json["data_tables"]["dada2_species"] = data_manager_entry
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
134
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
135 for filename in os.listdir(workdir):
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
136 shutil.move(os.path.join(output_path, filename), target_directory)
2
b4c303665291 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit eec95ccc2189355061112ea2785b82f13a0fa077-dirty
matthias
parents: 0
diff changeset
137
b4c303665291 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit eec95ccc2189355061112ea2785b82f13a0fa077-dirty
matthias
parents: 0
diff changeset
138 sys.stderr.write("JSON %s" %json.dumps(data_manager_json))
0
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
139 file(outjson, 'w').write(json.dumps(data_manager_json))
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
140
419037fe1150 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 42eb67646e47bef13eed672ff6b9d06b1d82ae3d-dirty
matthias
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the two options and pass them to main().
    parser = argparse.ArgumentParser(description='Create data manager json.')
    # Both options are mandatory. Without required=True argparse would fill a
    # missing option with None, and the failure would only show up later
    # inside main() instead of as a clear usage error here.
    parser.add_argument('--out', action='store', required=True,
                        help='JSON filename')
    parser.add_argument('--dataset', action='store', required=True,
                        help='Download data set name')
    args = parser.parse_args()

    main(args.dataset, args.out)