comparison data_manager/ariba_database_builder.py @ 4:be46816110b4 draft

planemo upload for repository https://github.com/thanhleviet/galaxy-tools commit 98648bbb7796db528b5685d44a2a5b761c53c19b-dirty
author thanhlv
date Sun, 19 May 2019 08:10:45 -0400
parents b13f7ba85697
children 78bf08fa1e75
comparison
equal deleted inserted replaced
3:b13f7ba85697 4:be46816110b4
1 # Thanh Le Viet 1 import sys
2 # 7-Jan-2019 2 import subprocess
3 3 import shlex
4 import shutil
4 import argparse 5 import argparse
5 import json 6 import json
6 import os 7 import os
7 import shutil 8 import shutil
8 9 import errno
9 10
10 def main(args): 11
# Key under 'data_tables' in the JSON written by main(); the new database
# entry is appended to the table with this name.
data_table_name = "ariba_databases"
13
# Maps the short database key given on the command line (-d/--db) to the
# species name handed to ``ariba pubmlstget``.  The value is also used as the
# display name of the data-table entry, so both sides must stay exactly as is.
mlst_dict = {
    "achs": "Achromobacter spp.",
    "acib1": "Acinetobacter baumannii#1",
    "acib2": "Acinetobacter baumannii#2",
    "aers": "Aeromonas spp.",
    "anap": "Anaplasma phagocytophilum",
    "arcs": "Arcobacter spp.",
    "aspf": "Aspergillus fumigatus",
    "bacc": "Bacillus cereus",
    "bacl": "Bacillus licheniformis",
    "bacs": "Bacillus subtilis",
    "barb": "Bartonella bacilliformis",
    "barh": "Bartonella henselae",
    "bors": "Bordetella spp.",
    "borr": "Borrelia spp.",
    "brah": "Brachyspira hampsonii",
    "brach": "Brachyspira hyodysenteriae",
    "brai": "Brachyspira intermedia",
    "brap": "Brachyspira pilosicoli",
    "bras": "Brachyspira spp.",
    "brus": "Brucella spp.",
    "bucc": "Burkholderia cepacia complex",
    "burp": "Burkholderia pseudomallei",
    "camc": "Campylobacter concisus/curvus",
    "camf": "Campylobacter fetus",
    "camh": "Campylobacter helveticus",
    "rlis": "Campylobacter hyointestinalis",
    "cami": "Campylobacter insulaenigrae",
    "camj": "Campylobacter jejuni",
    "caml": "Campylobacter lanienae",
    "rari": "Campylobacter lari",
    "cams": "Campylobacter sputorum",
    "camu": "Campylobacter upsaliensis",
    "cana": "Candida albicans",
    "cang": "Candida glabrata",
    "cank": "Candida krusei",
    "cant": "Candida tropicalis",
    "cals": "Candidatus Liberibacter solanacearum",
    "carm": "Carnobacterium maltaromaticum",
    "chls": "Chlamydiales spp.",
    "citf": "Citrobacter freundii",
    "clos": "Clonorchis sinensis",
    "clob": "Clostridium botulinum",
    "clod": "Clostridium difficile",
    "mcum": "Clostridium septicum",
    "cord": "Corynebacterium diphtheriae",
    "cros": "Cronobacter spp.",
    "dicn": "Dichelobacter nodosus",
    "edws": "Edwardsiella spp.",
    "entc": "Enterobacter cloacae",
    "entf": "Enterococcus faecalis",
    "sium": "Enterococcus faecium",
    "escc1": "Escherichia coli#1",
    "escc2": "Escherichia coli#2",
    "flap": "Flavobacterium psychrophilum",
    "gala": "Gallibacterium anatis",
    "haei": "Haemophilus influenzae",
    "haep": "Haemophilus parasuis",
    "helc": "Helicobacter cinaedi",
    "help": "Helicobacter pylori",
    "hels": "Helicobacter suis",
    "kink": "Kingella kingae",
    "klea": "Klebsiella aerogenes",
    "kleo": "Klebsiella oxytoca",
    "klep": "Klebsiella pneumoniae",
    "kuds": "Kudoa septempunctata",
    "lacs": "Lactobacillus salivarius",
    "leps": "Leptospira spp.",
    "leps2": "Leptospira spp.#2",
    "leps3": "Leptospira spp.#3",
    "lism": "Listeria monocytogenes",
    "macc": "Macrococcus canis",
    "scus": "Macrococcus caseolyticus",
    "manh": "Mannheimia haemolytica",
    "melp": "Melissococcus plutonius",
    "morc": "Moraxella catarrhalis",
    "mycs": "Mycobacteria spp.",
    "myca": "Mycobacterium abscessus",
    "mycm": "Mycobacterium massiliense",
    "mycoa": "Mycoplasma agalactiae",
    "mycb": "Mycoplasma bovis",
    "mych": "Mycoplasma hyopneumoniae",
    "anis": "Mycoplasma hyorhinis",
    "myci": "Mycoplasma iowae",
    "mycp": "Mycoplasma pneumoniae",
    "mycos": "Mycoplasma synoviae",
    "neis": "Neisseria spp.",
    "orit": "Orientia tsutsugamushi",
    "ornr": "Ornithobacterium rhinotracheale",
    "pael": "Paenibacillus larvae",
    "pasm1": "Pasteurella multocida#1",
    "pasm2": "Pasteurella multocida#2",
    "pedp": "Pediococcus pentosaceus",
    "phod": "Photobacterium damselae",
    "piss": "Piscirickettsia salmonis",
    "porg": "Porphyromonas gingivalis",
    "proa": "Propionibacterium acnes",
    "psea": "Pseudomonas aeruginosa",
    "psef": "Pseudomonas fluorescens",
    "psep": "Pseudomonas putida",
    "rhos": "Rhodococcus spp.",
    "riea": "Riemerella anatipestifer",
    "sale": "Salmonella enterica",
    "sapp": "Saprolegnia parasitica",
    "sins": "Sinorhizobium spp.",
    "staa": "Staphylococcus aureus",
    "stae": "Staphylococcus epidermidis",
    "stah": "Staphylococcus haemolyticus",
    "snis": "Staphylococcus hominis",
    "stal": "Staphylococcus lugdunensis",
    "stap": "Staphylococcus pseudintermedius",
    "stem": "Stenotrophomonas maltophilia",
    "stra": "Streptococcus agalactiae",
    "sbcx": "Streptococcus bovis/equinus complex (SBSEC)",
    "strc": "Streptococcus canis",
    "stde": "Streptococcus dysgalactiae equisimilis",
    "strg": "Streptococcus gallolyticus",
    "stro": "Streptococcus oralis",
    "strp": "Streptococcus pneumoniae",
    "snes": "Streptococcus pyogenes",
    "strs": "Streptococcus suis",
    "strt": "Streptococcus thermophilus",
    "strt2": "Streptococcus thermophilus#2",
    "stru": "Streptococcus uberis",
    "strz": "Streptococcus zooepidemicus",
    "sspp": "Streptomyces spp",
    "tays": "Taylorella spp.",
    "tens": "Tenacibaculum spp.",
    "trep": "Treponema pallidum",
    "triv": "Trichomonas vaginalis",
    "ures": "Ureaplasma spp.",
    "vibc": "Vibrio cholerae",
    "vibc2": "Vibrio cholerae#2",
    "vibp": "Vibrio parahaemolyticus",
    "vibs": "Vibrio spp.",
    "vibt": "Vibrio tapetis",
    "vibv": "Vibrio vulnificus",
    "wolb": "Wolbachia",
    "xylf": "Xylella fastidiosa",
    "yerp": "Yersinia pseudotuberculosis",
    "yerr": "Yersinia ruckeri",
    "yers": "Yersinia spp.",
}
157
158
# Maps the curated reference set name passed to ``ariba getref`` (the -d/--db
# value) to the display name stored in the data-table entry.
genes_dict = {
    "card": "CARD",
    "resfinder": "Resfinder",
    "plasmidfinder": "Plasmidfinder",
    "megares": "Megares",
    "argannot": "Argannot",
    "vfdb_core": "vfdb_core",
    "vfdb_full": "vfdb_full",
    "virulencefinder": "virulencefinder",
}
169
def run_ariba(cmd):
    """Run an external command and raise if it exits with a non-zero status.

    Args:
        cmd: Either a command-line string, which is tokenised with
            ``shlex.split`` (no shell is involved), or an already-split
            list/tuple of arguments, which is executed as given.  Passing a
            sequence keeps arguments that contain whitespace or quote
            characters intact.

    Raises:
        ValueError: If ``cmd`` contains no arguments.
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    if isinstance(cmd, (list, tuple)):
        argv = [str(part) for part in cmd]
    else:
        argv = shlex.split(cmd)
    if not argv:
        # check_call([]) fails with an obscure error; say what went wrong.
        raise ValueError("run_ariba() needs a non-empty command")
    subprocess.check_call(argv)
173
def build_mlst(mlst_db):
    """Download a PubMLST scheme and leave its database in ``./<mlst_db>``.

    ``ariba pubmlstget`` is run with the species name that ``mlst_dict`` maps
    ``mlst_db`` to, using ``out`` as the output prefix.  The ``out/ref_db``
    directory is then moved to a directory named ``mlst_db`` inside the
    current working directory.

    Args:
        mlst_db: A key of ``mlst_dict``.

    Raises:
        KeyError: If ``mlst_db`` is not a key of ``mlst_dict``.
        subprocess.CalledProcessError: If the ``ariba`` command fails.
    """
    mlst_species = mlst_dict[mlst_db]
    run_ariba("ariba pubmlstget '{}' out".format(mlst_species))

    working_dir = os.getcwd()
    # Build both paths with os.path.join; the source path does not depend on
    # the database key, only the destination does.
    ref_db = os.path.join(working_dir, "out", "ref_db")
    destination = os.path.join(working_dir, mlst_db)
    shutil.move(ref_db, destination)
181
def build_curated_db(gen_db):
    """Fetch a curated reference set and prepare it as an ARIBA database.

    Runs ``ariba getref <gen_db> out`` and then ``ariba prepareref`` on the
    ``out.fa`` / ``out.tsv`` pair, passing ``gen_db`` as the final argument
    of ``prepareref``.

    Args:
        gen_db: Name of the reference set, inserted verbatim into both
            command lines.
    """
    command_templates = (
        "ariba getref {} out",
        "ariba prepareref -f out.fa -m out.tsv {}",
    )
    # Order matters: prepareref consumes the files written by getref.
    for template in command_templates:
        run_ariba(template.format(gen_db))
185
def build_custom_db(fasta, coding, db_name):
    """Prepare an ARIBA database from a user-supplied FASTA file.

    Args:
        fasta: Value passed to ``ariba prepareref -f``.
        coding: Value passed to ``ariba prepareref --all_coding``.
        db_name: Final positional argument of ``ariba prepareref``.
    """
    command = "ariba prepareref --all_coding {} -f {} {}".format(
        coding, fasta, db_name
    )
    run_ariba(command)
188
def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
    """Append ``data_table_entry`` to one table of ``data_manager_dict``.

    The ``'data_tables'`` mapping and the list for ``data_table_name`` are
    created on first use; existing entries are kept.

    Args:
        data_manager_dict: Dictionary to update in place.
        data_table_name: Name of the table the entry belongs to.
        data_table_entry: Entry to append to that table's list.

    Returns:
        The same ``data_manager_dict`` object that was passed in.
    """
    tables = data_manager_dict.setdefault('data_tables', {})
    tables.setdefault(data_table_name, []).append(data_table_entry)
    return data_manager_dict
194
def main():
    """Build the requested ARIBA database and record it for the data manager.

    Command-line arguments:
        params: Path to a JSON file.  ``output_data[0]['extra_files_path']``
            is read from it, and the file is then overwritten with the
            generated data-table entry.
        -t/--dbtype: ``curated``, ``mlst`` or ``fasta``.
        -d/--db: For ``curated``/``mlst`` a key of ``genes_dict``/``mlst_dict``;
            for ``fasta`` the name given to the new database.
        -c/--coding: Value forwarded to ``ariba prepareref --all_coding``
            (``fasta`` only).
        -f/--fasta: File forwarded to ``ariba prepareref -f`` (``fasta`` only).

    Invalid requests are reported through ``parser.error`` (exit status 2)
    before any ``ariba`` command is started.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('params')
    parser.add_argument('-t', '--dbtype', dest='database_type', help='database type')
    parser.add_argument('-d', '--db', dest='db_name', help='database name')
    parser.add_argument('-c', '--coding', dest='coding',
                        help='value passed to ariba prepareref --all_coding')
    parser.add_argument('-f', '--fasta', dest='fasta',
                        help='FASTA file passed to ariba prepareref -f')
    args = parser.parse_args()

    if not args.db_name:
        parser.error("a database name (-d/--db) is required")

    # Resolve the display name before building, so an unknown key fails
    # immediately instead of after a download.
    if args.database_type == "curated":
        if args.db_name not in genes_dict:
            parser.error("unknown curated database: {}".format(args.db_name))
        name = genes_dict[args.db_name]
        build_curated_db(args.db_name)
    elif args.database_type == "mlst":
        if args.db_name not in mlst_dict:
            parser.error("unknown MLST database: {}".format(args.db_name))
        name = mlst_dict[args.db_name]
        build_mlst(args.db_name)
    elif args.database_type == "fasta":
        if not args.fasta or not args.coding:
            parser.error("database type 'fasta' requires both --fasta and --coding")
        name = args.db_name
        build_custom_db(args.fasta, args.coding, args.db_name)
    else:
        parser.error("unknown database type: {}".format(args.database_type))

    with open(args.params) as handle:
        params = json.load(handle)

    target_directory = params['output_data'][0]['extra_files_path']

    # Create the directory (and any missing parents); an already existing
    # directory is fine, anything else is a real error.
    try:
        os.makedirs(target_directory)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(target_directory):
            raise

    built_db = os.path.join(os.getcwd(), args.db_name)
    installed_db = os.path.join(target_directory, args.db_name)
    shutil.copytree(built_db, installed_db)

    data_manager_dict = {}
    data_table_entry = {
        "value": args.db_name,
        "name": name,
        "path": installed_db,
    }
    _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)

    # Text mode: json produces str, which cannot be written to a file opened
    # in binary mode on Python 3.
    with open(args.params, 'w') as handle:
        json.dump(data_manager_dict, handle)


if __name__ == "__main__":
    main()
236