Mercurial > repos > thanhlv > data_manager_build_ariba_database
comparison data_manager/ariba_database_builder.py @ 4:be46816110b4 draft
planemo upload for repository https://github.com/thanhleviet/galaxy-tools commit 98648bbb7796db528b5685d44a2a5b761c53c19b-dirty
| field | value |
|---|---|
| author | thanhlv |
| date | Sun, 19 May 2019 08:10:45 -0400 |
| parents | b13f7ba85697 |
| children | 78bf08fa1e75 |
comparison
Legend (row status in the table below): equal, deleted, inserted, replaced
| 3:b13f7ba85697 | 4:be46816110b4 |
|---|---|
| 1 # Thanh Le Viet | 1 import sys |
| 2 # 7-Jan-2019 | 2 import subprocess |
| 3 | 3 import shlex |
| 4 import shutil | |
| 4 import argparse | 5 import argparse |
| 5 import json | 6 import json |
| 6 import os | 7 import os |
| 7 import shutil | 8 import shutil |
| 8 | 9 import errno |
| 9 | 10 |
| 10 def main(args): | 11 |
| 12 data_table_name = "ariba_databases" | |
| 13 | |
| 14 mlst_dict = { | |
| 15 "achs" : "Achromobacter spp.", | |
| 16 "acib1" : "Acinetobacter baumannii#1", | |
| 17 "acib2" : "Acinetobacter baumannii#2", | |
| 18 "aers" : "Aeromonas spp.", | |
| 19 "anap" : "Anaplasma phagocytophilum", | |
| 20 "arcs" : "Arcobacter spp.", | |
| 21 "aspf" : "Aspergillus fumigatus", | |
| 22 "bacc" : "Bacillus cereus", | |
| 23 "bacl" : "Bacillus licheniformis", | |
| 24 "bacs" : "Bacillus subtilis", | |
| 25 "barb" : "Bartonella bacilliformis", | |
| 26 "barh" : "Bartonella henselae", | |
| 27 "bors" : "Bordetella spp.", | |
| 28 "borr" : "Borrelia spp.", | |
| 29 "brah" : "Brachyspira hampsonii", | |
| 30 "brach" : "Brachyspira hyodysenteriae", | |
| 31 "brai" : "Brachyspira intermedia", | |
| 32 "brap" : "Brachyspira pilosicoli", | |
| 33 "bras" : "Brachyspira spp.", | |
| 34 "brus" : "Brucella spp.", | |
| 35 "bucc" : "Burkholderia cepacia complex", | |
| 36 "burp" : "Burkholderia pseudomallei", | |
| 37 "camc" : "Campylobacter concisus/curvus", | |
| 38 "camf" : "Campylobacter fetus", | |
| 39 "camh" : "Campylobacter helveticus", | |
| 40 "rlis" : "Campylobacter hyointestinalis", | |
| 41 "cami" : "Campylobacter insulaenigrae", | |
| 42 "camj" : "Campylobacter jejuni", | |
| 43 "caml" : "Campylobacter lanienae", | |
| 44 "rari" : "Campylobacter lari", | |
| 45 "cams" : "Campylobacter sputorum", | |
| 46 "camu" : "Campylobacter upsaliensis", | |
| 47 "cana" : "Candida albicans", | |
| 48 "cang" : "Candida glabrata", | |
| 49 "cank" : "Candida krusei", | |
| 50 "cant" : "Candida tropicalis", | |
| 51 "cals" : "Candidatus Liberibacter solanacearum", | |
| 52 "carm" : "Carnobacterium maltaromaticum", | |
| 53 "chls" : "Chlamydiales spp.", | |
| 54 "citf" : "Citrobacter freundii", | |
| 55 "clos" : "Clonorchis sinensis", | |
| 56 "clob" : "Clostridium botulinum", | |
| 57 "clod" : "Clostridium difficile", | |
| 58 "mcum" : "Clostridium septicum", | |
| 59 "cord" : "Corynebacterium diphtheriae", | |
| 60 "cros" : "Cronobacter spp.", | |
| 61 "dicn" : "Dichelobacter nodosus", | |
| 62 "edws" : "Edwardsiella spp.", | |
| 63 "entc" : "Enterobacter cloacae", | |
| 64 "entf" : "Enterococcus faecalis", | |
| 65 "sium" : "Enterococcus faecium", | |
| 66 "escc1" : "Escherichia coli#1", | |
| 67 "escc2" : "Escherichia coli#2", | |
| 68 "flap" : "Flavobacterium psychrophilum", | |
| 69 "gala" : "Gallibacterium anatis", | |
| 70 "haei" : "Haemophilus influenzae", | |
| 71 "haep" : "Haemophilus parasuis", | |
| 72 "helc" : "Helicobacter cinaedi", | |
| 73 "help" : "Helicobacter pylori", | |
| 74 "hels" : "Helicobacter suis", | |
| 75 "kink" : "Kingella kingae", | |
| 76 "klea" : "Klebsiella aerogenes", | |
| 77 "kleo" : "Klebsiella oxytoca", | |
| 78 "klep" : "Klebsiella pneumoniae", | |
| 79 "kuds" : "Kudoa septempunctata", | |
| 80 "lacs" : "Lactobacillus salivarius", | |
| 81 "leps" : "Leptospira spp.", | |
| 82 "leps2" : "Leptospira spp.#2", | |
| 83 "leps3" : "Leptospira spp.#3", | |
| 84 "lism" : "Listeria monocytogenes", | |
| 85 "macc" : "Macrococcus canis", | |
| 86 "scus" : "Macrococcus caseolyticus", | |
| 87 "manh" : "Mannheimia haemolytica", | |
| 88 "melp" : "Melissococcus plutonius", | |
| 89 "morc" : "Moraxella catarrhalis", | |
| 90 "mycs" : "Mycobacteria spp.", | |
| 91 "myca" : "Mycobacterium abscessus", | |
| 92 "mycm" : "Mycobacterium massiliense", | |
| 93 "mycoa" : "Mycoplasma agalactiae", | |
| 94 "mycb" : "Mycoplasma bovis", | |
| 95 "mych" : "Mycoplasma hyopneumoniae", | |
| 96 "anis" : "Mycoplasma hyorhinis", | |
| 97 "myci" : "Mycoplasma iowae", | |
| 98 "mycp" : "Mycoplasma pneumoniae", | |
| 99 "mycos" : "Mycoplasma synoviae", | |
| 100 "neis" : "Neisseria spp.", | |
| 101 "orit" : "Orientia tsutsugamushi", | |
| 102 "ornr" : "Ornithobacterium rhinotracheale", | |
| 103 "pael" : "Paenibacillus larvae", | |
| 104 "pasm1" : "Pasteurella multocida#1", | |
| 105 "pasm2" : "Pasteurella multocida#2", | |
| 106 "pedp" : "Pediococcus pentosaceus", | |
| 107 "phod" : "Photobacterium damselae", | |
| 108 "piss" : "Piscirickettsia salmonis", | |
| 109 "porg" : "Porphyromonas gingivalis", | |
| 110 "proa" : "Propionibacterium acnes", | |
| 111 "psea" : "Pseudomonas aeruginosa", | |
| 112 "psef" : "Pseudomonas fluorescens", | |
| 113 "psep" : "Pseudomonas putida", | |
| 114 "rhos" : "Rhodococcus spp.", | |
| 115 "riea" : "Riemerella anatipestifer", | |
| 116 "sale" : "Salmonella enterica", | |
| 117 "sapp" : "Saprolegnia parasitica", | |
| 118 "sins" : "Sinorhizobium spp.", | |
| 119 "staa" : "Staphylococcus aureus", | |
| 120 "stae" : "Staphylococcus epidermidis", | |
| 121 "stah" : "Staphylococcus haemolyticus", | |
| 122 "snis" : "Staphylococcus hominis", | |
| 123 "stal" : "Staphylococcus lugdunensis", | |
| 124 "stap" : "Staphylococcus pseudintermedius", | |
| 125 "stem" : "Stenotrophomonas maltophilia", | |
| 126 "stra" : "Streptococcus agalactiae", | |
| 127 "sbcx" : "Streptococcus bovis/equinus complex (SBSEC)", | |
| 128 "strc" : "Streptococcus canis", | |
| 129 "stde" : "Streptococcus dysgalactiae equisimilis", | |
| 130 "strg" : "Streptococcus gallolyticus", | |
| 131 "stro" : "Streptococcus oralis", | |
| 132 "strp" : "Streptococcus pneumoniae", | |
| 133 "snes" : "Streptococcus pyogenes", | |
| 134 "strs" : "Streptococcus suis", | |
| 135 "strt" : "Streptococcus thermophilus", | |
| 136 "strt2" : "Streptococcus thermophilus#2", | |
| 137 "stru" : "Streptococcus uberis", | |
| 138 "strz" : "Streptococcus zooepidemicus", | |
| 139 "sspp" : "Streptomyces spp", | |
| 140 "tays" : "Taylorella spp.", | |
| 141 "tens" : "Tenacibaculum spp.", | |
| 142 "trep" : "Treponema pallidum", | |
| 143 "triv" : "Trichomonas vaginalis", | |
| 144 "ures" : "Ureaplasma spp.", | |
| 145 "vibc" : "Vibrio cholerae", | |
| 146 "vibc2" : "Vibrio cholerae#2", | |
| 147 "vibp" : "Vibrio parahaemolyticus", | |
| 148 "vibs" : "Vibrio spp.", | |
| 149 "vibt" : "Vibrio tapetis", | |
| 150 "vibv" : "Vibrio vulnificus", | |
| 151 "wolb" : "Wolbachia", | |
| 152 "xylf" : "Xylella fastidiosa", | |
| 153 "yerp" : "Yersinia pseudotuberculosis", | |
| 154 "yerr" : "Yersinia ruckeri", | |
| 155 "yers" : "Yersinia spp." | |
| 156 } | |
| 157 | |
| 158 | |
| 159 genes_dict = { | |
| 160 "card" : "CARD", | |
| 161 "resfinder" : "Resfinder", | |
| 162 "plasmidfinder" : "Plasmidfinder", | |
| 163 "megares" : "Megares", | |
| 164 "argannot" : "Argannot", | |
| 165 "vfdb_core" : "vfdb_core", | |
| 166 "vfdb_full" : "vfdb_full", | |
| 167 "virulencefinder" : "virulencefinder" | |
| 168 } | |
| 169 | |
| 170 def run_ariba(cmd): | |
| 171 _cmd = shlex.split(cmd) | |
| 172 subprocess.check_call(_cmd) | |
| 173 | |
| 174 def build_mlst(mlst_db): | |
| 175 mlst_species = mlst_dict[mlst_db] | |
| 176 run_ariba("ariba pubmlstget '{}' out".format(mlst_species)) | |
| 11 output_path = os.getcwd() | 177 output_path = os.getcwd() |
| 12 db_folder = [d for d in os.listdir(output_path) if os.path.isdir(d)] | 178 old = "{}/out/ref_db".format(output_path, mlst_db) |
| 13 params = json.loads(open(args.output).read()) | 179 new = "{}/{}".format(output_path, mlst_db) |
| 180 shutil.move(old, new) | |
| 181 | |
| 182 def build_curated_db(gen_db): | |
| 183 run_ariba("ariba getref {} out".format(gen_db)) | |
| 184 run_ariba("ariba prepareref -f out.fa -m out.tsv {}".format(gen_db)) | |
| 185 | |
| 186 def build_custom_db(fasta, coding, db_name): | |
| 187 run_ariba("ariba prepareref --all_coding {} -f {} {}".format(coding, fasta, db_name)) | |
| 188 | |
| 189 def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry): | |
| 190 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | |
| 191 data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) | |
| 192 data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) | |
| 193 return data_manager_dict | |
| 194 | |
| 195 def main(): | |
| 196 parser = argparse.ArgumentParser() | |
| 197 parser.add_argument('params') | |
| 198 parser.add_argument( '-t', '--dbtype', dest='database_type', help='database type' ) | |
| 199 parser.add_argument( '-d', '--db', dest='db_name', help='database name' ) | |
| 200 parser.add_argument( '-c', '--coding', dest='coding', help='' ) | |
| 201 parser.add_argument( '-f', '--fasta', dest='fasta', help='' ) | |
| 202 args = parser.parse_args() | |
| 203 | |
| 204 if args.database_type == "curated": | |
| 205 build_curated_db(args.db_name) | |
| 206 name = genes_dict[args.db_name] | |
| 207 elif args.database_type == "mlst": | |
| 208 build_mlst(args.db_name) | |
| 209 name = mlst_dict[args.db_name] | |
| 210 elif args.database_type == "fasta": | |
| 211 build_custom_db(args.fasta, args.coding, args.db_name) | |
| 212 name = args.db_name | |
| 213 | |
| 214 params = json.loads(open(args.params).read()) | |
| 215 | |
| 14 target_directory = params['output_data'][0]['extra_files_path'] | 216 target_directory = params['output_data'][0]['extra_files_path'] |
| 15 os.mkdir(target_directory) | 217 |
| 16 data_manager_entry = [] | 218 if not os.path.isdir(target_directory): |
| 17 for db in db_folder: | 219 os.mkdir(target_directory) |
| 18 print("Current: ".format(os.path.join(output_path, d))) | 220 |
| 19 print("Target: {}".format(target_directory)) | 221 output_path = os.getcwd() |
| 20 shutil.move(os.path.join(output_path, d), os.path.join(target_directory, d)) | 222 shutil.copytree(os.path.join(output_path, args.db_name), os.path.join(target_directory, args.db_name)) |
| 21 data_manager_entry.append(dict(value=db.lower(), | 223 |
| 22 name=db, | 224 data_manager_dict = {} |
| 23 path=target_directory) | 225 |
| 24 ) | 226 data_table_entry = { |
| 25 data_manager_json = dict(data_tables=dict(ariba_databases=data_manager_entry)) | 227 "value": args.db_name, |
| 26 file(args.output, 'w').write(json.dumps(data_manager_json)) | 228 "name": name, |
| 27 | 229 "path": os.path.join(target_directory, args.db_name) |
| 28 | 230 } |
| 29 if __name__ == '__main__': | 231 _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) |
| 30 parser = argparse.ArgumentParser(description='Create data manager json.') | 232 open(args.params, 'wb').write(json.dumps(data_manager_dict)) |
| 31 parser.add_argument('--out', dest='output', action='store', help='JSON filename') | 233 |
| 32 args = parser.parse_args() | 234 if __name__ == "__main__": |
| 33 main(args) | 235 main() |
| 236 |
