Mercurial > repos > thanhlv > data_manager_build_ariba_database
view data_manager/ariba_database_builder.py @ 5:78bf08fa1e75 draft
planemo upload for repository https://github.com/thanhleviet/galaxy-tools commit 98648bbb7796db528b5685d44a2a5b761c53c19b-dirty
author | thanhlv |
---|---|
date | Sun, 19 May 2019 08:12:57 -0400 |
parents | be46816110b4 |
children | 218301fbbb99 |
line wrap: on
line source
import argparse
import errno
import json
import os
import shlex
import shutil
import subprocess
import sys

# Key under "data_tables" in the JSON document that main() writes back.
data_table_name = "ariba_databases"

# Short code accepted by --db (with --dbtype mlst) -> species name handed to
# `ariba pubmlstget`. The species name is also used as the entry's "name".
mlst_dict = {
    "achs": "Achromobacter spp.",
    "acib1": "Acinetobacter baumannii#1",
    "acib2": "Acinetobacter baumannii#2",
    "aers": "Aeromonas spp.",
    "anap": "Anaplasma phagocytophilum",
    "arcs": "Arcobacter spp.",
    "aspf": "Aspergillus fumigatus",
    "bacc": "Bacillus cereus",
    "bacl": "Bacillus licheniformis",
    "bacs": "Bacillus subtilis",
    "barb": "Bartonella bacilliformis",
    "barh": "Bartonella henselae",
    "bors": "Bordetella spp.",
    "borr": "Borrelia spp.",
    "brah": "Brachyspira hampsonii",
    "brach": "Brachyspira hyodysenteriae",
    "brai": "Brachyspira intermedia",
    "brap": "Brachyspira pilosicoli",
    "bras": "Brachyspira spp.",
    "brus": "Brucella spp.",
    "bucc": "Burkholderia cepacia complex",
    "burp": "Burkholderia pseudomallei",
    "camc": "Campylobacter concisus/curvus",
    "camf": "Campylobacter fetus",
    "camh": "Campylobacter helveticus",
    "rlis": "Campylobacter hyointestinalis",
    "cami": "Campylobacter insulaenigrae",
    "camj": "Campylobacter jejuni",
    "caml": "Campylobacter lanienae",
    "rari": "Campylobacter lari",
    "cams": "Campylobacter sputorum",
    "camu": "Campylobacter upsaliensis",
    "cana": "Candida albicans",
    "cang": "Candida glabrata",
    "cank": "Candida krusei",
    "cant": "Candida tropicalis",
    "cals": "Candidatus Liberibacter solanacearum",
    "carm": "Carnobacterium maltaromaticum",
    "chls": "Chlamydiales spp.",
    "citf": "Citrobacter freundii",
    "clos": "Clonorchis sinensis",
    "clob": "Clostridium botulinum",
    "clod": "Clostridium difficile",
    "mcum": "Clostridium septicum",
    "cord": "Corynebacterium diphtheriae",
    "cros": "Cronobacter spp.",
    "dicn": "Dichelobacter nodosus",
    "edws": "Edwardsiella spp.",
    "entc": "Enterobacter cloacae",
    "entf": "Enterococcus faecalis",
    "sium": "Enterococcus faecium",
    "escc1": "Escherichia coli#1",
    "escc2": "Escherichia coli#2",
    "flap": "Flavobacterium psychrophilum",
    "gala": "Gallibacterium anatis",
    "haei": "Haemophilus influenzae",
    "haep": "Haemophilus parasuis",
    "helc": "Helicobacter cinaedi",
    "help": "Helicobacter pylori",
    "hels": "Helicobacter suis",
    "kink": "Kingella kingae",
    "klea": "Klebsiella aerogenes",
    "kleo": "Klebsiella oxytoca",
    "klep": "Klebsiella pneumoniae",
    "kuds": "Kudoa septempunctata",
    "lacs": "Lactobacillus salivarius",
    "leps": "Leptospira spp.",
    "leps2": "Leptospira spp.#2",
    "leps3": "Leptospira spp.#3",
    "lism": "Listeria monocytogenes",
    "macc": "Macrococcus canis",
    "scus": "Macrococcus caseolyticus",
    "manh": "Mannheimia haemolytica",
    "melp": "Melissococcus plutonius",
    "morc": "Moraxella catarrhalis",
    "mycs": "Mycobacteria spp.",
    "myca": "Mycobacterium abscessus",
    "mycm": "Mycobacterium massiliense",
    "mycoa": "Mycoplasma agalactiae",
    "mycb": "Mycoplasma bovis",
    "mych": "Mycoplasma hyopneumoniae",
    "anis": "Mycoplasma hyorhinis",
    "myci": "Mycoplasma iowae",
    "mycp": "Mycoplasma pneumoniae",
    "mycos": "Mycoplasma synoviae",
    "neis": "Neisseria spp.",
    "orit": "Orientia tsutsugamushi",
    "ornr": "Ornithobacterium rhinotracheale",
    "pael": "Paenibacillus larvae",
    "pasm1": "Pasteurella multocida#1",
    "pasm2": "Pasteurella multocida#2",
    "pedp": "Pediococcus pentosaceus",
    "phod": "Photobacterium damselae",
    "piss": "Piscirickettsia salmonis",
    "porg": "Porphyromonas gingivalis",
    "proa": "Propionibacterium acnes",
    "psea": "Pseudomonas aeruginosa",
    "psef": "Pseudomonas fluorescens",
    "psep": "Pseudomonas putida",
    "rhos": "Rhodococcus spp.",
    "riea": "Riemerella anatipestifer",
    "sale": "Salmonella enterica",
    "sapp": "Saprolegnia parasitica",
    "sins": "Sinorhizobium spp.",
    "staa": "Staphylococcus aureus",
    "stae": "Staphylococcus epidermidis",
    "stah": "Staphylococcus haemolyticus",
    "snis": "Staphylococcus hominis",
    "stal": "Staphylococcus lugdunensis",
    "stap": "Staphylococcus pseudintermedius",
    "stem": "Stenotrophomonas maltophilia",
    "stra": "Streptococcus agalactiae",
    "sbcx": "Streptococcus bovis/equinus complex (SBSEC)",
    "strc": "Streptococcus canis",
    "stde": "Streptococcus dysgalactiae equisimilis",
    "strg": "Streptococcus gallolyticus",
    "stro": "Streptococcus oralis",
    "strp": "Streptococcus pneumoniae",
    "snes": "Streptococcus pyogenes",
    "strs": "Streptococcus suis",
    "strt": "Streptococcus thermophilus",
    "strt2": "Streptococcus thermophilus#2",
    "stru": "Streptococcus uberis",
    "strz": "Streptococcus zooepidemicus",
    "sspp": "Streptomyces spp",
    "tays": "Taylorella spp.",
    "tens": "Tenacibaculum spp.",
    "trep": "Treponema pallidum",
    "triv": "Trichomonas vaginalis",
    "ures": "Ureaplasma spp.",
    "vibc": "Vibrio cholerae",
    "vibc2": "Vibrio cholerae#2",
    "vibp": "Vibrio parahaemolyticus",
    "vibs": "Vibrio spp.",
    "vibt": "Vibrio tapetis",
    "vibv": "Vibrio vulnificus",
    "wolb": "Wolbachia",
    "xylf": "Xylella fastidiosa",
    "yerp": "Yersinia pseudotuberculosis",
    "yerr": "Yersinia ruckeri",
    "yers": "Yersinia spp.",
}

# Reference name accepted by --db (with --dbtype curated) and handed to
# `ariba getref` -> display name stored in the data table entry.
genes_dict = {
    "card": "CARD",
    "resfinder": "Resfinder",
    "plasmidfinder": "Plasmidfinder",
    "megares": "Megares",
    "argannot": "Argannot",
    "vfdb_core": "vfdb_core",
    "vfdb_full": "vfdb_full",
    "virulencefinder": "virulencefinder",
}

# Values accepted by --dbtype, in the order main() dispatches on them.
_DATABASE_TYPES = ("curated", "mlst", "fasta")


def run_ariba(cmd):
    """Run an external command and fail loudly if it does not succeed.

    Args:
        cmd: Either a command-line string, which is tokenised with
            ``shlex.split``, or a list/tuple of already separated
            arguments, which is used as given. The list form avoids any
            re-splitting of arguments that contain whitespace or quotes.

    Raises:
        subprocess.CalledProcessError: the command exited with a non-zero
            status.
    """
    if isinstance(cmd, (list, tuple)):
        argv = list(cmd)
    else:
        argv = shlex.split(cmd)
    subprocess.check_call(argv)


def build_mlst(mlst_db):
    """Download a PubMLST scheme and move it to ``<cwd>/<mlst_db>``.

    Args:
        mlst_db: A key of ``mlst_dict``.

    Raises:
        KeyError: ``mlst_db`` is not a key of ``mlst_dict``.
        subprocess.CalledProcessError: ``ariba pubmlstget`` failed.
    """
    species = mlst_dict[mlst_db]
    run_ariba(["ariba", "pubmlstget", species, "out"])
    work_dir = os.getcwd()
    # The prepared database is taken from out/ref_db and renamed after the
    # scheme code so main() can find it under the --db name.
    shutil.move(
        os.path.join(work_dir, "out", "ref_db"),
        os.path.join(work_dir, mlst_db),
    )


def build_curated_db(gen_db):
    """Fetch a curated reference with ``ariba getref`` and prepare it.

    The prepared database is written to a directory named ``gen_db``.

    Args:
        gen_db: Reference name understood by ``ariba getref``.

    Raises:
        subprocess.CalledProcessError: either ariba command failed.
    """
    run_ariba(["ariba", "getref", gen_db, "out"])
    run_ariba(["ariba", "prepareref", "-f", "out.fa", "-m", "out.tsv", gen_db])


def build_custom_db(fasta, coding, db_name):
    """Prepare a database from a user-supplied FASTA file.

    Args:
        fasta: Path of the FASTA file passed to ``-f``.
        coding: Value passed to ``--all_coding``.
        db_name: Output directory name for the prepared database.

    Raises:
        subprocess.CalledProcessError: ``ariba prepareref`` failed.
    """
    run_ariba(
        ["ariba", "prepareref", "--all_coding", coding, "-f", fasta, db_name]
    )


def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
    """Append ``data_table_entry`` to the named table, creating it if needed.

    The dictionary is modified in place and also returned.
    """
    tables = data_manager_dict.setdefault('data_tables', {})
    tables.setdefault(data_table_name, []).append(data_table_entry)
    return data_manager_dict


def _write_data_manager_json(path, data_manager_dict):
    """Serialise ``data_manager_dict`` as JSON text into ``path``.

    The file is opened in text mode: ``json.dump`` produces ``str``, which a
    binary-mode handle rejects with ``TypeError`` on Python 3.
    """
    with open(path, 'w') as handle:
        json.dump(data_manager_dict, handle)


def _parse_args(argv=None):
    """Parse and validate the command line; exit with status 2 on bad input."""
    parser = argparse.ArgumentParser()
    parser.add_argument('params')
    parser.add_argument('-t', '--dbtype', dest='database_type', required=True,
                        choices=_DATABASE_TYPES, help='database type')
    parser.add_argument('-d', '--db', dest='db_name', required=True,
                        help='database name')
    parser.add_argument('-c', '--coding', dest='coding', help='')
    parser.add_argument('-f', '--fasta', dest='fasta', help='')
    args = parser.parse_args(argv)

    # Reject bad combinations up front, before any download or build starts.
    if args.database_type == "curated" and args.db_name not in genes_dict:
        parser.error("unknown curated database: {}".format(args.db_name))
    if args.database_type == "mlst" and args.db_name not in mlst_dict:
        parser.error("unknown MLST scheme: {}".format(args.db_name))
    if args.database_type == "fasta" and not (args.fasta and args.coding):
        parser.error("--fasta and --coding are required with --dbtype fasta")
    return args


def main(argv=None):
    """Build the requested database and record it in the params JSON file.

    Steps: parse arguments, read ``extra_files_path`` of the first
    ``output_data`` item from the params file, build the database in the
    current working directory, copy it below that path, and overwrite the
    params file with a ``data_tables`` document describing the new entry.

    Args:
        argv: Optional argument list; ``None`` means ``sys.argv[1:]``.
    """
    args = _parse_args(argv)

    # Read the job description first so a malformed file fails before the
    # (potentially long) build rather than after it.
    with open(args.params) as handle:
        params = json.load(handle)
    target_directory = params['output_data'][0]['extra_files_path']

    if args.database_type == "curated":
        build_curated_db(args.db_name)
        name = genes_dict[args.db_name]
    elif args.database_type == "mlst":
        build_mlst(args.db_name)
        name = mlst_dict[args.db_name]
    else:  # "fasta" -- argparse `choices` guarantees one of the three
        build_custom_db(args.fasta, args.coding, args.db_name)
        name = args.db_name

    if not os.path.isdir(target_directory):
        # makedirs also creates missing parent directories.
        os.makedirs(target_directory)

    installed_path = os.path.join(target_directory, args.db_name)
    shutil.copytree(os.path.join(os.getcwd(), args.db_name), installed_path)

    data_manager_dict = {}
    data_table_entry = {
        "value": args.db_name,
        "name": name,
        "path": installed_path,
    }
    _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
    _write_data_manager_json(args.params, data_manager_dict)


if __name__ == "__main__":
    main()