Mercurial > repos > thanhlv > data_manager_build_ariba_database
changeset 4:be46816110b4 draft
planemo upload for repository https://github.com/thanhleviet/galaxy-tools commit 98648bbb7796db528b5685d44a2a5b761c53c19b-dirty
author | thanhlv |
---|---|
date | Sun, 19 May 2019 08:10:45 -0400 |
parents | b13f7ba85697 |
children | 78bf08fa1e75 |
files | data_manager/ariba_database_builder.py data_manager/ariba_database_builder.xml data_manager/macros.xml |
diffstat | 3 files changed, 390 insertions(+), 39 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/ariba_database_builder.py Mon Jan 14 07:01:15 2019 -0500 +++ b/data_manager/ariba_database_builder.py Sun May 19 08:10:45 2019 -0400 @@ -1,33 +1,236 @@ -# Thanh Le Viet -# 7-Jan-2019 - +import sys +import subprocess +import shlex +import shutil import argparse import json import os import shutil +import errno -def main(args): - output_path = os.getcwd() - db_folder = [d for d in os.listdir(output_path) if os.path.isdir(d)] - params = json.loads(open(args.output).read()) - target_directory = params['output_data'][0]['extra_files_path'] - os.mkdir(target_directory) - data_manager_entry = [] - for db in db_folder: - print("Current: ".format(os.path.join(output_path, d))) - print("Target: {}".format(target_directory)) - shutil.move(os.path.join(output_path, d), os.path.join(target_directory, d)) - data_manager_entry.append(dict(value=db.lower(), - name=db, - path=target_directory) - ) - data_manager_json = dict(data_tables=dict(ariba_databases=data_manager_entry)) - file(args.output, 'w').write(json.dumps(data_manager_json)) +data_table_name = "ariba_databases" + +mlst_dict = { + "achs" : "Achromobacter spp.", + "acib1" : "Acinetobacter baumannii#1", + "acib2" : "Acinetobacter baumannii#2", + "aers" : "Aeromonas spp.", + "anap" : "Anaplasma phagocytophilum", + "arcs" : "Arcobacter spp.", + "aspf" : "Aspergillus fumigatus", + "bacc" : "Bacillus cereus", + "bacl" : "Bacillus licheniformis", + "bacs" : "Bacillus subtilis", + "barb" : "Bartonella bacilliformis", + "barh" : "Bartonella henselae", + "bors" : "Bordetella spp.", + "borr" : "Borrelia spp.", + "brah" : "Brachyspira hampsonii", + "brach" : "Brachyspira hyodysenteriae", + "brai" : "Brachyspira intermedia", + "brap" : "Brachyspira pilosicoli", + "bras" : "Brachyspira spp.", + "brus" : "Brucella spp.", + "bucc" : "Burkholderia cepacia complex", + "burp" : "Burkholderia pseudomallei", + "camc" : "Campylobacter concisus/curvus", + "camf" : "Campylobacter fetus", + "camh" : "Campylobacter helveticus", + "rlis" : "Campylobacter hyointestinalis", + "cami" : "Campylobacter insulaenigrae", + "camj" : "Campylobacter jejuni", + "caml" : "Campylobacter lanienae", + "rari" : "Campylobacter lari", + "cams" : "Campylobacter sputorum", + "camu" : "Campylobacter upsaliensis", + "cana" : "Candida albicans", + "cang" : "Candida glabrata", + "cank" : "Candida krusei", + "cant" : "Candida tropicalis", + "cals" : "Candidatus Liberibacter solanacearum", + "carm" : "Carnobacterium maltaromaticum", + "chls" : "Chlamydiales spp.", + "citf" : "Citrobacter freundii", + "clos" : "Clonorchis sinensis", + "clob" : "Clostridium botulinum", + "clod" : "Clostridium difficile", + "mcum" : "Clostridium septicum", + "cord" : "Corynebacterium diphtheriae", + "cros" : "Cronobacter spp.", + "dicn" : "Dichelobacter nodosus", + "edws" : "Edwardsiella spp.", + "entc" : "Enterobacter cloacae", + "entf" : "Enterococcus faecalis", + "sium" : "Enterococcus faecium", + "escc1" : "Escherichia coli#1", + "escc2" : "Escherichia coli#2", + "flap" : "Flavobacterium psychrophilum", + "gala" : "Gallibacterium anatis", + "haei" : "Haemophilus influenzae", + "haep" : "Haemophilus parasuis", + "helc" : "Helicobacter cinaedi", + "help" : "Helicobacter pylori", + "hels" : "Helicobacter suis", + "kink" : "Kingella kingae", + "klea" : "Klebsiella aerogenes", + "kleo" : "Klebsiella oxytoca", + "klep" : "Klebsiella pneumoniae", + "kuds" : "Kudoa septempunctata", + "lacs" : "Lactobacillus salivarius", + "leps" : "Leptospira spp.", + "leps2" : "Leptospira spp.#2", + "leps3" : "Leptospira spp.#3", + "lism" : "Listeria monocytogenes", + "macc" : "Macrococcus canis", + "scus" : "Macrococcus caseolyticus", + "manh" : "Mannheimia haemolytica", + "melp" : "Melissococcus plutonius", + "morc" : "Moraxella catarrhalis", + "mycs" : "Mycobacteria spp.", + "myca" : "Mycobacterium abscessus", + "mycm" : "Mycobacterium massiliense", + "mycoa" : "Mycoplasma agalactiae", + "mycb" : "Mycoplasma bovis", + "mych" : "Mycoplasma hyopneumoniae", + "anis" : "Mycoplasma hyorhinis", + "myci" : "Mycoplasma iowae", + "mycp" : "Mycoplasma pneumoniae", + "mycos" : "Mycoplasma synoviae", + "neis" : "Neisseria spp.", + "orit" : "Orientia tsutsugamushi", + "ornr" : "Ornithobacterium rhinotracheale", + "pael" : "Paenibacillus larvae", + "pasm1" : "Pasteurella multocida#1", + "pasm2" : "Pasteurella multocida#2", + "pedp" : "Pediococcus pentosaceus", + "phod" : "Photobacterium damselae", + "piss" : "Piscirickettsia salmonis", + "porg" : "Porphyromonas gingivalis", + "proa" : "Propionibacterium acnes", + "psea" : "Pseudomonas aeruginosa", + "psef" : "Pseudomonas fluorescens", + "psep" : "Pseudomonas putida", + "rhos" : "Rhodococcus spp.", + "riea" : "Riemerella anatipestifer", + "sale" : "Salmonella enterica", + "sapp" : "Saprolegnia parasitica", + "sins" : "Sinorhizobium spp.", + "staa" : "Staphylococcus aureus", + "stae" : "Staphylococcus epidermidis", + "stah" : "Staphylococcus haemolyticus", + "snis" : "Staphylococcus hominis", + "stal" : "Staphylococcus lugdunensis", + "stap" : "Staphylococcus pseudintermedius", + "stem" : "Stenotrophomonas maltophilia", + "stra" : "Streptococcus agalactiae", + "sbcx" : "Streptococcus bovis/equinus complex (SBSEC)", + "strc" : "Streptococcus canis", + "stde" : "Streptococcus dysgalactiae equisimilis", + "strg" : "Streptococcus gallolyticus", + "stro" : "Streptococcus oralis", + "strp" : "Streptococcus pneumoniae", + "snes" : "Streptococcus pyogenes", + "strs" : "Streptococcus suis", + "strt" : "Streptococcus thermophilus", + "strt2" : "Streptococcus thermophilus#2", + "stru" : "Streptococcus uberis", + "strz" : "Streptococcus zooepidemicus", + "sspp" : "Streptomyces spp", + "tays" : "Taylorella spp.", + "tens" : "Tenacibaculum spp.", + "trep" : "Treponema pallidum", + "triv" : "Trichomonas vaginalis", + "ures" : "Ureaplasma spp.", + "vibc" : "Vibrio cholerae", + "vibc2" : "Vibrio cholerae#2", + "vibp" : "Vibrio parahaemolyticus", + "vibs" : "Vibrio spp.", + "vibt" : "Vibrio tapetis", + "vibv" : "Vibrio vulnificus", + "wolb" : "Wolbachia", + "xylf" : "Xylella fastidiosa", + "yerp" : "Yersinia pseudotuberculosis", + "yerr" : "Yersinia ruckeri", + "yers" : "Yersinia spp." +} -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Create data manager json.') - parser.add_argument('--out', dest='output', action='store', help='JSON filename') +genes_dict = { + "card" : "CARD", + "resfinder" : "Resfinder", + "plasmidfinder" : "Plasmidfinder", + "megares" : "Megares", + "argannot" : "Argannot", + "vfdb_core" : "vfdb_core", + "vfdb_full" : "vfdb_full", + "virulencefinder" : "virulencefinder" +} + +def run_ariba(cmd): + _cmd = shlex.split(cmd) + subprocess.check_call(_cmd) + +def build_mlst(mlst_db): + mlst_species = mlst_dict[mlst_db] + run_ariba("ariba pubmlstget '{}' out".format(mlst_species)) + output_path = os.getcwd() + old = "{}/out/ref_db".format(output_path, mlst_db) + new = "{}/{}".format(output_path, mlst_db) + shutil.move(old, new) + +def build_curated_db(gen_db): + run_ariba("ariba getref {} out".format(gen_db)) + run_ariba("ariba prepareref -f out.fa -m out.tsv {}".format(gen_db)) + +def build_custom_db(fasta, coding, db_name): + run_ariba("ariba prepareref --all_coding {} -f {} {}".format(coding, fasta, db_name)) + +def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry): + data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) + data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) + data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) + return data_manager_dict + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('params') + parser.add_argument( '-t', '--dbtype', dest='database_type', help='database type' ) + parser.add_argument( '-d', '--db', dest='db_name', help='database name' ) + parser.add_argument( '-c', '--coding', dest='coding', help='' ) + parser.add_argument( '-f', '--fasta', dest='fasta', help='' ) args = parser.parse_args() - main(args) \ No newline at end of file + + if args.database_type == "curated": + build_curated_db(args.db_name) + name = genes_dict[args.db_name] + elif args.database_type == "mlst": + build_mlst(args.db_name) + name = mlst_dict[args.db_name] + elif args.database_type == "fasta": + build_custom_db(args.fasta, args.coding, args.db_name) + name = args.db_name + + params = json.loads(open(args.params).read()) + + target_directory = params['output_data'][0]['extra_files_path'] + + if not os.path.isdir(target_directory): + os.mkdir(target_directory) + + output_path = os.getcwd() + shutil.copytree(os.path.join(output_path, args.db_name), os.path.join(target_directory, args.db_name)) + + data_manager_dict = {} + + data_table_entry = { + "value": args.db_name, + "name": name, + "path": os.path.join(target_directory, args.db_name) + } + _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) + open(args.params, 'wb').write(json.dumps(data_manager_dict)) + +if __name__ == "__main__": + main() + \ No newline at end of file
--- a/data_manager/ariba_database_builder.xml Mon Jan 14 07:01:15 2019 -0500 +++ b/data_manager/ariba_database_builder.xml Sun May 19 08:10:45 2019 -0400 @@ -1,20 +1,22 @@ <?xml version="1.0"?> <tool id="ariba_database_builder" name="ARIBA build database" tool_type="manage_data" version="@VERSION@"> - <description>Download and prepare database for use with ARIBA</description> - <macros> - <import>macros.xml</import> - </macros> + <description>database builder</description> <expand macro="requirements" /> <version_command>ariba version | head head -n 1</version_command> <command detect_errors="exit_code"> <![CDATA[ #if str($library.lib_type) == "curated" - #set $library_list = str($library.curated).split(',') - #for $library_ in $library_list: - #if str($library_) != 'None': - ariba getref '$library_' _tmp_db && ariba prepareref -f _tmp_db.fa -m _tmp_db.tsv '$library_' && - #end if - #end for + #set $library_ = str($library.curated) + #if str($library_) != 'None': + ariba getref '$library_' _tmp_db && ariba prepareref -f _tmp_db.fa -m _tmp_db.tsv '$library_' && + #end if + #end if + + #if str($library.lib_type) == "mlst" + #set $library_ = str($library.curated) + #if str($library_) != 'None': + python '$__tool_directory__/mlst.py' '$library_' && + #end if #end if #if str($library.lib_type) == "fasta" @@ -23,17 +25,18 @@ #end if #end if - python '$__tool_directory__/ariba_database_builder.py' --out '${out_file}' + python '$__tool_directory__/ariba_data_base_builder.py' --out '${out_file}' ]]> </command> <inputs> <conditional name="library"> <param name="lib_type" type="select" label="Input reads type or collection" help="Select a curated database or a fasta from the history"> <option value="curated" selected="true">Curated Database</option> + <option value="mlst" selected="true">MLST</option> <option value="fasta">A fasta file</option> </param> <when value="curated"> - <param name="curated" type="select" multiple="true" label="Select partial library to download"> + <param name="curated" type="select" multiple="false" label="Select partial library to download"> <option value="card">CARD</option> <option value="resfinder">Resfinder</option> <option value="plasmidfinder">Plasmidfinder</option> @@ -43,9 +46,154 @@ <option value="vfdb_full">vfdb_full</option> <option value="virulencefinder">virulencefinder</option> </param> + <when value="mlst"> + <param name="mlst" type="select" multiple="false" label="Select a species"> + <option value="achs">Achromobacter spp.</option> + <option value="acib1">Acinetobacter baumannii#1</option> + <option value="acib2">Acinetobacter baumannii#2</option> + <option value="aers">Aeromonas spp.</option> + <option value="anap">Anaplasma phagocytophilum</option> + <option value="arcs">Arcobacter spp.</option> + <option value="aspf">Aspergillus fumigatus</option> + <option value="bacc">Bacillus cereus</option> + <option value="bacl">Bacillus licheniformis</option> + <option value="bacs">Bacillus subtilis</option> + <option value="barb">Bartonella bacilliformis</option> + <option value="barh">Bartonella henselae</option> + <option value="bors">Bordetella spp.</option> + <option value="borr">Borrelia spp.</option> + <option value="brah">Brachyspira hampsonii</option> + <option value="brach">Brachyspira hyodysenteriae</option> + <option value="brai">Brachyspira intermedia</option> + <option value="brap">Brachyspira pilosicoli</option> + <option value="bras">Brachyspira spp.</option> + <option value="brus">Brucella spp.</option> + <option value="bucc">Burkholderia cepacia complex</option> + <option value="burp">Burkholderia pseudomallei</option> + <option value="camc">Campylobacter concisus/curvus</option> + <option value="camf">Campylobacter fetus</option> + <option value="camh">Campylobacter helveticus</option> + <option value="rlis">Campylobacter hyointestinalis</option> + <option value="cami">Campylobacter insulaenigrae</option> + <option value="camj">Campylobacter jejuni</option> + <option value="caml">Campylobacter lanienae</option> + <option value="rari">Campylobacter lari</option> + <option value="cams">Campylobacter sputorum</option> + <option value="camu">Campylobacter upsaliensis</option> + <option value="cana">Candida albicans</option> + <option value="cang">Candida glabrata</option> + <option value="cank">Candida krusei</option> + <option value="cant">Candida tropicalis</option> + <option value="cals">Candidatus Liberibacter solanacearum</option> + <option value="carm">Carnobacterium maltaromaticum</option> + <option value="chls">Chlamydiales spp.</option> + <option value="citf">Citrobacter freundii</option> + <option value="clos">Clonorchis sinensis</option> + <option value="clob">Clostridium botulinum</option> + <option value="clod">Clostridium difficile</option> + <option value="mcum">Clostridium septicum</option> + <option value="cord">Corynebacterium diphtheriae</option> + <option value="cros">Cronobacter spp.</option> + <option value="dicn">Dichelobacter nodosus</option> + <option value="edws">Edwardsiella spp.</option> + <option value="entc">Enterobacter cloacae</option> + <option value="entf">Enterococcus faecalis</option> + <option value="sium">Enterococcus faecium</option> + <option value="escc1">Escherichia coli#1</option> + <option value="escc2">Escherichia coli#2</option> + <option value="flap">Flavobacterium psychrophilum</option> + <option value="gala">Gallibacterium anatis</option> + <option value="haei">Haemophilus influenzae</option> + <option value="haep">Haemophilus parasuis</option> + <option value="helc">Helicobacter cinaedi</option> + <option value="help">Helicobacter pylori</option> + <option value="hels">Helicobacter suis</option> + <option value="kink">Kingella kingae</option> + <option value="klea">Klebsiella aerogenes</option> + <option value="kleo">Klebsiella oxytoca</option> + <option value="klep">Klebsiella pneumoniae</option> + <option value="kuds">Kudoa septempunctata</option> + <option value="lacs">Lactobacillus salivarius</option> + <option value="leps">Leptospira spp.</option> + <option value="leps2">Leptospira spp.#2</option> + <option value="leps3">Leptospira spp.#3</option> + <option value="lism">Listeria monocytogenes</option> + <option value="macc">Macrococcus canis</option> + <option value="scus">Macrococcus caseolyticus</option> + <option value="manh">Mannheimia haemolytica</option> + <option value="melp">Melissococcus plutonius</option> + <option value="morc">Moraxella catarrhalis</option> + <option value="mycs">Mycobacteria spp.</option> + <option value="myca">Mycobacterium abscessus</option> + <option value="mycm">Mycobacterium massiliense</option> + <option value="mycoa">Mycoplasma agalactiae</option> + <option value="mycb">Mycoplasma bovis</option> + <option value="mych">Mycoplasma hyopneumoniae</option> + <option value="anis">Mycoplasma hyorhinis</option> + <option value="myci">Mycoplasma iowae</option> + <option value="mycp">Mycoplasma pneumoniae</option> + <option value="mycos">Mycoplasma synoviae</option> + <option value="neis">Neisseria spp.</option> + <option value="orit">Orientia tsutsugamushi</option> + <option value="ornr">Ornithobacterium rhinotracheale</option> + <option value="pael">Paenibacillus larvae</option> + <option value="pasm1">Pasteurella multocida#1</option> + <option value="pasm2">Pasteurella multocida#2</option> + <option value="pedp">Pediococcus pentosaceus</option> + <option value="phod">Photobacterium damselae</option> + <option value="piss">Piscirickettsia salmonis</option> + <option value="porg">Porphyromonas gingivalis</option> + <option value="proa">Propionibacterium acnes</option> + <option value="psea">Pseudomonas aeruginosa</option> + <option value="psef">Pseudomonas fluorescens</option> + <option value="psep">Pseudomonas putida</option> + <option value="rhos">Rhodococcus spp.</option> + <option value="riea">Riemerella anatipestifer</option> + <option value="sale">Salmonella enterica</option> + <option value="sapp">Saprolegnia parasitica</option> + <option value="sins">Sinorhizobium spp.</option> + <option value="staa">Staphylococcus aureus</option> + <option value="stae">Staphylococcus epidermidis</option> + <option value="stah">Staphylococcus haemolyticus</option> + <option value="snis">Staphylococcus hominis</option> + <option value="stal">Staphylococcus lugdunensis</option> + <option value="stap">Staphylococcus pseudintermedius</option> + <option value="stem">Stenotrophomonas maltophilia</option> + <option value="stra">Streptococcus agalactiae</option> + <option value="sbcx">Streptococcus bovis/equinus complex (SBSEC)</option> + <option value="strc">Streptococcus canis</option> + <option value="stde">Streptococcus dysgalactiae equisimilis</option> + <option value="strg">Streptococcus gallolyticus</option> + <option value="stro">Streptococcus oralis</option> + <option value="strp">Streptococcus pneumoniae</option> + <option value="snes">Streptococcus pyogenes</option> + <option value="strs">Streptococcus suis</option> + <option value="strt">Streptococcus thermophilus</option> + <option value="strt2">Streptococcus thermophilus#2</option> + <option value="stru">Streptococcus uberis</option> + <option value="strz">Streptococcus zooepidemicus</option> + <option value="sspp">Streptomyces spp</option> + <option value="tays">Taylorella spp.</option> + <option value="tens">Tenacibaculum spp.</option> + <option value="trep">Treponema pallidum</option> + <option value="triv">Trichomonas vaginalis</option> + <option value="ures">Ureaplasma spp.</option> + <option value="vibc">Vibrio cholerae</option> + <option value="vibc2">Vibrio cholerae#2</option> + <option value="vibp">Vibrio parahaemolyticus</option> + <option value="vibs">Vibrio spp.</option> + <option value="vibt">Vibrio tapetis</option> + <option value="vibv">Vibrio vulnificus</option> + <option value="wolb">Wolbachia</option> + <option value="xylf">Xylella fastidiosa</option> + <option value="yerp">Yersinia pseudotuberculosis</option> + <option value="yerr">Yersinia ruckeri</option> + <option value="yers">Yersinia spp.</option> + </param> + </when> </when> <when value="fasta"> - <param name="fasta" type="data" format="fasta" optional="false" /> + <param name="fasta" type="data" format="fasta,fasta.gz" optional="false" /> <param name="db_name" type="text" label="DB name" help = "The DB name should include no space and special characters" /> <param name="coding" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="Coding sequence?" /> </when>
--- a/data_manager/macros.xml Mon Jan 14 07:01:15 2019 -0500 +++ b/data_manager/macros.xml Sun May 19 08:10:45 2019 -0400 @@ -1,8 +1,8 @@ <macros> - <token name="@VERSION@">0.1.0</token> + <token name="@VERSION@">2.13.2</token> <xml name="requirements"> <requirements> - <requirement type="package" version="2.13.2">ariba</requirement> + <requirement type="package" version="@VERSION@">ariba</requirement> </requirements> </xml>