#!/usr/bin/env python
# Errol Strain, estrain@gmail.com
# Database downloads for NCBI AMRFinderPlus
#
# This script was embedded in a Mercurial changeset patch whose line breaks
# were lost. The surrounding patch text is kept here verbatim for reference:
#   HG changeset patch
#   User estrain
#   Date 1647604383 0
#   Node ID 804fd4d644854a347300bf76342ba88ed112dd34
#   Uploaded
#   diff -r 000000000000 -r 804fd4d64485 data_manager_amrfinderplus/README
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/data_manager_amrfinderplus/README Fri Mar 18 11:53:03 2022 +0000
#   @@ -0,0 +1,1 @@
#   +AMRFinderPlus Data Manager
#   diff -r 000000000000 -r 804fd4d64485 data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py Fri Mar 18 11:53:03 2022 +0000
#   @@ -0,0 +1,101 @@
"""Download the latest NCBI AMRFinderPlus database and index it.

The script fetches the database files over anonymous FTP, builds BLAST
databases and HMM indexes next to them, copies the result to the requested
folder and writes a JSON description of the new ``amrfinder_databases`` entry.
"""

import sys
import os
import tempfile
import shutil
import json
import re
import argparse
import subprocess
from ftplib import FTP


def _keep_for_download(name):
    """Return True unless *name* contains 'allele' (the allele counts folder)."""
    return 'allele' not in name


def _is_pointmut_fasta(name):
    """Return True for names containing 'AMR_DNA-' but not 'tab'."""
    return 'AMR_DNA-' in name and 'tab' not in name


def _run_tool(cmd, silence_output=False):
    """Run an external tool from an argument list, without a shell.

    Failures are reported on stderr but never raised, so a failing indexing
    step does not abort the run.

    Args:
        cmd: Program name followed by its arguments.
        silence_output: When True, discard the tool's stdout and stderr.

    Returns:
        The tool's exit status, or None if it could not be started.
    """
    sink = subprocess.DEVNULL if silence_output else None
    try:
        status = subprocess.call(cmd, stdout=sink, stderr=sink)
    except OSError as err:
        sys.stderr.write(
            "Warning: could not run '{}': {}\n".format(' '.join(cmd), err))
        return None
    if status != 0:
        sys.stderr.write(
            "Warning: '{}' exited with status {}\n".format(' '.join(cmd), status))
    return status


def download_from_ncbi():
    """Fetch the AMRFinderPlus database into the current directory and index it.

    Every entry of the remote ``latest`` folder whose name does not contain
    'allele' is downloaded. BLAST databases are then built for ``AMRProt``,
    ``AMR_CDS`` and each point-mutation FASTA, and ``AMR.LIB`` is pressed
    with hmmpress.

    Returns:
        The first line of the downloaded ``version.txt``, stripped of
        trailing whitespace.
    """
    server = 'ftp.ncbi.nlm.nih.gov'
    version_file = 'version.txt'
    remote_dir = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'
    email = 'anonymous@example.com'

    # Close the FTP session as soon as the downloads finish so it cannot
    # time out while the (slow) local indexing runs.
    with FTP(server) as ftp:
        ftp.login('anonymous', email)
        ftp.cwd(remote_dir)

        # exclude the allele counts folder
        names = [name for name in ftp.nlst() if _keep_for_download(name)]

        for name in names:
            # Close each file explicitly so it is fully flushed before the
            # indexing tools read it.
            with open(name, 'wb') as fh:
                ftp.retrbinary("RETR " + name, fh.write)

    pointmuts = [name for name in names if _is_pointmut_fasta(name)]

    # Make blast databases
    _run_tool(["makeblastdb", "-in", "AMRProt", "-dbtype", "prot",
               "-logfile", "/dev/null"])
    _run_tool(["makeblastdb", "-in", "AMR_CDS", "-dbtype", "nucl",
               "-logfile", "/dev/null"])
    for name in pointmuts:
        # File names come from the server: pass them as a single argument
        # instead of splicing them into a shell command line.
        _run_tool(["makeblastdb", "-in", name, "-dbtype", "nucl",
                   "-logfile", "/dev/null"])

    # Make HMM indexes
    _run_tool(["hmmpress", "-f", "AMR.LIB"], silence_output=True)

    # Read in version
    with open(version_file) as fh:
        version = fh.readline().rstrip()

    return version


def print_json(version, argskey, argspath, argsname, argsvalue, argsout):
    """Write the data table entry for the new database as JSON.

    Args:
        version: Database version string; accepted for interface
            compatibility but not written to the output.
        argskey: Value stored under ``dbkey``.
        argspath: Value stored under ``path``.
        argsname: Value stored under ``name``.
        argsvalue: Value stored under ``value``.
        argsout: Path of the JSON file to write (overwritten if present).
    """
    data_table_entry = {
        'data_tables': {
            'amrfinder_databases': [
                {
                    "dbkey": argskey,
                    "value": argsvalue,
                    "name": argsname,
                    "path": argspath,
                }
            ]
        }
    }

    with open(argsout, 'w') as fh:
        json.dump(data_table_entry, fh, sort_keys=True)


def main():
    """Parse the command line, build the database and describe it as JSON."""
    parser = argparse.ArgumentParser(description='Download NCBI amrFinderPlus Databases')
    parser.add_argument('--name', type=str, required=True, nargs=1, help='Database name')
    parser.add_argument('--path', type=str, required=True, nargs=1, help='Absolute path to new database folder')
    parser.add_argument('--value', type=str, required=True, nargs=1, help='Database value')
    parser.add_argument('--key', type=str, required=True, nargs=1, help='Database key')
    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')

    args = parser.parse_args()

    start_dir = os.getcwd()
    os.mkdir("output")
    os.chdir("output")
    try:
        # Fetch the files and build blast databases
        version = download_from_ncbi()
    finally:
        # Always return to the starting directory, even if the download fails.
        os.chdir(start_dir)

    shutil.copytree("output", args.path[0])
    print_json(version, args.key[0], args.path[0], args.name[0], args.value[0], args.out[0])


if __name__ == "__main__":
    main()

# Remainder of the original patch text carried on these lines (the XML markup
# was already missing), kept verbatim:
#   diff -r 000000000000 -r 804fd4d64485 data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.xml
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.xml Fri Mar 18 11:53:03 2022 +0000
#   @@ -0,0 +1,29 @@
#   + Database builder
#   + blast
#   + hmmer
#   + Refer to https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/ for latest database version
number. + + + diff -r 000000000000 -r 804fd4d64485 data_manager_amrfinderplus/data_manager_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus/data_manager_conf.xml Fri Mar 18 11:53:03 2022 +0000 @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff -r 000000000000 -r 804fd4d64485 data_manager_amrfinderplus/tool_data_table_conf_sample.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus/tool_data_table_conf_sample.xml.sample Fri Mar 18 11:53:03 2022 +0000 @@ -0,0 +1,12 @@ + + + + value, name, db_path + +
+ + + value, name, path + +
+
diff -r 000000000000 -r 804fd4d64485 data_manager_amrfinderplus/tool_data_table_conf_sample.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus/tool_data_table_conf_sample.xml.test Fri Mar 18 11:53:03 2022 +0000 @@ -0,0 +1,7 @@ + + + + value, name, db_path + +
+