Mercurial > repos > estrain > data_manager_amrfinderplus_database_builder
changeset 0:6ee125deee97 draft
Uploaded
author | estrain |
---|---|
date | Tue, 01 Mar 2022 03:11:12 +0000 |
parents | |
children | cc677cf77613 |
files | data_manager_amrfinderplus_database_builder/README data_manager_amrfinderplus_database_builder/data_manager/data_manager_amrfinderplus_database_builder.py data_manager_amrfinderplus_database_builder/data_manager/data_manager_amrfinderplus_database_builder.xml data_manager_amrfinderplus_database_builder/data_manager_conf.xml data_manager_amrfinderplus_database_builder/tool_data_table_conf_sample.xml.sample data_manager_amrfinderplus_database_builder/tool_data_table_conf_sample.xml.test |
diffstat | 6 files changed, 135 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus_database_builder/README Tue Mar 01 03:11:12 2022 +0000 @@ -0,0 +1,1 @@ +AMRFinderPlus Data Manager
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus_database_builder/data_manager/data_manager_amrfinderplus_database_builder.py Tue Mar 01 03:11:12 2022 +0000 @@ -0,0 +1,76 @@
#!/usr/bin/env python
# Errol Strain, estrain@gmail.com
# Database downloads for NCBI AMRFinderPlus

import sys
import os
import tempfile
import shutil
import json
import re
import subprocess
from ftplib import FTP


def _wanted_downloads(names):
    """Return the remote names to download: everything without "allele" in it."""
    return [name for name in names if "allele" not in name]


def _point_mutation_files(names):
    """Return the species-specific point mutation FASTA names.

    These are the names containing "AMR_DNA-" that do not contain "tab".
    """
    return [name for name in names if "tab" not in name and "AMR_DNA-" in name]


def _run_tool(args, quiet=False):
    """Run an external command without a shell and return its exit status.

    args  -- command and arguments as a list, so file names are never
             interpreted by a shell.
    quiet -- when true, discard the command's stdout and stderr.

    A command that cannot be started is reported the way a shell would
    (a message on stderr unless quiet, exit status 127) instead of raising,
    so a failing tool does not abort the remaining steps.
    """
    try:
        if quiet:
            with open(os.devnull, 'w') as devnull:
                return subprocess.call(args, stdout=devnull, stderr=devnull)
        return subprocess.call(args)
    except OSError as err:
        if not quiet:
            sys.stderr.write("%s: %s\n" % (args[0], err))
        return 127


def download_from_ncbi():
    """Download the latest AMRFinderPlus database and index it.

    Files are written to the current working directory. After the download,
    BLAST databases are built for AMRProt, AMR_CDS and every point mutation
    FASTA file, and AMR.LIB is pressed with hmmpress.

    Returns the first line of the downloaded version.txt, stripped of
    trailing whitespace.
    """
    NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
    FILENAME = 'version.txt'
    NCBI_DOWNLOAD_PATH = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'

    email = 'anonymous@example.com'

    ftp = FTP(NCBI_FTP_SERVER)
    try:
        ftp.login('anonymous', email)
        ftp.cwd(NCBI_DOWNLOAD_PATH)

        # Exclude the allele counts folder. This must be a real list: it is
        # iterated for the download and again below to find the point
        # mutation files, and a lazy filter() would be empty the second time.
        files = _wanted_downloads(ftp.nlst())

        for name in files:
            with open(name, 'wb') as handle:
                ftp.retrbinary("RETR " + name, handle.write)

        ftp.quit()
    finally:
        # Releases the socket even when a transfer failed before quit().
        ftp.close()

    # Find species specific point mutation files
    pointmuts = _point_mutation_files(files)

    # Make blast databases
    _run_tool(["makeblastdb", "-in", "AMRProt", "-dbtype", "prot",
               "-logfile", "/dev/null"])
    _run_tool(["makeblastdb", "-in", "AMR_CDS", "-dbtype", "nucl",
               "-logfile", "/dev/null"])

    for name in pointmuts:
        _run_tool(["makeblastdb", "-in", name, "-dbtype", "nucl",
                   "-logfile", "/dev/null"])

    # Make HMM indexes
    _run_tool(["hmmpress", "-f", "AMR.LIB"], quiet=True)

    # Read in version
    with open(FILENAME) as version_file:
        version = version_file.readline().rstrip()

    return version


def print_json(version):
    """Write the data table entry for this database version to "out_file".

    The file is created in the current working directory and holds one
    "amrfinder_databases" entry with "value", "name" and "path" keys, as
    sorted, 2-space indented JSON.
    """
    # NOTE(review): "ARMFinderPlus_" looks like a typo for "AMRFinderPlus_",
    # and this path ("/tool/tool-data/amrfinder/") differs from the move
    # target in data_manager_conf.xml ("/tool/tool-data/amrfinder_databases/").
    # Both are left unchanged because existing table entries may rely on
    # them -- confirm before changing.
    entry = {
        "value": "ARMFinderPlus_" + version,
        "name": version,
        "path": "/tool/tool-data/amrfinder/" + version,
    }
    data_tables = {'data_tables': {'amrfinder_databases': entry}}

    with open("out_file", 'w') as out:
        out.write(json.dumps(data_tables, sort_keys=True, indent=2))


def main():
    """Build the database in ./output, then write the JSON next to it."""
    start_dir = os.getcwd()
    os.mkdir("output")
    os.chdir("output")
    try:
        # Fetch the files and build blast databases
        version = download_from_ncbi()
    finally:
        # Always return to the starting directory so out_file lands there.
        os.chdir(start_dir)
    print_json(version)


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus_database_builder/data_manager/data_manager_amrfinderplus_database_builder.xml Tue Mar 01 03:11:12 2022 +0000 @@ -0,0 +1,23 @@ +<tool id="amrfinderplus_database_builder" name="AMRFinderPlus" tool_type="manage_data" version="0.0.1" profile="16.01"> + <description> Database builder</description> + <requirements> + <requirement type="package">blast</requirement> + <requirement type="package">hmmer</requirement> + </requirements> + <command><![CDATA[ + python '$__tool_directory__/data_manager_amrfinderplus_database_builder.py' + ]]></command> + <inputs> + </inputs> + <outputs> + <data name="out_file" format="data_manager_json"/> + </outputs> + <tests> + <test> + </test> + </tests> + <help> + </help> + <citations> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus_database_builder/data_manager_conf.xml Tue Mar 01 03:11:12 2022 +0000 @@ -0,0 +1,16 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/data_manager_amrfinderplus_database_builder.xml" id="armfinderplus_database_builder"> + <data_table name="amrfinder_databases"> + <output> + <column name="value" /> + <column name="name" /> + <column name="path" output_ref="out_file" > + <move type="directory" relativize_symlinks="True"> + <target base="${name}">/tool/tool-data/amrfinder_databases/${name}</target> + </move> + </column> + </output> + </data_table> + </data_manager> +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus_database_builder/tool_data_table_conf_sample.xml.sample Tue Mar 01 03:11:12 2022 +0000 @@ -0,0 +1,12 @@ +<tables> + <!-- Locations of all fasta files required to build Diamond databases --> + <table name="diamond_database" comment_char="#"> + <columns>value, name, db_path</columns> + <file path="tool-data/diamond_database.loc" /> + </table> + <!-- Locations of taxonomy data downloaded from NCBI --> + <table name="ncbi_accession2taxid" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/ncbi_accession2taxid.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus_database_builder/tool_data_table_conf_sample.xml.test Tue Mar 01 03:11:12 2022 +0000 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of fasta files to build amrfinderplus databases --> + <table name="amrfinderplus_databases" comment_char="#"> + <columns>value, name, db_path</columns> + <file path="${__HERE__}/test-data/amrfinderplus_database.loc" /> + </table> +</tables>