changeset 0:6ee125deee97 draft

Uploaded
author estrain
date Tue, 01 Mar 2022 03:11:12 +0000
parents
children cc677cf77613
files data_manager_amrfinderplus_database_builder/README data_manager_amrfinderplus_database_builder/data_manager/data_manager_amrfinderplus_database_builder.py data_manager_amrfinderplus_database_builder/data_manager/data_manager_amrfinderplus_database_builder.xml data_manager_amrfinderplus_database_builder/data_manager_conf.xml data_manager_amrfinderplus_database_builder/tool_data_table_conf_sample.xml.sample data_manager_amrfinderplus_database_builder/tool_data_table_conf_sample.xml.test
diffstat 6 files changed, 135 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus_database_builder/README	Tue Mar 01 03:11:12 2022 +0000
@@ -0,0 +1,1 @@
+AMRFinderPlus Data Manager
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus_database_builder/data_manager/data_manager_amrfinderplus_database_builder.py	Tue Mar 01 03:11:12 2022 +0000
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# Errol Strain, estrain@gmail.com
+# Database downloads for NCBI AMRFinderPlus
+
+import sys
+import os
+import tempfile
+import shutil
+import json
+import re
+from ftplib import FTP
+
+
+def download_from_ncbi( ):
+    NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
+    FILENAME = 'version.txt' 
+    NCBI_DOWNLOAD_PATH = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'
+    
+    email = 'anonymous@example.com'
+
+    ftp = FTP( NCBI_FTP_SERVER )
+    ftp.login( 'anonymous', email)
+    ftp.cwd(NCBI_DOWNLOAD_PATH)
+    
+    #exclude the allele counts folder
+    files = ftp.nlst()
+    files = filter(lambda x: re.search(r'^((?!allele|(?:invokername=allele)).)*$', x), files)
+
+    for f in files:
+      ftp.retrbinary("RETR " + f, open(f, 'wb').write)
+
+    ftp.quit()
+
+    #find species specific point mutation files
+    pointmuts = filter(lambda x: re.search(r'^((?!tab|(?:invokername=tab)).)*$', x), files)
+    pointmuts = filter(lambda x: re.search(r'AMR_DNA-', x), pointmuts)
+
+    # Make blast databases
+    blastcmd = "makeblastdb -in AMRProt -dbtype prot -logfile /dev/null"
+    os.system(blastcmd)
+    blastcmd = "makeblastdb -in AMR_CDS -dbtype nucl -logfile /dev/null"
+    os.system(blastcmd)
+
+    for f in pointmuts:
+      blastcmd = "makeblastdb -in " + f +" -dbtype nucl -logfile /dev/null"
+      os.system(blastcmd)
+
+    # Make HMM indexes
+    hmmcmd="hmmpress -f AMR.LIB > /dev/null 2> /dev/null"
+    os.system(hmmcmd)
+ 
+    # Read in version
+    with open("version.txt") as f:
+      version = f.readline().rstrip()
+
+    return version 
+
+def print_json (version):
+    data_tables = {'data_tables':{'amrfinder_databases':{}}}
+    data_tables["data_tables"]["amrfinder_databases"]["value"]="ARMFinderPlus_" + version
+    data_tables["data_tables"]["amrfinder_databases"]["name"]=version
+    data_tables["data_tables"]["amrfinder_databases"]["path"]="/tool/tool-data/amrfinder/" + version
+
+    with open("out_file", 'w') as out:
+      out.write(json.dumps(data_tables, sort_keys=True, indent=2))
+
+def main():
+     
+    os.mkdir("output")
+    os.chdir("output") 
+    #Fetch the files and build blast databases
+    version=download_from_ncbi()    
+    os.chdir("..")
+    print_json(version)
+
+if __name__ == "__main__": main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus_database_builder/data_manager/data_manager_amrfinderplus_database_builder.xml	Tue Mar 01 03:11:12 2022 +0000
@@ -0,0 +1,23 @@
+<tool id="amrfinderplus_database_builder" name="AMRFinderPlus" tool_type="manage_data" version="0.0.1" profile="16.01">
+    <description> Database builder</description>
+    <requirements>
+        <requirement type="package">blast</requirement>
+        <requirement type="package">hmmer</requirement>
+    </requirements>
+    <!-- The script takes no arguments: it downloads and indexes the database
+         in ./output and writes the data manager JSON to a file named
+         out_file in the job working directory. -->
+    <command><![CDATA[
+        python '$__tool_directory__/data_manager_amrfinderplus_database_builder.py'
+    ]]></command>
+    <inputs>
+    </inputs>
+    <outputs>
+        <!-- The script is never given the $out_file path, so the JSON it
+             writes is collected from the working directory instead. -->
+        <data name="out_file" format="data_manager_json" from_work_dir="out_file"/>
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+    </help>
+    <citations>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus_database_builder/data_manager_conf.xml	Tue Mar 01 03:11:12 2022 +0000
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<data_managers>
+    <!-- Registers the AMRFinderPlus builder tool as a data manager that adds rows to the amrfinder_databases table. -->
+    <!-- NOTE(review): the id below reads "armfinderplus" while the tool id in the referenced tool file reads "amrfinderplus"; looks like a transposition, confirm before renaming. -->
+    <data_manager tool_file="data_manager/data_manager_amrfinderplus_database_builder.xml" id="armfinderplus_database_builder">
+        <data_table name="amrfinder_databases">
+            <output>
+                <column name="value" />
+                <column name="name" />
+                <!-- NOTE(review): the builder script emits path as /tool/tool-data/amrfinder/VERSION, but the move target below is /tool/tool-data/amrfinder_databases/NAME and nothing here rewrites the column value; confirm which location is intended. -->
+                <column name="path" output_ref="out_file" >
+                    <move type="directory" relativize_symlinks="True">
+                        <target base="${name}">/tool/tool-data/amrfinder_databases/${name}</target>
+                    </move>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus_database_builder/tool_data_table_conf_sample.xml.sample	Tue Mar 01 03:11:12 2022 +0000
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Locations of AMRFinderPlus databases installed by the
+         amrfinderplus_database_builder data manager. The table name and
+         columns match the data_table declared in data_manager_conf.xml. -->
+    <table name="amrfinder_databases" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/amrfinder_databases.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus_database_builder/tool_data_table_conf_sample.xml.test	Tue Mar 01 03:11:12 2022 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Test-time location of AMRFinderPlus databases. The table name and
+         columns match the data_table declared in data_manager_conf.xml. -->
+    <table name="amrfinder_databases" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/amrfinderplus_database.loc" />
+    </table>
+</tables>
+</tables>