# HG changeset patch
# User estrain
# Date 1653850875 0
# Node ID 5dda51264a2d966ca79dc6efa173f3d25dbcc4dd
Uploaded
diff -r 000000000000 -r 5dda51264a2d data_manager_fastani/data_manager/data_manager_fastani.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/data_manager/data_manager_fastani.py Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# Errol Strain, estrain@gmail.com
+# Database downloads for FastANI
+
+import sys
+import os
+import tempfile
+import json
+import re
+import argparse
+import requests
+
+
+def download_D1(output_directory):
+    """Download D1.tar.gz into output_directory, unpack it, return <dir>/D1."""
+    import tarfile
+
+    # FastANI databases from Kostas Lab
+    url = "http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz"
+    archive = os.path.join(output_directory, url.split("/")[-1])
+
+    # Stream to disk; raise on HTTP errors rather than saving an error page.
+    with requests.get(url, stream=True) as r:
+        r.raise_for_status()
+        with open(archive, "wb") as f:
+            for chunk in r.iter_content(chunk_size=1 << 20):
+                f.write(chunk)
+
+    # tarfile instead of shelling out to "tar xvzf", so extraction errors raise.
+    with tarfile.open(archive, "r:gz") as tar:
+        tar.extractall(output_directory)
+    return output_directory + "/D1"
+
+def download_VL(output_directory):
+    """Fetch the VetLIRN fish-pathogen genomes with Entrez Direct ``efetch``.
+
+    Creates one sub-folder per genome; returns output_directory unchanged.
+    """
+    import subprocess
+
+    # FastANI uses folder names in output. Creating user friendly names
+    # for fish pathogens
+    accdict = {'NZ_CP018680': 'Vibrio_harveyi_strain_QT520',
+               'SBIG01000001': 'Vibrio_alginolyticus_strain_LF_TCBS_15',
+               'NZ_CP018311': 'Vibrio_rotiferianus_strain_B64D1',
+               'NZ_CP032159': 'Staphylococcus_warneri_strain_22_1',
+               'NZ_CP090968': 'Edwardsiella_piscicida_strain_18EpOKYJ',
+               'NZ_CP044060': 'Aeromonas_veronii_strain_FDAARGOS_632',
+               'NZ_AP022254': 'Aeromonas_caviae_strain_WP8_S18_ESBL_04',
+               'NZ_CDBW01000001': 'Aeromonas_sobria_strain_CECT_4245'}
+
+    for acc, name in accdict.items():
+        genome_dir = os.path.join(output_directory, name)
+        os.makedirs(genome_dir, exist_ok=True)
+        cmd = ["efetch", "-db", "nuccore", "-id", acc, "-format", "fasta"]
+        # Argument list + check=True: no shell quoting, and failures raise.
+        with open(os.path.join(genome_dir, acc + ".fasta"), "w") as fasta:
+            subprocess.run(cmd, stdout=fasta, check=True)
+    return output_directory
+
+
+def print_json(version, argspath, argsname, argsout):
+    """Write the data-manager JSON describing one 'fastani' data-table row.
+
+    version, argsname and argspath become the row's "value", "name" and
+    "path" entries; argsout is the path of the JSON file to (over)write.
+    """
+    data_table_entry = {
+        'data_tables': {
+            'fastani': [
+                {"value": version, "name": argsname, "path": argspath},
+            ]
+        }
+    }
+
+    with open(argsout, 'w') as fh:
+        json.dump(data_table_entry, fh, indent=2, sort_keys=True)
+
+def main():
+    """Command-line entry point: download the requested FastANI database.
+
+    --out names a JSON file that is first read for
+    params['output_data'][0]['extra_files_path'] (the download directory)
+    and then overwritten with the resulting data-table entry.
+    """
+    parser = argparse.ArgumentParser(description='Download FastANI Databases')
+    # choices: an unknown type used to crash later with an unbound `version`.
+    parser.add_argument('--type', required=True, choices=['D1', 'VL'], help='Database Type')
+    parser.add_argument('--desc', required=True, help='Database Description')
+    parser.add_argument('--out', required=True, help='output file')
+    args = parser.parse_args()
+
+    with open(args.out) as fh:
+        params = json.load(fh)
+    output_directory = params['output_data'][0]['extra_files_path']
+    os.makedirs(output_directory, exist_ok=True)
+
+    # Both types start from D1; "VL" adds the VetLIRN genomes into the D1 folder.
+    output_directory = download_D1(output_directory)
+    version = "FastANI D1"
+    if args.type == "VL":
+        output_directory = download_VL(output_directory)
+        version = "FastANI D1 + VetLIRN"
+    print_json(version, output_directory, args.desc, args.out)
+
+if __name__ == "__main__": main()
diff -r 000000000000 -r 5dda51264a2d data_manager_fastani/data_manager/data_manager_fastani.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/data_manager/data_manager_fastani.xml Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Downloads the FastANI D1 reference database (http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz); database type VL additionally fetches the VetLIRN fish-pathogen genomes from NCBI nuccore.
+
+
diff -r 000000000000 -r 5dda51264a2d data_manager_fastani/data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/data_manager_conf.xml Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 5dda51264a2d data_manager_fastani/test-data/fastani_databases.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/test-data/fastani_databases.loc Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,7 @@
+# This is a tab-separated file describing the location of FastANI databases
+#
+# the columns are:
+# value name path
+#
+# for example
+# D1 FastANI D1 /tmp/tool-data/fastani/D1
diff -r 000000000000 -r 5dda51264a2d data_manager_fastani/tool-data/fastani_databases.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/tool-data/fastani_databases.loc Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,7 @@
+# This is a tab-separated file describing the location of FastANI databases
+#
+# the columns are:
+# value name path
+#
+# for example
+# D1 FastANI D1 /tmp/tool-data/fastani/D1
diff -r 000000000000 -r 5dda51264a2d data_manager_fastani/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/tool_data_table_conf.xml.sample Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,6 @@
+
+
+
diff -r 000000000000 -r 5dda51264a2d data_manager_fastani/tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/tool_data_table_conf.xml.test Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,6 @@
+
+
+