diff data_manager_fastani.py @ 9:b265f23c7158 draft

Uploaded
author estrain
date Sun, 29 May 2022 20:33:37 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani.py	Sun May 29 20:33:37 2022 +0000
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+# Errol Strain, estrain@gmail.com
+# Database downloads for FastANI 
+
+import sys
+import os
+import tempfile
+import json
+import re
+import argparse
+import requests
+
+
+def download_D1(output_directory):
+    """Download and unpack the FastANI D1 archive inside ``output_directory``.
+
+    The archive is fetched from the Kostas Lab server, extracted with ``tar``
+    and then removed again, whether or not the extraction succeeded.
+
+    Args:
+        output_directory: Existing directory that receives the extracted files.
+
+    Returns:
+        ``output_directory`` joined with ``D1`` (the archive is expected to
+        unpack into a directory of that name).
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+        subprocess.CalledProcessError: If ``tar`` exits with a non-zero status.
+    """
+    import subprocess
+
+    # FastANI databases from Kostas Lab
+    url = "http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz"
+    filename = url.split("/")[-1]
+    archive_path = os.path.join(output_directory, filename)
+
+    # Stream to disk so the archive is never held in memory as a whole, and
+    # fail on HTTP errors instead of saving an error page as the archive.
+    with requests.get(url, stream=True, timeout=60) as r:
+        r.raise_for_status()
+        with open(archive_path, "wb") as f:
+            for chunk in r.iter_content(chunk_size=1024 * 1024):
+                f.write(chunk)
+
+    try:
+        # Run tar from inside the target directory rather than changing the
+        # working directory of this process.
+        subprocess.run(["tar", "xvzf", filename], cwd=output_directory, check=True)
+    finally:
+        os.remove(archive_path)
+
+    return os.path.join(output_directory, "D1")
+
+def download_VL(output_directory):
+    """Download the fish-pathogen reference genomes into ``output_directory``.
+
+    Each accession is fetched as FASTA with the ``efetch`` command-line tool
+    and written to ``<friendly name>.fna``.
+
+    Args:
+        output_directory: Existing directory that receives the ``.fna`` files.
+
+    Returns:
+        ``output_directory``, unchanged.
+
+    Raises:
+        FileNotFoundError: If the ``efetch`` executable is not on ``PATH``.
+        subprocess.CalledProcessError: If ``efetch`` exits with a non-zero status.
+    """
+    import subprocess
+
+    # FastANI uses filenames in output. Creating user friendly names
+    # for fish pathogens
+    accdict = {
+        'NZ_CP018680': 'Vibrio_harveyi_strain_QT520',
+        'SBIG01000001': 'Vibrio_alginolyticus_strain_LF_TCBS_15',
+        'NZ_CP018311': 'Vibrio_rotiferianus_strain_B64D1',
+        'NZ_CP032159': 'Staphylococcus_warneri_strain_22_1',
+        'NZ_CP090968': 'Edwardsiella_piscicida_strain_18EpOKYJ',
+        'NZ_CP044060': 'Aeromonas_veronii_strain_FDAARGOS_632',
+        'NZ_AP022254': 'Aeromonas_caviae_strain_WP8_S18_ESBL_04',
+        'NZ_CDBW01000001': 'Aeromonas_sobria_strain_CECT_4245',
+    }
+
+    for acc, label in accdict.items():
+        fasta_path = os.path.join(output_directory, label + ".fna")
+        # Redirect efetch's stdout straight into the target file; no shell is
+        # involved and a failed download raises instead of passing silently.
+        with open(fasta_path, "w") as fh:
+            subprocess.run(
+                ["efetch", "-db", "nuccore", "-id", acc, "-format", "fasta"],
+                stdout=fh,
+                check=True,
+            )
+
+    return output_directory
+
+
+def print_json(version, argspath, argsname, argsout):
+    """Write a ``data_tables`` JSON document with one ``fastani`` entry.
+
+    Args:
+        version: Stored under the entry's ``value`` key.
+        argspath: Stored under the entry's ``path`` key.
+        argsname: Stored under the entry's ``name`` key.
+        argsout: Path of the JSON file to (over)write.
+    """
+    entry = {"value": version, "name": argsname, "path": argspath}
+    payload = {"data_tables": {"fastani": [entry]}}
+
+    # Keys are sorted and indented by two spaces for a stable, readable file.
+    serialized = json.dumps(payload, indent=2, sort_keys=True)
+    with open(argsout, "w") as out_handle:
+        out_handle.write(serialized)
+        
+def main():
+    """Parse the command line, download the requested database, report it.
+
+    The file named by ``--out`` is first read as JSON to find the target
+    directory (``output_data[0].extra_files_path``) and is afterwards
+    overwritten with the data table entry describing the download.
+    """
+    parser = argparse.ArgumentParser(description='Download FastANI Databases')
+    # Restricting the choices makes an unknown type fail at parse time
+    # instead of leaving ``version`` unbound further down.
+    parser.add_argument('--type', type=str, required=True, nargs=1,
+                        choices=['D1', 'VL'], help='Database Type')
+    parser.add_argument('--desc', type=str, required=True, nargs=1, help='Database Description')
+    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')
+
+    args = parser.parse_args()
+
+    with open(args.out[0]) as fh:
+        params = json.load(fh)
+
+    output_directory = params['output_data'][0]['extra_files_path']
+    # Tolerate a missing parent directory and an already existing target.
+    os.makedirs(output_directory, exist_ok=True)
+
+    # Fetch the FastANI reference genomes; both database types start from D1.
+    output_directory = download_D1(output_directory)
+    version = "FastANI D1"
+    if args.type[0] == "VL":
+        # The extra genomes are written into the directory download_D1 returned.
+        output_directory = download_VL(output_directory)
+        version = "FastANI D1 + VetLIRN"
+
+    print_json(version, output_directory, args.desc[0], args.out[0])
+
+# Run the downloader only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()