view data_manager_fastani.py @ 9:b265f23c7158 draft

Uploaded
author estrain
date Sun, 29 May 2022 20:33:37 +0000
parents
children
line wrap: on
line source

#!/usr/bin/env python
# Errol Strain, estrain@gmail.com
# Database downloads for FastANI 

import sys
import os
import tempfile
import json
import re
import argparse
import requests


def download_D1(output_directory):
    """Download and unpack the FastANI D1 database archive.

    Fetches ``D1.tar.gz`` from the Kostas Lab server into
    *output_directory*, extracts it there with ``tar`` and removes the
    archive afterwards. The process working directory is left untouched.

    Args:
        output_directory: Existing directory that receives the archive
            and its extracted contents.

    Returns:
        ``output_directory + "/D1"``; presumably the directory the archive
        unpacks to (the archive layout is not verified here).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        subprocess.CalledProcessError: If ``tar`` exits with a non-zero
            status.
    """
    # Imported locally because subprocess is not a module-level import.
    import subprocess

    # FastANI databases from Kostas Lab
    url = "http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz"

    filename = url.split("/")[-1]
    archive_path = os.path.join(output_directory, filename)

    # Stream the body to disk in chunks so the archive is never held in
    # memory as a whole, and fail on HTTP errors instead of saving an
    # error page under the tarball's name.
    with requests.get(url, stream=True, timeout=120) as response:
        response.raise_for_status()
        with open(archive_path, "wb") as archive:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                archive.write(chunk)

    try:
        # cwd= confines the extraction to output_directory without
        # changing the working directory of this process.
        subprocess.run(
            ["tar", "xvzf", filename],
            cwd=output_directory,
            check=True,
        )
    finally:
        # The archive is removed whether or not extraction succeeded.
        os.remove(archive_path)

    return output_directory + "/D1"

def download_VL(output_directory):
    """Fetch the additional reference genomes as FASTA files.

    Runs ``efetch -db nuccore -id <accession> -format fasta`` once per
    accession and stores the output as ``<friendly name>.fna`` inside
    *output_directory*. The process working directory is left untouched.

    Args:
        output_directory: Existing directory that receives the ``.fna``
            files.

    Returns:
        *output_directory*, unchanged.

    Raises:
        FileNotFoundError: If the ``efetch`` executable or
            *output_directory* does not exist.
        subprocess.CalledProcessError: If ``efetch`` exits with a
            non-zero status.
    """
    # Imported locally because subprocess is not a module-level import.
    import subprocess

    # FastANI uses filenames in output. Creating user friendly names
    # for fish pathogens
    accdict = {
        'NZ_CP018680': 'Vibrio_harveyi_strain_QT520',
        'SBIG01000001': 'Vibrio_alginolyticus_strain_LF_TCBS_15',
        'NZ_CP018311': 'Vibrio_rotiferianus_strain_B64D1',
        'NZ_CP032159': 'Staphylococcus_warneri_strain_22_1',
        'NZ_CP090968': 'Edwardsiella_piscicida_strain_18EpOKYJ',
        'NZ_CP044060': 'Aeromonas_veronii_strain_FDAARGOS_632',
        'NZ_AP022254': 'Aeromonas_caviae_strain_WP8_S18_ESBL_04',
        'NZ_CDBW01000001': 'Aeromonas_sobria_strain_CECT_4245',
    }

    for acc, label in accdict.items():
        fasta_path = os.path.join(output_directory, label + ".fna")
        # efetch writes the sequence to stdout; redirect it into the file
        # directly instead of going through a shell redirection.
        with open(fasta_path, "w") as fasta:
            subprocess.run(
                ["efetch", "-db", "nuccore", "-id", acc, "-format", "fasta"],
                stdout=fasta,
                check=True,
            )

    return output_directory


def print_json(version, argspath, argsname, argsout):
    """Write a single ``fastani`` data-table entry to *argsout* as JSON.

    Args:
        version: Stored under the entry's ``value`` key.
        argspath: Stored under the entry's ``path`` key.
        argsname: Stored under the entry's ``name`` key.
        argsout: Path of the file to (over)write with the JSON document.
    """
    row = {"value": version, "name": argsname, "path": argspath}
    payload = {"data_tables": {"fastani": [row]}}

    with open(argsout, "w") as out_handle:
        # Sorted keys and a two-space indent keep the output deterministic.
        out_handle.write(json.dumps(payload, indent=2, sort_keys=True))
        
def main():
    """Command-line entry point: download a database and record it.

    Reads the JSON file given by ``--out`` to find
    ``output_data[0].extra_files_path``, creates that directory, downloads
    the database selected by ``--type`` into it, and then overwrites the
    ``--out`` file with the resulting data-table entry.

    ``--type D1`` downloads only the D1 database; ``--type VL`` downloads
    D1 and then adds the extra genomes inside the D1 directory. Any other
    value is rejected by the argument parser before any work is done.
    """
    parser = argparse.ArgumentParser(description='Download FastANI Databases')
    parser.add_argument('--type', type=str, required=True, nargs=1,
                        choices=['D1', 'VL'], help='Database Type')
    parser.add_argument('--desc', type=str, required=True, nargs=1,
                        help='Database Description')
    parser.add_argument('--out', type=str, required=True, nargs=1,
                        help='output file')

    args = parser.parse_args()

    with open(args.out[0]) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # Tolerate missing parent directories and an already existing target.
    os.makedirs(output_directory, exist_ok=True)

    # Fetch the database files
    if args.type[0] == "D1":
        output_directory = download_D1(output_directory)
        version = "FastANI D1"
    else:
        # choices= above guarantees the only other value is "VL".
        output_directory = download_D1(output_directory)
        output_directory = download_VL(output_directory)
        version = "FastANI D1 + VetLIRN"

    print_json(version, output_directory, args.desc[0], args.out[0])

# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()