#!/usr/bin/env python
# Errol Strain, estrain@gmail.com
# Database downloads for FastANI 

import sys
import os
import tempfile
import json
import re
import argparse
import requests
import csv

def download_D1(output_directory):
    """Download and unpack the FastANI D1 database archive.

    The ``D1.tar.gz`` archive is fetched into *output_directory*,
    extracted, and the contents of its ``D1`` folder are moved up so they
    sit directly in *output_directory*. The archive and the emptied
    ``D1`` folder are removed afterwards.

    Args:
        output_directory: Existing directory that receives the files.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If unpacking the archive fails.
    """
    # FastANI databases from Kostas Lab
    url = "http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz"
    filename = url.split("/")[-1]

    cwd = os.getcwd()
    os.chdir(output_directory)
    try:
        # Stream the body to disk in chunks rather than holding the whole
        # archive in memory, and fail on HTTP errors instead of saving an
        # error page under the tarball's name.
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            with open(filename, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

        # Chain with && so a failed step stops the sequence and its exit
        # status is what os.system() reports.
        tarcmd = "tar xvzf D1.tar.gz && mv D1/* . && rm -Rf D1 D1.tar.gz"
        if os.system(tarcmd) != 0:
            raise RuntimeError("Failed to unpack " + filename)
    finally:
        # Always restore the caller's working directory, even on failure.
        os.chdir(cwd)

def download_VL(output_directory, accfile):
    """Download the representative RefSeq genomes listed in a CSV file.

    Every non-empty row of *accfile* supplies, in its first column, a
    folder name below the NCBI RefSeq bacteria FTP root and, in its second
    column, the name used both for the sub-folder under ``representative``
    and as the prefix of the ``_genomic.fna.gz`` file. Each genome is
    saved in *output_directory* as ``<first column>.fna.gz`` and all
    ``*.gz`` files there are then decompressed with ``gunzip``.

    Args:
        output_directory: Existing directory that receives the genomes.
        accfile: Path to the CSV accession file. It is read before the
            working directory changes, so a relative path is resolved
            against the caller's working directory.

    Raises:
        requests.HTTPError: If a genome download returns an error status.
        IndexError: If a non-empty CSV row has fewer than two columns.
    """
    baseurl = "https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/"

    # Read the accession list first: after os.chdir() a relative accfile
    # path would point inside output_directory. Blank rows are skipped;
    # for duplicate first-column values the last row wins.
    with open(accfile, mode='r') as inp:
        urldict = {row[0]: row[1] for row in csv.reader(inp) if row}

    cwd = os.getcwd()
    os.chdir(output_directory)
    try:
        # FastANI uses filenames in output. Creating user friendly names
        # for fish pathogens
        for key, acc in urldict.items():
            url = baseurl + key + "/representative/" + acc + "/" + acc + "_genomic.fna.gz"
            filename = key + '.fna.gz'
            print(url + "\n")
            # Stream to disk and fail on HTTP errors rather than saving
            # an error page as a .gz file.
            with requests.get(url, stream=True) as r:
                r.raise_for_status()
                with open(filename, "wb") as f:
                    for chunk in r.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)

        # Decompression problems are reported but not fatal, matching the
        # original best-effort behaviour of this step.
        cmd = "gunzip *.gz"
        if os.system(cmd) != 0:
            sys.stderr.write("Warning: '" + cmd + "' exited with a non-zero status\n")
    finally:
        # Always restore the caller's working directory, even on failure.
        os.chdir(cwd)


def print_json(version, argspath, argsname, argsout):
    """Write the ``fastani`` data table entry to *argsout* as JSON.

    Args:
        version: Stored under the entry's ``value`` key.
        argspath: Stored under the entry's ``path`` key.
        argsname: Stored under the entry's ``name`` key.
        argsout: Path of the file to (over)write.
    """
    # Assemble the structure from the inside out: a single entry, wrapped
    # in the list registered for the "fastani" table.
    entry = {}
    entry["value"] = version
    entry["name"] = argsname
    entry["path"] = argspath

    tables = {'fastani': [entry]}
    payload = {'data_tables': tables}

    # Keys are sorted and the output is indented by two spaces.
    with open(argsout, 'w') as out_handle:
        json.dump(payload, out_handle, indent=2, sort_keys=True)
        
def main():
    """Parse the command line, download the database and write the JSON.

    The file given by ``--out`` is first read as JSON to obtain the output
    directory (``output_data[0].extra_files_path``) and is overwritten at
    the end with the data table entry produced by ``print_json``.

    ``--type D1`` downloads the D1 database only; ``--type VL`` downloads
    D1 plus the genomes listed in the ``--acc`` CSV file.

    Invalid arguments (an unknown ``--type``, or ``--type VL`` without
    ``--acc``) are rejected by argparse before any file or network access.
    """
    parser = argparse.ArgumentParser(description='Download FastANI Databases')
    # Restricting the choices prevents an unknown type from falling through
    # both branches below and leaving `version` undefined.
    parser.add_argument('--type', type=str, required=True, nargs=1, choices=['D1', 'VL'], help='Database Type')
    parser.add_argument('--name', type=str, required=True, nargs=1, help='Unique Database Folder Name')
    parser.add_argument('--acc', type=str, required=False, nargs=1, help='CSV Accession file with NCBI ftp folder IDs, see tool-data')
    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')

    args = parser.parse_args()

    db_type = args.type[0]
    db_name = args.name[0]
    out_path = args.out[0]

    # Check this up front: otherwise the missing accession file is only
    # noticed after the whole D1 download has completed.
    if db_type == "VL" and not args.acc:
        parser.error("--acc is required when --type is VL")

    with open(out_path) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # Create missing parent directories too, and accept an existing one.
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)

    # Fetch the files that make up the FastANI database
    if db_type == "D1":
        download_D1(output_directory)
        version = "FastANI D1 - " + db_name
    else:  # "VL" -- the only other value argparse accepts
        download_D1(output_directory)
        download_VL(output_directory, args.acc[0])
        version = "FastANI D1 + VetLIRN - " + db_name

    print_json(version, output_directory, db_name, out_path)

# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()
