#!/usr/bin/env python
# Errol Strain, estrain@gmail.com
# Database downloads for NCBI AMRFinderPlus

import sys
import os
import tempfile
import shutil
import json
import re
import argparse
from ftplib import FTP


def _run_tool(argv, quiet=False):
    """Run an external indexing tool directly (no shell) and report failures.

    Args:
        argv: Command and arguments as a list of strings.
        quiet: When true, the tool's stdout and stderr are discarded.

    Returns:
        True if the tool ran and exited with status 0, False otherwise.
        Failures are reported on stderr but never raised, so a failing tool
        does not abort the remaining steps.
    """
    # Function-scope import: subprocess is not among the file-level imports.
    import subprocess

    sink = subprocess.DEVNULL if quiet else None
    try:
        status = subprocess.call(argv, stdout=sink, stderr=sink)
    except OSError as err:
        # e.g. the executable is not on PATH
        sys.stderr.write("WARNING: could not run %s: %s\n" % (argv[0], err))
        return False
    if status != 0:
        sys.stderr.write(
            "WARNING: '%s' exited with status %d\n" % (" ".join(argv), status)
        )
        return False
    return True


def download_from_ncbi():
    """Download the latest AMRFinderPlus database and index it.

    Every entry of the NCBI ``latest`` database directory whose name does not
    contain ``allele`` is downloaded into the current working directory.
    BLAST databases are then built with ``makeblastdb`` for ``AMRProt``,
    ``AMR_CDS`` and each downloaded ``AMR_DNA-*`` file whose name does not
    contain ``tab``, and ``AMR.LIB`` is indexed with ``hmmpress``.

    Returns:
        The first line of the downloaded ``version.txt``, with trailing
        whitespace removed.

    Raises:
        ftplib.all_errors: If the FTP login, listing or a download fails.
        OSError: If a local file cannot be written or ``version.txt`` cannot
            be read.
    """
    NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
    FILENAME = 'version.txt'
    NCBI_DOWNLOAD_PATH = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'

    email = 'anonymous@example.com'

    # The context manager ends the FTP session even if a transfer fails, and
    # the session is closed before the indexing tools run so the connection
    # is not left idle in the meantime.
    with FTP(NCBI_FTP_SERVER) as ftp:
        ftp.login('anonymous', email)
        ftp.cwd(NCBI_DOWNLOAD_PATH)

        # exclude the allele counts folder
        files = [name for name in ftp.nlst() if 'allele' not in name]

        for name in files:
            # Close (and thereby flush) each file before the indexers read it.
            with open(name, 'wb') as fh:
                ftp.retrbinary("RETR " + name, fh.write)

    # AMR_DNA-* nucleotide files, skipping the companion names containing "tab".
    pointmuts = [
        name for name in files
        if 'AMR_DNA-' in name and 'tab' not in name
    ]

    # Make blast databases
    _run_tool(["makeblastdb", "-in", "AMRProt", "-dbtype", "prot",
               "-logfile", "/dev/null"])
    _run_tool(["makeblastdb", "-in", "AMR_CDS", "-dbtype", "nucl",
               "-logfile", "/dev/null"])

    for name in pointmuts:
        # Passed as a separate argv element, so a server-supplied file name
        # is never interpreted by a shell.
        _run_tool(["makeblastdb", "-in", name, "-dbtype", "nucl",
                   "-logfile", "/dev/null"])

    # Make HMM indexes
    _run_tool(["hmmpress", "-f", "AMR.LIB"], quiet=True)

    # Read in version
    with open(FILENAME) as f:
        version = f.readline().rstrip()

    return version

def print_json(version, argspath, argsname, argsvalue):
    """Write the data-table entry for the new database to ``out_file``.

    The JSON document has the form
    ``{"data_tables": {"amrfinder_databases": [{"name", "path", "value"}]}}``
    and is written, with sorted keys, to a file literally named ``out_file``
    in the current working directory.

    Args:
        version: Database version string. Accepted for interface
            compatibility; it does not appear in the written JSON.
        argspath: Value stored under ``"path"``.
        argsname: Value stored under ``"name"``.
        argsvalue: Value stored under ``"value"``.
    """
    data_table_entry = {
        'data_tables': {
            'amrfinder_databases': [
                {
                    "value": argsvalue,
                    "name": argsname,
                    "path": argspath,
                }
            ]
        }
    }

    with open("out_file", 'w') as fh:
        json.dump(data_table_entry, fh, sort_keys=True)
        
def main():
    """Command-line entry point.

    Parses ``--name``, ``--path`` and ``--value``, builds the database inside
    a freshly created ``output`` directory, copies that directory to the
    requested path and writes the data-table JSON.
    """
    cli = argparse.ArgumentParser(description='Download NCBI amrFinderPlus Databases')
    option_help = (
        ('--name', 'Database name'),
        ('--path', 'Absolute path to new database folder'),
        ('--value', 'Database value'),
    )
    for flag, text in option_help:
        cli.add_argument(flag, type=str, required=True, nargs=1, help=text)

    opts = cli.parse_args()

    # nargs=1 stores every option as a one-element list.
    db_name = opts.name[0]
    db_path = opts.path[0]
    db_value = opts.value[0]

    staging_dir = "output"
    os.mkdir(staging_dir)

    # Fetch the files and build blast databases from inside the staging
    # directory, then step back out to the starting directory.
    os.chdir(staging_dir)
    version = download_from_ncbi()
    os.chdir("..")

    shutil.copytree(staging_dir, db_path)
    print_json(version, db_path, db_name, db_value)

# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
