# HG changeset patch # User estrain # Date 1653856428 0 # Node ID b68fd88c00830cdb4e23e80485594d0805a32f89 # Parent b265f23c7158ff8793045d246f3d07096bc9c498 Deleted selected files diff -r b265f23c7158 -r b68fd88c0083 data_manager_fastani.py --- a/data_manager_fastani.py Sun May 29 20:33:37 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,102 +0,0 @@ -#!/usr/bin/env python -# Errol Strain, estrain@gmail.com -# Database downloads for FastANI - -import sys -import os -import tempfile -import json -import re -import argparse -import requests - - -def download_D1(output_directory): - - #FastANI databases from Kostas Lab - url="http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz" - - cwd = os.getcwd() - os.chdir(output_directory) - - filename = url.split("/")[-1] - with open(filename, "wb") as f: - r = requests.get(url) - f.write(r.content) - - tarcmd="tar xvzf D1.tar.gz; rm -Rf D1.tar.gz" - os.system(tarcmd) - - os.chdir(cwd) - - return output_directory + "/D1" - -def download_VL(output_directory): - - cwd = os.getcwd() - os.chdir(output_directory) - - #FastANI uses filenames in output. Creating user friendly names - #for fish pathogens - accdict={'NZ_CP018680':'Vibrio_harveyi_strain_QT520', - 'SBIG01000001':'Vibrio_alginolyticus_strain_LF_TCBS_15', - 'NZ_CP018311':'Vibrio_rotiferianus_strain_B64D1', - 'NZ_CP032159':'Staphylococcus_warneri_strain_22_1', - 'NZ_CP090968':'Edwardsiella_piscicida_strain_18EpOKYJ', - 'NZ_CP044060':'Aeromonas_veronii_strain_FDAARGOS_632', - 'NZ_AP022254':'Aeromonas_caviae_strain_WP8_S18_ESBL_04', - 'NZ_CDBW01000001':'Aeromonas_sobria_strain_CECT_4245', - 'NZ_CP018311':'Vibrio_rotiferianus_strain_B64D1'} - - for acc in accdict: - ecmd = "efetch -db nuccore -id " + acc + " -format fasta > " + accdict[acc] + ".fna" - - os.chdir(cwd) - return output_directory - - -def print_json (version,argspath,argsname,argsout): - - data_table_entry = { - 'data_tables' : { - 'fastani': [ - { - "value":version, - "name":argsname, - "path":argspath, - } - ] - } - } - - with open(argsout, 'w') as fh: - json.dump(data_table_entry, fh, indent=2, sort_keys=True) - -def main(): - - parser = argparse.ArgumentParser(description='Download FastANI Databases') - parser.add_argument('--type', type=str, required=True, nargs=1, help='Database Type') - parser.add_argument('--desc', type=str, required=True, nargs=1, help='Database Description') - parser.add_argument('--out', type=str, required=True, nargs=1, help='output file') - - args = parser.parse_args() - - with open(args.out[0]) as fh: - params = json.load(fh) - - output_directory = params['output_data'][0]['extra_files_path'] - os.mkdir(output_directory) - data_manager_dict = {} - - #Fetch the files and build blast databases - if(args.type[0]=="D1"): - output_directory=download_D1(output_directory) - version="FastANI D1" - elif(args.type[0]=="VL"): - output_directory=download_D1(output_directory) - output_directory=download_VL(output_directory) - version="FastANI D1 + VetLIRN" - - print_json(version,output_directory,args.desc[0],args.out[0]) - -if __name__ == "__main__": main()