Mercurial > repos > estrain > data_manager_fastani
view data_manager_fastani.py @ 9:b265f23c7158 draft
Uploaded
| author | estrain |
|---|---|
| date | Sun, 29 May 2022 20:33:37 +0000 |
| parents | |
| children |
line wrap: on
line source
#!/usr/bin/env python

# Errol Strain, estrain@gmail.com
# Database downloads for FastANI

import sys
import os
import subprocess
import tempfile
import json
import re
import argparse

# FastANI database "D1" from the Kostas Lab.
D1_URL = "http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz"

# (connect, read) timeouts in seconds so a stalled server cannot hang the job.
HTTP_TIMEOUT = (30, 300)

# FastANI uses file names in its output, so every accession is stored under a
# user-friendly organism/strain name.  These genomes are fish pathogens.
VETLIRN_GENOMES = {
    'NZ_CP018680': 'Vibrio_harveyi_strain_QT520',
    'SBIG01000001': 'Vibrio_alginolyticus_strain_LF_TCBS_15',
    'NZ_CP018311': 'Vibrio_rotiferianus_strain_B64D1',
    'NZ_CP032159': 'Staphylococcus_warneri_strain_22_1',
    'NZ_CP090968': 'Edwardsiella_piscicida_strain_18EpOKYJ',
    'NZ_CP044060': 'Aeromonas_veronii_strain_FDAARGOS_632',
    'NZ_AP022254': 'Aeromonas_caviae_strain_WP8_S18_ESBL_04',
    'NZ_CDBW01000001': 'Aeromonas_sobria_strain_CECT_4245',
}

# Values accepted by --type.
DATABASE_TYPES = ("D1", "VL")


def download_D1(output_directory):
    """Download the FastANI D1 archive and unpack it in *output_directory*.

    The tarball is streamed to disk, extracted with ``tar xvzf`` and then
    deleted (also when the download or the extraction fails).  The process
    working directory is left untouched.

    Args:
        output_directory: Existing directory that receives the extracted files.

    Returns:
        ``output_directory`` joined with ``"D1"``, the sub-directory the
        archive is expected to create (its existence is not checked here).

    Raises:
        requests.HTTPError: The server answered with an error status.
        subprocess.CalledProcessError: ``tar`` exited with a non-zero status.
    """
    # Imported here so that the rest of the module (argument parsing, JSON
    # output, efetch downloads) does not need the third-party package.
    import requests

    archive_name = D1_URL.split("/")[-1]
    archive_path = os.path.join(output_directory, archive_name)
    try:
        with requests.get(D1_URL, stream=True, timeout=HTTP_TIMEOUT) as response:
            # Fail loudly instead of saving an HTML error page as the tarball.
            response.raise_for_status()
            with open(archive_path, "wb") as archive:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    archive.write(chunk)
        subprocess.run(
            ["tar", "xvzf", archive_name],
            cwd=output_directory,
            check=True,
        )
    finally:
        if os.path.exists(archive_path):
            os.remove(archive_path)
    return os.path.join(output_directory, "D1")


def download_VL(output_directory):
    """Fetch every genome in ``VETLIRN_GENOMES`` as FASTA with ``efetch``.

    Each accession is written to ``<output_directory>/<friendly name>.fna``.
    The ``efetch`` executable must be available on ``PATH``.

    Args:
        output_directory: Existing directory that receives the ``.fna`` files.

    Returns:
        *output_directory*, unchanged.

    Raises:
        subprocess.CalledProcessError: ``efetch`` exited with a non-zero status.
        OSError: ``efetch`` could not be started or a file could not be written.
    """
    for accession, label in VETLIRN_GENOMES.items():
        fasta_path = os.path.join(output_directory, label + ".fna")
        with open(fasta_path, "wb") as fasta:
            subprocess.run(
                ["efetch", "-db", "nuccore", "-id", accession,
                 "-format", "fasta"],
                stdout=fasta,
                check=True,
            )
    return output_directory


def print_json(version, argspath, argsname, argsout):
    """Write a one-entry ``fastani`` data table description as JSON.

    Args:
        version: Stored under the entry's ``value`` key.
        argspath: Stored under the entry's ``path`` key.
        argsname: Stored under the entry's ``name`` key.
        argsout: Path of the JSON file to create or overwrite.
    """
    data_table_entry = {
        'data_tables': {
            'fastani': [
                {
                    "value": version,
                    "name": argsname,
                    "path": argspath,
                }
            ]
        }
    }

    with open(argsout, 'w') as fh:
        json.dump(data_table_entry, fh, indent=2, sort_keys=True)


def main():
    """Parse the command line, download the database and write the JSON entry.

    ``--out`` names a JSON file that is first read to obtain
    ``output_data[0].extra_files_path`` (the download directory) and is then
    overwritten with the data table entry produced by ``print_json``.
    """
    parser = argparse.ArgumentParser(description='Download FastANI Databases')
    parser.add_argument('--type', type=str, required=True, nargs=1,
                        choices=DATABASE_TYPES, help='Database Type')
    parser.add_argument('--desc', type=str, required=True, nargs=1,
                        help='Database Description')
    parser.add_argument('--out', type=str, required=True, nargs=1,
                        help='output file')

    args = parser.parse_args()

    with open(args.out[0]) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # Tolerate an already existing directory and create missing parents.
    os.makedirs(output_directory, exist_ok=True)

    # Fetch the database files; "VL" is D1 plus the extra genomes, which are
    # stored inside the extracted D1 directory.
    database_type = args.type[0]
    output_directory = download_D1(output_directory)
    if database_type == "D1":
        version = "FastANI D1"
    else:
        output_directory = download_VL(output_directory)
        version = "FastANI D1 + VetLIRN"

    print_json(version, output_directory, args.desc[0], args.out[0])


if __name__ == "__main__":
    main()
