Mercurial > repos > estrain > data_manager_fastani
comparison data_manager_fastani.py @ 9:b265f23c7158 draft
Uploaded
| author | estrain |
|---|---|
| date | Sun, 29 May 2022 20:33:37 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 8:39c244c58948 | 9:b265f23c7158 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # Errol Strain, estrain@gmail.com | |
| 3 # Database downloads for FastANI | |
| 4 | |
| 5 import sys | |
| 6 import os | |
| 7 import tempfile | |
| 8 import json | |
| 9 import re | |
| 10 import argparse | |
| 11 import requests | |
| 12 | |
| 13 | |
def download_D1(output_directory):
    """Download and unpack the FastANI D1 reference database.

    The ``D1.tar.gz`` archive is fetched into *output_directory*, extracted
    there, and the archive file is removed afterwards (also on failure).

    Args:
        output_directory: Existing directory that receives the extracted
            files.

    Returns:
        ``output_directory + "/D1"`` -- presumably the directory the archive
        unpacks to; the archive layout is not verified here.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        tarfile.TarError: If the downloaded file is not a readable archive.
    """
    # Imported locally because only this function needs it.
    import tarfile

    # FastANI databases from Kostas Lab
    url = "http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz"
    archive_path = os.path.join(output_directory, url.split("/")[-1])

    try:
        # Stream to disk so the whole tarball is never held in memory, and
        # fail on HTTP errors instead of saving an error page as the archive.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(archive_path, "wb") as archive:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    archive.write(chunk)

        # Extract with the standard library so a corrupt archive raises
        # instead of being silently ignored by an unchecked shell command.
        with tarfile.open(archive_path, "r:gz") as tar:
            tar.extractall(output_directory)
    finally:
        # Always drop the archive, including a partially written one.
        if os.path.exists(archive_path):
            os.remove(archive_path)

    return output_directory + "/D1"
| 33 | |
def download_VL(output_directory):
    """Fetch the additional fish-pathogen genomes into *output_directory*.

    Each accession is requested as FASTA from the ``nuccore`` database with
    the ``efetch`` command-line tool (assumed to be on ``PATH``) and written
    to ``<friendly name>.fna`` inside *output_directory*.

    Args:
        output_directory: Existing directory that receives the FASTA files.

    Returns:
        *output_directory*, unchanged.

    Raises:
        FileNotFoundError: If the ``efetch`` executable cannot be found.
        subprocess.CalledProcessError: If ``efetch`` exits with a non-zero
            status for any accession.
    """
    # Imported locally because only this function needs it.
    import subprocess

    # FastANI uses filenames in output. Creating user friendly names
    # for fish pathogens
    accdict = {
        'NZ_CP018680': 'Vibrio_harveyi_strain_QT520',
        'SBIG01000001': 'Vibrio_alginolyticus_strain_LF_TCBS_15',
        'NZ_CP018311': 'Vibrio_rotiferianus_strain_B64D1',
        'NZ_CP032159': 'Staphylococcus_warneri_strain_22_1',
        'NZ_CP090968': 'Edwardsiella_piscicida_strain_18EpOKYJ',
        'NZ_CP044060': 'Aeromonas_veronii_strain_FDAARGOS_632',
        'NZ_AP022254': 'Aeromonas_caviae_strain_WP8_S18_ESBL_04',
        'NZ_CDBW01000001': 'Aeromonas_sobria_strain_CECT_4245',
    }

    for acc, friendly_name in accdict.items():
        fasta_path = os.path.join(output_directory, friendly_name + ".fna")
        # The command was previously only assembled as a string and never
        # run; execute it without a shell and capture stdout in the file.
        with open(fasta_path, "w") as fasta:
            subprocess.run(
                ["efetch", "-db", "nuccore", "-id", acc, "-format", "fasta"],
                stdout=fasta,
                check=True,
            )

    return output_directory
| 56 | |
| 57 | |
def print_json(version, argspath, argsname, argsout):
    """Write a one-row ``fastani`` data-table description as JSON.

    Args:
        version: Stored under the row's ``value`` key.
        argspath: Stored under the row's ``path`` key.
        argsname: Stored under the row's ``name`` key.
        argsout: Path of the file to (over)write with the JSON document.
    """
    row = {"value": version, "name": argsname, "path": argspath}
    payload = {"data_tables": {"fastani": [row]}}

    # Keys are sorted and indented by two spaces for stable output.
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    with open(argsout, "w") as out_handle:
        out_handle.write(serialized)
| 74 | |
def main():
    """Command-line entry point: download a FastANI database and report it.

    The file named by ``--out`` is first read as JSON to obtain
    ``output_data[0].extra_files_path`` (the download directory) and is then
    overwritten with the resulting data-table entry.

    ``--type`` selects what is downloaded: ``D1`` for the D1 database alone,
    ``VL`` for D1 plus the additional genomes from ``download_VL``.
    """
    parser = argparse.ArgumentParser(description='Download FastANI Databases')
    # Restricting the choices rejects unknown types up front; previously an
    # unknown type fell through both branches and crashed on an unbound
    # ``version``.
    parser.add_argument('--type', type=str, required=True, nargs=1,
                        choices=['D1', 'VL'], help='Database Type')
    parser.add_argument('--desc', type=str, required=True, nargs=1, help='Database Description')
    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')

    args = parser.parse_args()

    with open(args.out[0]) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # Tolerate a pre-existing directory and create missing parents.
    os.makedirs(output_directory, exist_ok=True)

    # Fetch the database files
    if args.type[0] == "D1":
        output_directory = download_D1(output_directory)
        version = "FastANI D1"
    else:
        # "VL": the extra genomes are placed in the directory returned for D1.
        output_directory = download_D1(output_directory)
        output_directory = download_VL(output_directory)
        version = "FastANI D1 + VetLIRN"

    print_json(version, output_directory, args.desc[0], args.out[0])
| 101 | |
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
