|
9
|
1 #!/usr/bin/env python
|
|
|
2 # Errol Strain, estrain@gmail.com
|
|
|
3 # Database downloads for FastANI
|
|
|
4
|
|
|
5 import sys
|
|
|
6 import os
|
|
|
7 import tempfile
|
|
|
8 import json
|
|
|
9 import re
|
|
|
10 import argparse
|
|
|
11 import requests
|
|
|
12
|
|
|
13
|
|
|
def download_D1(output_directory):
    """Download and unpack the FastANI D1 database archive.

    The archive is streamed to ``output_directory``, extracted there with
    the system ``tar`` command, and then deleted.

    Args:
        output_directory: Existing directory that receives the extracted
            database.

    Returns:
        ``output_directory + "/D1"``, the path the extracted database is
        expected to live at (assumes the archive unpacks into a top-level
        ``D1`` directory -- not verified here).

    Raises:
        requests.HTTPError: The server answered with an error status.
        subprocess.CalledProcessError: ``tar`` exited with a non-zero status.
    """
    import subprocess

    # FastANI databases from Kostas Lab
    url = "http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz"

    # Work with explicit paths instead of os.chdir() so the caller's working
    # directory is never left changed when something fails midway.
    archive = os.path.join(output_directory, url.split("/")[-1])

    try:
        # Stream to disk: the archive is too large to hold in memory, and an
        # HTTP error must not be silently saved as if it were the tarball.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(archive, "wb") as fh:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    fh.write(chunk)

        # Argument list (no shell) and check=True so a failed extraction
        # raises instead of being ignored.
        subprocess.run(
            ["tar", "xvzf", archive, "-C", output_directory],
            check=True,
        )
    finally:
        # Always drop the (possibly partial) archive.
        if os.path.exists(archive):
            os.remove(archive)

    return output_directory + "/D1"
|
|
|
33
|
|
|
def download_VL(output_directory):
    """Fetch the fish-pathogen reference genomes into ``output_directory``.

    Each accession is retrieved from the ``nuccore`` database with the
    ``efetch`` command-line tool (which must be available on ``PATH``) and
    written as ``<friendly name>.fna``.

    Args:
        output_directory: Existing directory that receives the FASTA files.

    Returns:
        ``output_directory``, unchanged.

    Raises:
        FileNotFoundError: ``efetch`` is not installed or the directory does
            not exist.
        subprocess.CalledProcessError: ``efetch`` exited with a non-zero
            status.
    """
    import subprocess

    # FastANI uses filenames in output. Creating user friendly names
    # for fish pathogens
    accdict = {
        'NZ_CP018680': 'Vibrio_harveyi_strain_QT520',
        'SBIG01000001': 'Vibrio_alginolyticus_strain_LF_TCBS_15',
        'NZ_CP018311': 'Vibrio_rotiferianus_strain_B64D1',
        'NZ_CP032159': 'Staphylococcus_warneri_strain_22_1',
        'NZ_CP090968': 'Edwardsiella_piscicida_strain_18EpOKYJ',
        'NZ_CP044060': 'Aeromonas_veronii_strain_FDAARGOS_632',
        'NZ_AP022254': 'Aeromonas_caviae_strain_WP8_S18_ESBL_04',
        'NZ_CDBW01000001': 'Aeromonas_sobria_strain_CECT_4245',
    }

    for acc, label in accdict.items():
        fasta_path = os.path.join(output_directory, label + ".fna")
        # The command was previously only assembled as a string and never
        # run; execute it, sending efetch's stdout straight to the file.
        with open(fasta_path, "wb") as fh:
            subprocess.run(
                ["efetch", "-db", "nuccore", "-id", acc, "-format", "fasta"],
                stdout=fh,
                check=True,
            )

    return output_directory
|
|
|
56
|
|
|
57
|
|
|
def print_json(version, argspath, argsname, argsout):
    """Write the ``fastani`` data table entry to ``argsout`` as JSON.

    Args:
        version: Stored under the entry's ``value`` key.
        argspath: Stored under the entry's ``path`` key.
        argsname: Stored under the entry's ``name`` key.
        argsout: Path of the JSON file to (over)write.
    """
    row = {"value": version, "name": argsname, "path": argspath}
    payload = {'data_tables': {'fastani': [row]}}

    # Serialise first, then write in one go; the result is identical to
    # dumping directly into the file handle.
    serialised = json.dumps(payload, indent=2, sort_keys=True)
    with open(argsout, 'w') as out_handle:
        out_handle.write(serialised)
|
|
|
74
|
|
|
def main():
    """Command-line entry point: download a FastANI database and report it.

    The file given by ``--out`` is used twice: it is first read as JSON to
    find ``output_data[0].extra_files_path`` (the directory to populate),
    and is then overwritten with the resulting data table entry.
    NOTE(review): this looks like the Galaxy data manager convention --
    confirm against the tool wrapper.

    ``--type D1`` downloads the D1 database; ``--type VL`` downloads D1 and
    then adds the extra genomes fetched by ``download_VL`` to it.
    """
    parser = argparse.ArgumentParser(description='Download FastANI Databases')
    # Restrict to the supported types so an unknown value is rejected with a
    # usage error up front rather than failing later on an unbound variable.
    parser.add_argument('--type', type=str, required=True, nargs=1,
                        choices=['D1', 'VL'], help='Database Type')
    parser.add_argument('--desc', type=str, required=True, nargs=1,
                        help='Database Description')
    parser.add_argument('--out', type=str, required=True, nargs=1,
                        help='output file')

    args = parser.parse_args()

    with open(args.out[0]) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # Tolerate an already existing directory and create missing parents.
    os.makedirs(output_directory, exist_ok=True)

    # Fetch the database files. Both types start from the D1 database;
    # "VL" additionally places the extra genomes inside the D1 directory.
    output_directory = download_D1(output_directory)
    version = "FastANI D1"
    if args.type[0] == "VL":
        output_directory = download_VL(output_directory)
        version = "FastANI D1 + VetLIRN"

    print_json(version, output_directory, args.desc[0], args.out[0])
|
|
|
101
|
|
|
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|