comparison data_manager_fastani.py @ 9:b265f23c7158 draft

Uploaded
author estrain
date Sun, 29 May 2022 20:33:37 +0000
parents
children
comparison
equal deleted inserted replaced
8:39c244c58948 9:b265f23c7158
1 #!/usr/bin/env python
2 # Errol Strain, estrain@gmail.com
3 # Database downloads for FastANI
4
5 import sys
6 import os
7 import tempfile
8 import json
9 import re
10 import argparse
11 import requests
12
13
def download_D1(output_directory):
    """Download and unpack the FastANI D1 reference database.

    The ``D1.tar.gz`` archive from the Kostas Lab server is streamed into
    *output_directory*, extracted there with ``tar`` and then deleted.
    The process working directory is left untouched.

    Args:
        output_directory: Existing directory that receives the archive
            contents.

    Returns:
        ``output_directory + "/D1"`` -- the directory the archive is
        expected to unpack into (not verified here).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        subprocess.CalledProcessError: If ``tar`` exits non-zero.
    """
    # Local import: only this helper needs it.
    import subprocess

    # FastANI databases from Kostas Lab
    url = "http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz"

    filename = url.split("/")[-1]
    archive = os.path.join(output_directory, filename)

    # Stream to disk so the whole archive is never held in memory, and fail
    # on HTTP errors instead of saving an error page as the archive.
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(archive, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)

    try:
        # Argument list (no shell) and -C instead of chdir; check=True turns
        # a failed extraction into an exception rather than a silent no-op.
        subprocess.run(
            ["tar", "xvzf", archive, "-C", output_directory],
            check=True,
        )
    finally:
        # The original removed the archive whether or not tar succeeded.
        os.remove(archive)

    return output_directory + "/D1"
33
def download_VL(output_directory):
    """Fetch the fish-pathogen reference genomes into *output_directory*.

    Each accession is retrieved from the ``nuccore`` database with the
    ``efetch`` command-line tool (which must be on ``PATH``) and written to
    ``<friendly name>.fna`` inside *output_directory*.

    Args:
        output_directory: Existing directory that receives the FASTA files.

    Returns:
        *output_directory*, unchanged.

    Raises:
        FileNotFoundError: If ``efetch`` is not installed.
        subprocess.CalledProcessError: If ``efetch`` exits non-zero.
    """
    # Local import: only this helper needs it.
    import subprocess

    # FastANI uses filenames in output. Creating user friendly names
    # for fish pathogens
    accdict = {
        'NZ_CP018680': 'Vibrio_harveyi_strain_QT520',
        'SBIG01000001': 'Vibrio_alginolyticus_strain_LF_TCBS_15',
        'NZ_CP018311': 'Vibrio_rotiferianus_strain_B64D1',
        'NZ_CP032159': 'Staphylococcus_warneri_strain_22_1',
        'NZ_CP090968': 'Edwardsiella_piscicida_strain_18EpOKYJ',
        'NZ_CP044060': 'Aeromonas_veronii_strain_FDAARGOS_632',
        'NZ_AP022254': 'Aeromonas_caviae_strain_WP8_S18_ESBL_04',
        'NZ_CDBW01000001': 'Aeromonas_sobria_strain_CECT_4245',
    }

    for acc, name in accdict.items():
        fasta = os.path.join(output_directory, name + ".fna")
        # The command used to be assembled as a string and never executed;
        # run it, sending stdout to the target file (the old "> file").
        with open(fasta, "wb") as fh:
            subprocess.run(
                ["efetch", "-db", "nuccore", "-id", acc, "-format", "fasta"],
                stdout=fh,
                check=True,
            )

    return output_directory
56
57
def print_json(version, argspath, argsname, argsout):
    """Write a one-row ``fastani`` data-table description as JSON.

    Args:
        version: Stored under the row's ``value`` key.
        argspath: Stored under the row's ``path`` key.
        argsname: Stored under the row's ``name`` key.
        argsout: Path of the file to (over)write.
    """
    row = {"value": version, "name": argsname, "path": argspath}
    payload = {"data_tables": {"fastani": [row]}}

    # Keys are sorted and indented by two spaces for a stable layout.
    with open(argsout, "w") as out_handle:
        out_handle.write(json.dumps(payload, indent=2, sort_keys=True))
74
def main():
    """Command-line entry point: download a FastANI database set.

    Reads the JSON parameter file given by ``--out`` to find the target
    directory (``output_data[0].extra_files_path``), downloads the database
    selected by ``--type`` into it, then overwrites the ``--out`` file with
    the resulting data-table entry.

    ``--type`` accepts ``D1`` (Kostas Lab D1 database only) or ``VL`` (D1 plus
    the extra fish-pathogen genomes); any other value is rejected by
    argparse with a usage error.
    """
    parser = argparse.ArgumentParser(description='Download FastANI Databases')
    # Restricting choices prevents falling through both branches below with
    # ``version`` never assigned.
    parser.add_argument('--type', type=str, required=True, nargs=1,
                        choices=['D1', 'VL'], help='Database Type')
    parser.add_argument('--desc', type=str, required=True, nargs=1,
                        help='Database Description')
    parser.add_argument('--out', type=str, required=True, nargs=1,
                        help='output file')

    args = parser.parse_args()

    with open(args.out[0]) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # Tolerate an already-existing directory and create missing parents.
    os.makedirs(output_directory, exist_ok=True)

    # Fetch the requested genome collections
    if args.type[0] == "D1":
        output_directory = download_D1(output_directory)
        version = "FastANI D1"
    else:  # "VL" -- the only other value argparse lets through
        # The extra genomes are placed inside the directory download_D1 returns.
        output_directory = download_D1(output_directory)
        output_directory = download_VL(output_directory)
        version = "FastANI D1 + VetLIRN"

    print_json(version, output_directory, args.desc[0], args.out[0])
101
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()