annotate data_manager_fastani.py @ 9:b265f23c7158 draft

Uploaded
author estrain
date Sun, 29 May 2022 20:33:37 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
b265f23c7158 Uploaded
estrain
parents:
diff changeset
1 #!/usr/bin/env python
b265f23c7158 Uploaded
estrain
parents:
diff changeset
2 # Errol Strain, estrain@gmail.com
b265f23c7158 Uploaded
estrain
parents:
diff changeset
3 # Database downloads for FastANI
b265f23c7158 Uploaded
estrain
parents:
diff changeset
4
b265f23c7158 Uploaded
estrain
parents:
diff changeset
import argparse
import json
import os
import re
import subprocess
import sys
import tempfile

import requests
b265f23c7158 Uploaded
estrain
parents:
diff changeset
12
b265f23c7158 Uploaded
estrain
parents:
diff changeset
13
b265f23c7158 Uploaded
estrain
parents:
diff changeset
def download_D1(output_directory):
    """Download and unpack the FastANI D1 reference database.

    Fetches ``D1.tar.gz`` into *output_directory*, extracts it there with
    the system ``tar`` command and deletes the tarball afterwards.

    Args:
        output_directory: Existing directory that receives the archive and
            its extracted contents.

    Returns:
        ``output_directory + "/D1"``. This assumes the archive unpacks into
        a top-level ``D1`` directory; the function does not verify it.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        subprocess.CalledProcessError: If ``tar`` exits with a non-zero
            status.
    """
    # FastANI databases from Kostas Lab
    url = "http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz"
    filename = url.split("/")[-1]
    archive_path = os.path.join(output_directory, filename)

    try:
        # Stream to disk in chunks so the archive is never held in memory,
        # and fail on HTTP errors instead of saving an error page as the
        # tarball.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(archive_path, "wb") as archive:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    archive.write(chunk)

        # Running tar with cwd= avoids changing (and having to restore) the
        # process-wide working directory; check=True surfaces a failed
        # extraction instead of silently returning a path that is missing.
        subprocess.run(
            ["tar", "xvzf", filename],
            cwd=output_directory,
            check=True,
        )
    finally:
        # The tarball is only a transport artefact: remove it whether or
        # not the download/extraction succeeded.
        if os.path.exists(archive_path):
            os.remove(archive_path)

    return output_directory + "/D1"
b265f23c7158 Uploaded
estrain
parents:
diff changeset
33
b265f23c7158 Uploaded
estrain
parents:
diff changeset
def download_VL(output_directory):
    """Fetch the additional reference genomes with ``efetch``.

    For every accession in the table below, runs
    ``efetch -db nuccore -id <accession> -format fasta`` and stores the
    output as ``<friendly_name>.fna`` inside *output_directory*.

    The previous implementation only assembled the command string and
    never executed it, so no genome was ever downloaded; the command is
    now actually run.

    Args:
        output_directory: Existing directory that receives the FASTA files.

    Returns:
        *output_directory*, unchanged.

    Raises:
        FileNotFoundError: If the ``efetch`` executable is not on ``PATH``.
        subprocess.CalledProcessError: If ``efetch`` exits with a non-zero
            status.
    """
    # FastANI uses filenames in output. Creating user friendly names
    # for fish pathogens
    accdict = {
        'NZ_CP018680': 'Vibrio_harveyi_strain_QT520',
        'SBIG01000001': 'Vibrio_alginolyticus_strain_LF_TCBS_15',
        'NZ_CP018311': 'Vibrio_rotiferianus_strain_B64D1',
        'NZ_CP032159': 'Staphylococcus_warneri_strain_22_1',
        'NZ_CP090968': 'Edwardsiella_piscicida_strain_18EpOKYJ',
        'NZ_CP044060': 'Aeromonas_veronii_strain_FDAARGOS_632',
        'NZ_AP022254': 'Aeromonas_caviae_strain_WP8_S18_ESBL_04',
        'NZ_CDBW01000001': 'Aeromonas_sobria_strain_CECT_4245',
    }

    for acc, friendly_name in accdict.items():
        fasta_path = os.path.join(output_directory, friendly_name + ".fna")
        # Redirect stdout straight into the target file; passing an argument
        # list (no shell) replaces the former "> file" shell redirection.
        with open(fasta_path, "wb") as fasta:
            subprocess.run(
                ["efetch", "-db", "nuccore", "-id", acc, "-format", "fasta"],
                stdout=fasta,
                check=True,
            )

    return output_directory
b265f23c7158 Uploaded
estrain
parents:
diff changeset
56
b265f23c7158 Uploaded
estrain
parents:
diff changeset
57
b265f23c7158 Uploaded
estrain
parents:
diff changeset
def print_json(version, argspath, argsname, argsout):
    """Write a single-row ``fastani`` data-table entry to *argsout* as JSON.

    Args:
        version: Stored under the row's ``"value"`` key.
        argspath: Stored under the row's ``"path"`` key.
        argsname: Stored under the row's ``"name"`` key.
        argsout: Path of the JSON file to (over)write.
    """
    row = {
        "value": version,
        "name": argsname,
        "path": argspath,
    }
    # The row is wrapped as {"data_tables": {"fastani": [row]}}.
    payload = {'data_tables': {'fastani': [row]}}

    with open(argsout, 'w') as out_handle:
        json.dump(payload, out_handle, indent=2, sort_keys=True)
b265f23c7158 Uploaded
estrain
parents:
diff changeset
74
b265f23c7158 Uploaded
estrain
parents:
diff changeset
def main(argv=None):
    """Command-line entry point: download a FastANI database and record it.

    Steps:
      1. Parse ``--type`` (``D1`` or ``VL``), ``--desc`` and ``--out``.
      2. Read the JSON file at ``--out`` and take the target directory from
         ``output_data[0].extra_files_path`` (presumably the parameter file
         handed over by the Galaxy data-manager framework -- confirm).
      3. Download the requested database into that directory.
      4. Overwrite ``--out`` with the resulting data-table entry.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what the
            script did before this parameter existed.

    Raises:
        SystemExit: Raised by argparse (exit status 2) for missing arguments
            or an unsupported ``--type``.
    """
    parser = argparse.ArgumentParser(description='Download FastANI Databases')
    # choices= rejects unknown types at parse time; previously an unknown
    # type fell through both branches and crashed later on the unbound
    # ``version`` variable, after the output directory had been created.
    parser.add_argument('--type', type=str, required=True, nargs=1,
                        choices=['D1', 'VL'], help='Database Type')
    parser.add_argument('--desc', type=str, required=True, nargs=1,
                        help='Database Description')
    parser.add_argument('--out', type=str, required=True, nargs=1,
                        help='output file')

    args = parser.parse_args(argv)
    # nargs=1 wraps every value in a one-element list.
    db_type = args.type[0]
    description = args.desc[0]
    out_path = args.out[0]

    with open(out_path) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # makedirs tolerates missing parents and an already existing directory,
    # both of which made the former os.mkdir call fail.
    os.makedirs(output_directory, exist_ok=True)

    # Fetch the database files
    if db_type == "D1":
        output_directory = download_D1(output_directory)
        version = "FastANI D1"
    else:
        # "VL": the extra genomes are placed inside the unpacked D1
        # directory, so a single path covers both sets.
        output_directory = download_D1(output_directory)
        output_directory = download_VL(output_directory)
        version = "FastANI D1 + VetLIRN"

    print_json(version, output_directory, description, out_path)
b265f23c7158 Uploaded
estrain
parents:
diff changeset
101
b265f23c7158 Uploaded
estrain
parents:
diff changeset
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()