comparison data_manager/fetch_ganon.py @ 0:a50614a513f3 draft

Uploaded
author estrain
date Thu, 04 Jul 2019 21:42:31 -0400
parents
children c220590bde7a
comparison
equal deleted inserted replaced
-1:000000000000 0:a50614a513f3
1 #!/usr/bin/env python
2
3 import argparse
4 import json
5 import os
6 import os.path
7 import sys
8 import ftplib
9 import socket
10
def get_refseq_rrna(rrna):
    """Download a RefSeq bacterial rRNA FASTA archive from the NCBI FTP site.

    Retrieves ``bacteria.<rrna>rRNA.fna.gz`` from the
    ``refseq/TargetedLoci/Bacteria/`` folder of ``ftp.ncbi.nlm.nih.gov``
    using an anonymous login and writes it, under the same name, into the
    current working directory.  Progress and error messages are printed to
    stdout.

    Args:
        rrna: rRNA label inserted into the remote file name (the
            command-line help lists 5S, 16S and 23S).

    Returns:
        A one-element list holding the downloaded file name, or ``None``
        when the host cannot be reached, the anonymous login is refused,
        the remote folder cannot be entered, or the file cannot be
        retrieved.
    """
    host = 'ftp.ncbi.nlm.nih.gov'
    folder_path = 'refseq/TargetedLoci/Bacteria/'
    file_name = "bacteria." + rrna + "rRNA.fna.gz"

    try:
        f = ftplib.FTP(host)
    except (socket.error, socket.gaierror):
        print('ERROR: cannot reach "%s"' % host)
        return None
    print('*** Connected to host "%s"' % host)

    # From here on the control connection is open; the ``finally`` clause
    # guarantees a single QUIT on every exit path.
    try:
        try:
            f.login()
        except ftplib.error_perm:
            print('ERROR: cannot login anonymously')
            return None
        print('*** Logged in as "anonymous"')

        try:
            f.cwd(folder_path)
        except ftplib.error_perm:
            print('ERROR: cannot CD to "%s"' % folder_path)
            return None
        print('*** Changed to "%s" folder' % folder_path)

        try:
            # The ``with`` block closes (and flushes) the local file before
            # it is either reported as downloaded or removed.
            with open(file_name, 'wb') as local_file:
                f.retrbinary('RETR %s' % file_name, local_file.write)
        except ftplib.error_perm:
            print('ERROR: cannot read file "%s"' % file_name)
            # Remove the empty/partial file and report failure rather than
            # returning the name of a file that no longer exists.
            os.unlink(file_name)
            return None

        print('*** Downloaded "%s" to CWD' % file_name)
        return [file_name]
    finally:
        f.quit()
51
52 def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name):
53 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
54 data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get('ganon_databases', [])
55 data_manager_dict['data_tables'][data_table_name].append(data_table_entry)
56 return data_manager_dict
57
if __name__ == '__main__':
    # Command-line entry point: download the requested rRNA archive and
    # write a JSON description of the new 'ganon_databases' entry to a file
    # named "output_file" in the current working directory.
    parser = argparse.ArgumentParser(description='Download RefSeq rRNA bacterial databases')
    parser.add_argument('--output_directory', default='/tool-data/ganon', help='Directory to write output to')
    # Required: without a value the remote file name cannot be built.
    parser.add_argument('--rrna', required=True, help='rRNA sequences to download (5S, 16S, or 23S)')
    args = parser.parse_args()

    output_directory = args.output_directory
    if not os.path.exists(output_directory):
        # makedirs also creates missing parents (e.g. /tool-data for the
        # default path), which plain mkdir would refuse to do.
        os.makedirs(output_directory)

    # NOTE(review): the archive is saved in the current working directory
    # while the table entry below points at output_directory -- confirm
    # that the calling wrapper relocates the file.
    outfile = get_refseq_rrna(args.rrna)
    if outfile is None:
        # Do not register a database entry when the download helper
        # reported a failure.
        sys.exit('ERROR: could not download %s rRNA sequences' % args.rrna)

    data_manager_dict = {}
    _add_data_table_entry(data_manager_dict=data_manager_dict,
                          data_table_entry=dict(value=args.rrna, dbkey=args.rrna, name=args.rrna, path=args.output_directory),
                          data_table_name='ganon_databases')
    with open("output_file", 'w') as json_out:
        json_out.write(json.dumps(data_manager_dict, sort_keys=True))
76