0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import argparse
|
|
4 import json
|
|
5 import os
|
|
6 import os.path
|
|
7 import sys
|
|
8 import ftplib
|
|
9 import socket
|
|
10
|
|
def get_refseq_rrna(rrna):
    """Download a RefSeq bacterial rRNA FASTA archive from the NCBI FTP server.

    The file ``bacteria.<rrna>rRNA.fna.gz`` is fetched from
    ``refseq/TargetedLoci/Bacteria/`` on ``ftp.ncbi.nlm.nih.gov`` using an
    anonymous login and written to the current working directory.

    Args:
        rrna: rRNA label inserted into the remote file name (e.g. ``"16S"``).

    Returns:
        A one-element list holding the downloaded file name, or ``None`` when
        the host cannot be reached, the login or directory change is refused,
        or the file cannot be retrieved.
    """
    host = 'ftp.ncbi.nlm.nih.gov'
    folder_path = 'refseq/TargetedLoci/Bacteria/'
    file_name = "bacteria." + rrna + "rRNA.fna.gz"

    try:
        ftp = ftplib.FTP(host)
    except (socket.error, socket.gaierror):
        print('ERROR: cannot reach "%s"' % host)
        return None
    print('*** Connected to host "%s"' % host)

    # From here on the session is open; the ``finally`` guarantees that QUIT
    # is sent on every exit path, including unexpected exceptions.
    try:
        try:
            ftp.login()
        except ftplib.error_perm:
            print('ERROR: cannot login anonymously')
            return None
        print('*** Logged in as "anonymous"')

        try:
            ftp.cwd(folder_path)
        except ftplib.error_perm:
            print('ERROR: cannot CD to "%s"' % folder_path)
            return None
        print('*** Changed to "%s" folder' % folder_path)

        try:
            # The context manager flushes and closes the local file before
            # it is either reported as downloaded or removed.
            with open(file_name, 'wb') as local_file:
                ftp.retrbinary('RETR %s' % file_name, local_file.write)
        except ftplib.error_perm:
            print('ERROR: cannot read file "%s"' % file_name)
            # Remove the empty/partial file created by open().
            os.unlink(file_name)
            # Do not report a file that no longer exists.
            return None
        print('*** Downloaded "%s" to CWD' % file_name)
    finally:
        ftp.quit()

    return [file_name]
|
|
51
|
|
52 def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name):
|
|
53 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
|
|
54 data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get('ganon_databases', [])
|
|
55 data_manager_dict['data_tables'][data_table_name].append(data_table_entry)
|
|
56 return data_manager_dict
|
|
57
|
|
if __name__ == '__main__':
    # Command-line entry point: download the requested rRNA set and write a
    # JSON description of the resulting data table entry to "output_file".
    parser = argparse.ArgumentParser(description='Download RefSeq rRNA bacterial databases')
    parser.add_argument('--output_directory', default='/tool-data/ganon', help='Directory to write output to')
    # --rrna is interpolated into the remote file name, so it must be given
    # and is restricted to the values named in the help text.
    parser.add_argument('--rrna', required=True, choices=('5S', '16S', '23S'),
                        help='rRNA sequences to download (5S, 16S, or 23S)')
    args = parser.parse_args()

    output_directory = args.output_directory
    if not os.path.exists(output_directory):
        # makedirs also creates missing parents (the default path is nested).
        os.makedirs(output_directory)

    # NOTE(review): the download lands in the current working directory,
    # while the data table entry below points at output_directory - confirm
    # that the calling wrapper moves the file.
    outfile = get_refseq_rrna(args.rrna)
    if outfile is None:
        # Do not register a database that was never downloaded.
        sys.exit('ERROR: download of %s rRNA sequences failed' % args.rrna)

    data_manager_dict = {}
    _add_data_table_entry(data_manager_dict=data_manager_dict,
                          data_table_entry=dict(value=args.rrna, dbkey=args.rrna, name=args.rrna, path=args.output_directory),
                          data_table_name='ganon_databases')
    # Close the handle deterministically so the JSON is flushed to disk.
    with open("output_file", 'w') as json_handle:
        json_handle.write(json.dumps(data_manager_dict, sort_keys=True))
|
|
76
|