Mercurial > repos > estrain > data_manager_ganon
comparison data_manager/fetch_ganon.py @ 0:a50614a513f3 draft
Uploaded
author | estrain |
---|---|
date | Thu, 04 Jul 2019 21:42:31 -0400 |
parents | |
children | c220590bde7a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a50614a513f3 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import json | |
5 import os | |
6 import os.path | |
7 import sys | |
8 import ftplib | |
9 import socket | |
10 | |
def get_refseq_rrna(rrna):
    """Download a RefSeq bacterial rRNA FASTA archive from the NCBI FTP site.

    The file ``bacteria.<rrna>rRNA.fna.gz`` is fetched anonymously from
    ``refseq/TargetedLoci/Bacteria/`` on ``ftp.ncbi.nlm.nih.gov`` and written
    to the current working directory under the same name.

    Args:
        rrna: Label inserted into the remote file name (e.g. ``"16S"``).

    Returns:
        A one-element list holding the local file name on success, or
        ``None`` when the host is unreachable, the anonymous login or the
        directory change is refused, or the file cannot be retrieved.
    """
    host = 'ftp.ncbi.nlm.nih.gov'
    folder_path = 'refseq/TargetedLoci/Bacteria/'
    file_name = "bacteria." + rrna + "rRNA.fna.gz"

    try:
        f = ftplib.FTP(host)
    except (socket.error, socket.gaierror):
        print('ERROR: cannot reach "%s"' % host)
        return None
    print('*** Connected to host "%s"' % host)

    # From here on the session is open; the ``finally`` clause guarantees it
    # is closed on every exit path, including unexpected exceptions.
    try:
        try:
            f.login()
        except ftplib.error_perm:
            print('ERROR: cannot login anonymously')
            return None
        print('*** Logged in as "anonymous"')

        try:
            f.cwd(folder_path)
        except ftplib.error_perm:
            print('ERROR: cannot CD to "%s"' % folder_path)
            return None
        print('*** Changed to "%s" folder' % folder_path)

        try:
            # The ``with`` block closes (and flushes) the local file before
            # it is either reported as downloaded or unlinked.
            with open(file_name, 'wb') as handle:
                f.retrbinary('RETR %s' % file_name, handle.write)
        except ftplib.error_perm:
            print('ERROR: cannot read file "%s"' % file_name)
            os.unlink(file_name)
            # The partial file was just removed, so do not hand its name
            # back to the caller as if the download had succeeded.
            return None
        print('*** Downloaded "%s" to CWD' % file_name)
        return [file_name]
    finally:
        try:
            f.quit()
        except ftplib.all_errors:
            # The polite QUIT failed (e.g. the connection already dropped);
            # close the socket unilaterally so it is not leaked.
            f.close()
51 | |
52 def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name): | |
53 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) | |
54 data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get('ganon_databases', []) | |
55 data_manager_dict['data_tables'][data_table_name].append(data_table_entry) | |
56 return data_manager_dict | |
57 | |
if __name__ == '__main__':
    # Script entry point: download the requested rRNA set and write a JSON
    # description of the resulting data table entry to "output_file".
    parser = argparse.ArgumentParser(description='Download RefSeq rRNA bacterial databases')
    parser.add_argument('--output_directory', default='/tool-data/ganon', help='Directory to write output to')
    # Required: without it the remote file name cannot be built.
    parser.add_argument('--rrna', required=True, help='rRNA sequences to download (5S, 16S, or 23S)')
    args = parser.parse_args()

    output_directory = args.output_directory
    if not os.path.isdir(output_directory):
        # makedirs (not mkdir) so a nested path such as the default
        # '/tool-data/ganon' works when the parent does not exist yet.
        os.makedirs(output_directory)

    outfile = get_refseq_rrna(args.rrna)
    if not outfile:
        # Do not register a data table entry for data that was never fetched.
        sys.exit('ERROR: download of "%s" rRNA sequences failed' % args.rrna)

    # NOTE(review): get_refseq_rrna saves the archive in the current working
    # directory, while the entry below records output_directory as its path.
    # Confirm whether the file should be moved there.
    data_manager_dict = {}
    _add_data_table_entry(data_manager_dict=data_manager_dict,
                          data_table_entry=dict(value=args.rrna, dbkey=args.rrna, name=args.rrna, path=args.output_directory),
                          data_table_name='ganon_databases')
    with open("output_file", 'w') as handle:
        handle.write(json.dumps(data_manager_dict, sort_keys=True))
76 |