| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 import argparse | 
|  | 4 import json | 
|  | 5 import os | 
|  | 6 import os.path | 
|  | 7 import sys | 
|  | 8 import ftplib | 
|  | 9 import socket | 
|  | 10 | 
def get_refseq_rrna(rrna):
    """Download a RefSeq bacterial rRNA FASTA archive from the NCBI FTP site.

    Connects to ``ftp.ncbi.nlm.nih.gov``, logs in anonymously, changes to
    ``refseq/TargetedLoci/Bacteria/`` and retrieves
    ``bacteria.<rrna>rRNA.fna.gz`` into the current working directory.
    Progress and error messages are printed to stdout.

    Args:
        rrna: Label inserted into the remote file name (e.g. ``"16S"``).

    Returns:
        A one-element list holding the downloaded file name on success, or
        ``None`` when the host cannot be reached, the login or directory
        change is refused, or the server refuses the download.

    Raises:
        Any non-permission FTP or I/O error raised while logged in is
        propagated; a partially written file is removed first.
    """
    host = 'ftp.ncbi.nlm.nih.gov'
    folder_path = 'refseq/TargetedLoci/Bacteria/'
    file_name = "bacteria." + rrna + "rRNA.fna.gz"

    try:
        f = ftplib.FTP(host)
    except (socket.error, socket.gaierror):
        print('ERROR: cannot reach "%s"' % host)
        return None
    print('*** Connected to host "%s"' % host)

    # From here on the connection is open; the outer ``finally`` guarantees
    # it is released on every exit path, including unexpected exceptions.
    try:
        try:
            f.login()
        except ftplib.error_perm:
            print('ERROR: cannot login anonymously')
            return None
        print('*** Logged in as "anonymous"')

        try:
            f.cwd(folder_path)
        except ftplib.error_perm:
            print('ERROR: cannot CD to "%s"' % folder_path)
            return None
        print('*** Changed to "%s" folder' % folder_path)

        downloaded = False
        try:
            # The ``with`` block closes (and flushes) the local file before
            # it is either handed back to the caller or deleted.
            with open(file_name, 'wb') as out:
                f.retrbinary('RETR %s' % file_name, out.write)
            downloaded = True
        except ftplib.error_perm:
            print('ERROR: cannot read file "%s"' % file_name)
        finally:
            # Never leave an empty or truncated archive behind.
            if not downloaded and os.path.exists(file_name):
                os.unlink(file_name)

        if not downloaded:
            # Do not report a file that has just been deleted.
            return None
        print('*** Downloaded "%s" to CWD' % file_name)
        return [file_name]
    finally:
        try:
            f.quit()
        except ftplib.all_errors:
            # The polite QUIT failed (e.g. connection already dropped);
            # close the socket unilaterally instead.
            f.close()
|  | 51 | 
|  | 52 def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name): | 
|  | 53     data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) | 
|  | 54     data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get('ganon_databases', []) | 
|  | 55     data_manager_dict['data_tables'][data_table_name].append(data_table_entry) | 
|  | 56     return data_manager_dict | 
|  | 57 | 
# Script entry point: download the requested RefSeq rRNA archive and write a
# JSON document describing the new data table entry to "output_file".
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Download RefSeq rRNA bacterial databases')
    parser.add_argument('--output_directory', default='/tool-data/ganon', help='Directory to write output to')
    # Required: the value is concatenated into the remote file name and the
    # table entry, so a missing option would otherwise fail with a TypeError.
    parser.add_argument('--rrna', required=True, help='rRNA sequences to download (5S, 16S, or 23S)')
    args = parser.parse_args()

    output_directory = args.output_directory
    if not os.path.exists(output_directory):
        # makedirs also creates missing parents (e.g. /tool-data).
        os.makedirs(output_directory)

    outfile = get_refseq_rrna(args.rrna)
    # Do not register a database whose download failed or whose file is
    # missing on disk.
    if not outfile or not all(os.path.exists(path) for path in outfile):
        sys.exit('ERROR: download of %s rRNA sequences failed' % args.rrna)

    # NOTE(review): the archive is downloaded into the current working
    # directory, while the entry below registers output_directory as its
    # path -- confirm that the calling wrapper moves the file there.
    # NOTE(review): dbkey is spelled "gannon" whereas value uses "ganon";
    # possibly a typo, left unchanged in case existing tables rely on it.
    data_manager_dict = {}
    _add_data_table_entry(
        data_manager_dict=data_manager_dict,
        data_table_entry=dict(
            value="ganon" + args.rrna,
            dbkey="gannon" + args.rrna,
            name=args.rrna,
            path=args.output_directory,
        ),
        data_table_name='ganon_databases',
    )
    # Context manager guarantees the JSON is flushed and the handle closed.
    with open("output_file", 'w') as handle:
        handle.write(json.dumps(data_manager_dict, sort_keys=True))
|  | 76 |