Mercurial > repos > estrain > data_manager_ganon
comparison data_manager/fetch_ganon.py @ 0:a50614a513f3 draft
Uploaded
| author | estrain |
|---|---|
| date | Thu, 04 Jul 2019 21:42:31 -0400 |
| parents | |
| children | c220590bde7a |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a50614a513f3 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import argparse | |
| 4 import json | |
| 5 import os | |
| 6 import os.path | |
| 7 import sys | |
| 8 import ftplib | |
| 9 import socket | |
| 10 | |
def get_refseq_rrna(rrna):
    """Download a RefSeq bacterial rRNA FASTA archive from the NCBI FTP site.

    The file ``bacteria.<rrna>rRNA.fna.gz`` is fetched from
    ``refseq/TargetedLoci/Bacteria/`` on ``ftp.ncbi.nlm.nih.gov`` using an
    anonymous login and written to the current working directory.

    Args:
        rrna: Label inserted into the remote file name (the command-line help
            lists ``5S``, ``16S`` and ``23S``).

    Returns:
        A one-element list holding the downloaded file name (relative to the
        current working directory), or ``None`` when the host cannot be
        reached, the login or directory change is refused, or the file
        cannot be retrieved.
    """
    host = 'ftp.ncbi.nlm.nih.gov'
    folder_path = 'refseq/TargetedLoci/Bacteria/'
    file_name = 'bacteria.' + rrna + 'rRNA.fna.gz'

    try:
        ftp = ftplib.FTP(host)
    except (socket.error, socket.gaierror):
        print('ERROR: cannot reach "%s"' % host)
        return None
    print('*** Connected to host "%s"' % host)

    # From here on the control connection is open; the finally clause makes
    # sure it is shut down on every exit path, including unexpected errors.
    try:
        try:
            ftp.login()
        except ftplib.error_perm:
            print('ERROR: cannot login anonymously')
            return None
        print('*** Logged in as "anonymous"')

        try:
            ftp.cwd(folder_path)
        except ftplib.error_perm:
            print('ERROR: cannot CD to "%s"' % folder_path)
            return None
        print('*** Changed to "%s" folder' % folder_path)

        try:
            # The context manager closes (and flushes) the local file even
            # when the transfer fails part-way through.
            with open(file_name, 'wb') as handle:
                ftp.retrbinary('RETR %s' % file_name, handle.write)
        except ftplib.error_perm:
            print('ERROR: cannot read file "%s"' % file_name)
            # Remove the empty/partial file and report failure instead of
            # returning the name of a file that no longer exists.
            os.unlink(file_name)
            return None
        print('*** Downloaded "%s" to CWD' % file_name)
    finally:
        try:
            ftp.quit()
        except ftplib.all_errors:
            # The server may already have dropped the connection; close the
            # socket locally so the original outcome is not masked.
            ftp.close()

    return [file_name]
| 51 | |
| 52 def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name): | |
| 53 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) | |
| 54 data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get('ganon_databases', []) | |
| 55 data_manager_dict['data_tables'][data_table_name].append(data_table_entry) | |
| 56 return data_manager_dict | |
| 57 | |
if __name__ == '__main__':
    # Command-line entry point: download the requested rRNA archive and write
    # a JSON description of the resulting data table entry to "output_file".
    parser = argparse.ArgumentParser(description='Download RefSeq rRNA bacterial databases')
    parser.add_argument('--output_directory', default='/tool-data/ganon', help='Directory to write output to')
    # Required: the download builds the remote file name from this value, so
    # a missing value would otherwise fail later with a TypeError.
    parser.add_argument('--rrna', required=True, help='rRNA sequences to download (5S, 16S, or 23S)')
    args = parser.parse_args()

    output_directory = args.output_directory
    if not os.path.isdir(output_directory):
        # makedirs also creates missing parent directories (e.g. /tool-data).
        os.makedirs(output_directory)

    # NOTE(review): the archive is downloaded into the current working
    # directory, while the table entry below points at output_directory --
    # confirm whether the file is expected to be moved there.
    outfile = get_refseq_rrna(args.rrna)
    if not outfile:
        # Do not register a database whose download did not succeed.
        sys.exit('ERROR: download of %s rRNA sequences failed' % args.rrna)

    data_manager_dict = {}
    _add_data_table_entry(data_manager_dict=data_manager_dict,
                          data_table_entry=dict(value=args.rrna, dbkey=args.rrna, name=args.rrna, path=args.output_directory),
                          data_table_name='ganon_databases')
    # Close the handle explicitly so the JSON is flushed before exit.
    with open("output_file", 'w') as handle:
        handle.write(json.dumps(data_manager_dict, sort_keys=True))
| 76 |
