Mercurial > repos > estrain > data_manager_ganon
view data_manager/fetch_ganon.py @ 1:9de84cd78a82 draft
Uploaded
author | estrain |
---|---|
date | Fri, 05 Jul 2019 07:56:23 -0400 |
parents | a50614a513f3 |
children | c220590bde7a |
line wrap: on
line source
#!/usr/bin/env python
"""Download a RefSeq bacterial rRNA FASTA file from the NCBI FTP server and
write a Galaxy data-manager JSON entry for the ``ganon_databases`` table.

The code is written to run unchanged on both Python 2.6+ and Python 3.
"""
import argparse
import json
import os
import os.path
import sys
import ftplib
import socket


def get_refseq_rrna(rrna):
    """Fetch ``bacteria.<rrna>rRNA.fna.gz`` from NCBI into the current directory.

    Connects anonymously to ``ftp.ncbi.nlm.nih.gov``, changes into
    ``refseq/TargetedLoci/Bacteria/`` and retrieves the file in binary mode.
    Progress and error messages are printed to standard output.

    Args:
        rrna: rRNA label spliced into the remote file name (the command-line
            help lists 5S, 16S and 23S).

    Returns:
        A one-element list holding the downloaded file name on success, or
        ``None`` if connecting, logging in, changing directory or downloading
        failed.
    """
    host = 'ftp.ncbi.nlm.nih.gov'
    folder_path = 'refseq/TargetedLoci/Bacteria/'
    file_name = "bacteria." + rrna + "rRNA.fna.gz"

    try:
        f = ftplib.FTP(host)
    except (socket.error, socket.gaierror):
        print('ERROR: cannot reach "%s"' % host)
        return None
    print('*** Connected to host "%s"' % host)

    # From here on the control connection is open; the ``finally`` clause
    # guarantees it is shut down on every exit path, including unexpected
    # exceptions raised mid-transfer.
    try:
        try:
            f.login()
        except ftplib.error_perm:
            print('ERROR: cannot login anonymously')
            return None
        print('*** Logged in as "anonymous"')

        try:
            f.cwd(folder_path)
        except ftplib.error_perm:
            print('ERROR: cannot CD to "%s"' % folder_path)
            return None
        print('*** Changed to "%s" folder' % folder_path)

        try:
            # ``with`` closes (and flushes) the local file even if the
            # transfer aborts part-way through.
            with open(file_name, 'wb') as handle:
                f.retrbinary('RETR %s' % file_name, handle.write)
        except ftplib.error_perm:
            print('ERROR: cannot read file "%s"' % file_name)
            # Remove the empty/partial file and report failure the same way
            # as the earlier steps instead of returning a name that no
            # longer exists on disk.
            os.unlink(file_name)
            return None
        print('*** Downloaded "%s" to CWD' % file_name)
        return [file_name]
    finally:
        try:
            f.quit()
        except ftplib.all_errors:
            # The server may already have dropped the connection; make sure
            # the socket is released without masking the original outcome.
            f.close()


def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name):
    """Append ``data_table_entry`` to the named table of ``data_manager_dict``.

    The ``'data_tables'`` mapping and the per-table list are created on first
    use. The dictionary is modified in place and also returned.

    Args:
        data_manager_dict: Dictionary that accumulates data-table entries.
        data_table_entry: Entry (a dict of column values) to append.
        data_table_name: Name of the table the entry belongs to.

    Returns:
        The same ``data_manager_dict`` object that was passed in.
    """
    data_tables = data_manager_dict.setdefault('data_tables', {})
    # Look the list up under ``data_table_name`` (not a fixed table name) so
    # entries for one table never end up sharing another table's list.
    data_tables.setdefault(data_table_name, []).append(data_table_entry)
    return data_manager_dict


def main(argv=None):
    """Parse the command line, download the database and write the JSON entry.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Exits with a non-zero status (via ``sys.exit``) when the download fails,
    so that no data-table entry is written for a file that was not fetched.
    """
    parser = argparse.ArgumentParser(description='Download RefSeq rRNA bacterial databases')
    parser.add_argument('--output_directory', default='/tool-data/ganon', help='Directory to write output to')
    # Without a value the remote file name cannot be built, so let argparse
    # report the problem instead of failing later on string concatenation.
    parser.add_argument('--rrna', required=True, help='rRNA sequences to download (5S, 16S, or 23S)')
    args = parser.parse_args(argv)

    output_directory = args.output_directory
    if not os.path.exists(output_directory):
        # makedirs also creates missing parents (e.g. /tool-data for the
        # default path), which plain mkdir would refuse to do.
        os.makedirs(output_directory)

    # NOTE(review): the file is downloaded into the current working
    # directory, while the data-table ``path`` below is the output directory;
    # nothing here moves the file - confirm this is intended.
    outfile = get_refseq_rrna(args.rrna)
    if outfile is None:
        sys.exit('ERROR: download of %s rRNA database failed' % args.rrna)

    data_manager_dict = {}
    _add_data_table_entry(
        data_manager_dict=data_manager_dict,
        data_table_entry=dict(value=args.rrna, dbkey=args.rrna, name=args.rrna, path=args.output_directory),
        data_table_name='ganon_databases',
    )

    with open("output_file", 'w') as out:
        out.write(json.dumps(data_manager_dict, sort_keys=True))


if __name__ == '__main__':
    main()