Mercurial > repos > estrain > data_manager_ganon
changeset 2:b3585ca01066 draft
Deleted selected files
author | estrain |
---|---|
date | Fri, 05 Jul 2019 07:56:34 -0400 |
parents | 9de84cd78a82 |
children | c220590bde7a |
files | fetch_ganon.py |
diffstat | 1 files changed, 0 insertions(+), 76 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# Reconstructed from the changeset diff that deleted this file:
#   --- a/fetch_ganon.py  Fri Jul 05 07:56:23 2019 -0400
#   +++ /dev/null         Thu Jan 01 00:00:00 1970 +0000
#   @@ -1,76 +0,0 @@
"""Download a RefSeq bacterial rRNA FASTA archive and describe it in a
data-manager JSON file.

The script fetches ``bacteria.<rrna>rRNA.fna.gz`` from the NCBI FTP server
into the current working directory, then writes a JSON document named
``output_file`` (also in the current working directory) containing one
``ganon_databases`` data-table entry that points at ``--output_directory``.
"""

import argparse
import ftplib
import json
import os
import os.path
import socket
import sys


def _close_ftp(ftp):
    """Close *ftp* politely, falling back to a hard close if QUIT fails."""
    try:
        ftp.quit()
    except ftplib.all_errors:
        # The server may already have dropped the connection; make sure the
        # local socket is released anyway.
        ftp.close()


def get_refseq_rrna(rrna):
    """Download the RefSeq bacterial rRNA FASTA archive for *rrna*.

    The file ``bacteria.<rrna>rRNA.fna.gz`` is fetched anonymously from
    ``ftp.ncbi.nlm.nih.gov/refseq/TargetedLoci/Bacteria/`` and stored under
    the same name in the current working directory.

    Args:
        rrna: rRNA label used to build the remote file name (the command-line
            help lists 5S, 16S and 23S).

    Returns:
        A one-element list holding the downloaded file name, or ``None`` when
        the host cannot be reached, the anonymous login is refused, the remote
        folder is missing, or the file cannot be retrieved.  Progress and
        error messages are printed to stdout.
    """
    host = 'ftp.ncbi.nlm.nih.gov'
    folder_path = 'refseq/TargetedLoci/Bacteria/'
    file_name = "bacteria." + rrna + "rRNA.fna.gz"

    try:
        ftp = ftplib.FTP(host)
    except (socket.error, socket.gaierror):
        print('ERROR: cannot reach "%s"' % host)
        return None
    print('*** Connected to host "%s"' % host)

    # From here on the connection exists; the finally clause guarantees it is
    # closed on every exit path, including unexpected exceptions.
    try:
        try:
            ftp.login()
        except ftplib.error_perm:
            print('ERROR: cannot login anonymously')
            return None
        print('*** Logged in as "anonymous"')

        try:
            ftp.cwd(folder_path)
        except ftplib.error_perm:
            print('ERROR: cannot CD to "%s"' % folder_path)
            return None
        print('*** Changed to "%s" folder' % folder_path)

        try:
            # The with-block closes (and flushes) the local file before it is
            # either reported as downloaded or removed below.
            with open(file_name, 'wb') as handle:
                ftp.retrbinary('RETR %s' % file_name, handle.write)
        except ftplib.error_perm:
            print('ERROR: cannot read file "%s"' % file_name)
            # Remove the empty/partial file created by open().
            os.unlink(file_name)
            return None

        print('*** Downloaded "%s" to CWD' % file_name)
        return [file_name]
    finally:
        _close_ftp(ftp)


def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name):
    """Append *data_table_entry* to the table *data_table_name*.

    The ``data_tables`` mapping and the named table list are created on
    demand.  Entries already stored under *data_table_name* are kept, and
    other tables are left untouched.

    Args:
        data_manager_dict: dictionary that is updated in place.
        data_table_entry: entry (a dict of column values) to append.
        data_table_name: name of the data table receiving the entry.

    Returns:
        The same *data_manager_dict* object, for convenience.
    """
    tables = data_manager_dict.setdefault('data_tables', {})
    # Look the list up by the requested table name (not a fixed one) so that
    # each table accumulates only its own entries.
    tables.setdefault(data_table_name, []).append(data_table_entry)
    return data_manager_dict


def main():
    """Parse the command line, download the archive and write ``output_file``."""
    parser = argparse.ArgumentParser(description='Download RefSeq rRNA bacterial databases')
    parser.add_argument('--output_directory', default='/tool-data/ganon', help='Directory to write output to')
    # Required: the remote file name cannot be built without it.
    parser.add_argument('--rrna', required=True, help='rRNA sequences to download (5S, 16S, or 23S)')
    args = parser.parse_args()

    output_directory = args.output_directory
    if not os.path.exists(output_directory):
        # makedirs also creates missing parent directories.
        os.makedirs(output_directory)

    outfile = get_refseq_rrna(args.rrna)
    if not outfile:
        # Do not register a database that was never downloaded.
        sys.exit('ERROR: could not download %s rRNA sequences; no data table entry written' % args.rrna)

    data_manager_dict = {}
    # NOTE(review): dbkey is spelled "gannon" while value uses "ganon" --
    # looks like a typo, left unchanged in case existing entries rely on it.
    _add_data_table_entry(data_manager_dict=data_manager_dict,
                          data_table_entry=dict(value="ganon" + args.rrna,
                                                dbkey="gannon" + args.rrna,
                                                name=args.rrna,
                                                path=args.output_directory),
                          data_table_name='ganon_databases')
    with open("output_file", 'w') as handle:
        handle.write(json.dumps(data_manager_dict, sort_keys=True))


if __name__ == '__main__':
    main()