Mercurial > repos > estrain > data_manager_ganon
changeset 0:a50614a513f3 draft
Uploaded
author | estrain |
---|---|
date | Thu, 04 Jul 2019 21:42:31 -0400 |
parents | |
children | 9de84cd78a82 |
files | data_manager/fetch_ganon.py data_manager/ganon_data_manager.xml data_manager_conf.xml tool-data/ganon_databases.loc.sample tool_data_table_conf.xml.sample |
diffstat | 5 files changed, 132 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/fetch_ganon.py Thu Jul 04 21:42:31 2019 -0400 @@ -0,0 +1,76 @@ +#!/usr/bin/env python + +import argparse +import json +import os +import os.path +import sys +import ftplib +import socket + +def get_refseq_rrna(rrna): + + host = 'ftp.ncbi.nlm.nih.gov' + folder_path = 'refseq/TargetedLoci/Bacteria/' + file_name="bacteria."+rrna+"rRNA.fna.gz" + + try: + f = ftplib.FTP(host) + except (socket.error, socket.gaierror), e: + print 'ERROR: cannot reach "%s"' % host + return + print '*** Connected to host "%s"' % host + + try: + f.login() + except ftplib.error_perm: + print 'ERROR: cannot login anonymously' + f.quit() + return + print '*** Logged in as "anonymous"' + + try: + f.cwd(folder_path) + except ftplib.error_perm: + print 'ERROR: cannot CD to "%s"' % folder_path + f.quit() + return + print '*** Changed to "%s" folder' % folder_path + + try: + f.retrbinary('RETR %s' % file_name, + open(file_name, 'wb').write) + except ftplib.error_perm: + print 'ERROR: cannot read file "%s"' % file_name + os.unlink(file_name) + else: + print '*** Downloaded "%s" to CWD' % file_name + f.quit() + + return[file_name] + +def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name): + data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) + data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get('ganon_databases', []) + data_manager_dict['data_tables'][data_table_name].append(data_table_entry) + return data_manager_dict + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Download RefSeq rRNA bacterial databases') + parser.add_argument('--output_directory', default='/tool-data/ganon', help='Directory to write output to') + parser.add_argument('--rrna', help='rRNA sequences to download (5S, 16S, or 23S)') + args = parser.parse_args() + + output_directory = args.output_directory + if not os.path.exists(output_directory): + 
os.mkdir(output_directory) + + outfile=get_refseq_rrna(args.rrna) + + + data_manager_dict = {} + _add_data_table_entry(data_manager_dict=data_manager_dict, + data_table_entry=dict(value=args.rrna, dbkey=args.rrna, name=args.rrna, path=args.output_directory), + data_table_name='ganon_databases') + open("output_file", 'w').write(json.dumps(data_manager_dict, sort_keys=True)) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/ganon_data_manager.xml Thu Jul 04 21:42:31 2019 -0400 @@ -0,0 +1,29 @@ +<tool id="ganon_data_manager" name="ganon data manager" version="0.0.1" tool_type="manage_data"> + <description>Fetch rRNA data from NCBI RefSeq and create ganon databases</description> + <requirements> + <requirement type="package">python</requirement> + <requirement type="package">ganon</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + python $__tool_directory__/fetch_ganon.py --rrna $rrna --output_directory $out_dir; + ganon build -d $rrna -i *.gz; + mv $rrna.* $out_dir; + ]]></command> + <inputs> + <param argument="rrna" type="select" label="Bacterial rRNA loci" multiple="true"> + <option value="5S">5S</option> + <option value="16S">16S</option> + <option value="23S">23S</option> + </param> + <param argument="out_dir" type="text" label="Output folder" value="/tool-data/ganon"/> + </inputs> + <outputs> + <data name="output_file" format="data_manager_json"/> + </outputs> + <help><![CDATA[ + Retrieve RefSeq Bacterial rRNA Targeted Loci + ]]> + </help> + <citations> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Thu Jul 04 21:42:31 2019 -0400 @@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/ganon_data_manager.xml" id="ganon_data_manager" version="0.0.1"> + <data_table name="ganon_databases"> + <output> + <column name="value" /> + <column name="name" /> + <column name="dbkey" /> + <column name="path" /> + </output> + </data_table> + </data_manager> +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/ganon_databases.loc.sample Thu Jul 04 21:42:31 2019 -0400 @@ -0,0 +1,8 @@ +# Tab separated with four columns: +# - value (Galaxy records this in the Galaxy DB) +# - name (Galaxy shows this in the UI) +# - dbkey (database key) +# - path (Name of the ganon DB) +# +#e.g. +#ganon5S<tab>5S rRNA<tab>dbkey<tab>/path/to/ganonDB/5S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Jul 04 21:42:31 2019 -0400 @@ -0,0 +1,6 @@ +<tables> + <table name="ganon_databases" comment_char="#"> + <columns>value, name, dbkey, path</columns> + <file path="tool-data/ganon_databases.loc" /> + </table> + </tables>