# HG changeset patch
# User estrain
# Date 1562290951 14400
# Node ID a50614a513f30ad22ee69401e4e0003989a9c056
Uploaded
diff -r 000000000000 -r a50614a513f3 data_manager/fetch_ganon.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/fetch_ganon.py Thu Jul 04 21:42:31 2019 -0400
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import os
+import os.path
+import sys
+import ftplib
+import socket
+
+def get_refseq_rrna(rrna):
+    """Fetch bacteria.<rrna>rRNA.fna.gz from NCBI into the CWD; return [name] or None."""
+    host = 'ftp.ncbi.nlm.nih.gov'
+    folder_path = 'refseq/TargetedLoci/Bacteria/'
+    file_name = "bacteria." + rrna + "rRNA.fna.gz"
+
+    try:
+        f = ftplib.FTP(host)
+    except (socket.error, socket.gaierror):
+        print('ERROR: cannot reach "%s"' % host)
+        return None
+    print('*** Connected to host "%s"' % host)
+
+    try:
+        try:
+            f.login()
+        except ftplib.error_perm:
+            print('ERROR: cannot login anonymously')
+            return None
+        print('*** Logged in as "anonymous"')
+
+        try:
+            f.cwd(folder_path)
+        except ftplib.error_perm:
+            print('ERROR: cannot CD to "%s"' % folder_path)
+            return None
+        print('*** Changed to "%s" folder' % folder_path)
+
+        try:
+            # 'with' closes the local file before a partial download is removed.
+            with open(file_name, 'wb') as out:
+                f.retrbinary('RETR %s' % file_name, out.write)
+        except ftplib.error_perm:
+            print('ERROR: cannot read file "%s"' % file_name)
+            os.unlink(file_name)
+            return None  # do not report a file that no longer exists
+        print('*** Downloaded "%s" to CWD' % file_name)
+        return [file_name]
+    finally:
+        f.quit()  # one exit point: the FTP session is closed on every path
+
+def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name):
+    """Append data_table_entry to data_tables[data_table_name]; return the dict."""
+    tables = data_manager_dict.setdefault('data_tables', {})
+    tables.setdefault(data_table_name, []).append(data_table_entry)  # keyed by the given name
+    return data_manager_dict
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Download RefSeq rRNA bacterial databases')
+    parser.add_argument('--output_directory', default='/tool-data/ganon', help='Directory to write output to')
+    parser.add_argument('--rrna', required=True, help='rRNA sequences to download (5S, 16S, or 23S)')
+    args = parser.parse_args()
+
+    # makedirs (not mkdir) so missing parent directories are created as well.
+    if not os.path.exists(args.output_directory):
+        os.makedirs(args.output_directory)
+
+    # A falsy result means the fetch failed: stop before registering a table entry.
+    if not get_refseq_rrna(args.rrna):
+        sys.exit('ERROR: download of %s rRNA failed' % args.rrna)
+
+    entry = dict(value=args.rrna, dbkey=args.rrna, name=args.rrna, path=args.output_directory)
+    data_manager_dict = _add_data_table_entry({}, entry, 'ganon_databases')
+    with open("output_file", 'w') as handle:  # closed and flushed on exit
+        handle.write(json.dumps(data_manager_dict, sort_keys=True))
+
diff -r 000000000000 -r a50614a513f3 data_manager/ganon_data_manager.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/ganon_data_manager.xml Thu Jul 04 21:42:31 2019 -0400
@@ -0,0 +1,29 @@
+
+ Fetch rRNA data from NCBI RefSeq and create ganon databases
+
+ python
+ ganon
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r a50614a513f3 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Thu Jul 04 21:42:31 2019 -0400
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
diff -r 000000000000 -r a50614a513f3 tool-data/ganon_databases.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ganon_databases.loc.sample Thu Jul 04 21:42:31 2019 -0400
@@ -0,0 +1,8 @@
+# Tab separated with four columns:
+# - value (Galaxy records this in the Galaxy DB)
+# - name (Galaxy shows this in the UI)
+# - dbkey (database key)
+# - path (Name of the ganon DB)
+#
+#e.g.
+#ganon5S	5S rRNA	dbkey	/path/to/ganonDB/5S
diff -r 000000000000 -r a50614a513f3 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Jul 04 21:42:31 2019 -0400
@@ -0,0 +1,6 @@
+
+
+ value, name, dbkey, path
+
+
+