Mercurial > repos > devteam > data_manager_fetch_ncbi_taxonomy
changeset 0:078e803565cf draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/blob/master/data_managers/data_manager_fetch_ncbi_taxonomy/ commit 86cf90107482cab1cb47fc0d42d6705f8077daa7
author | devteam |
---|---|
date | Fri, 06 Nov 2015 14:16:28 -0500 |
parents | |
children | 8dd4076813ef |
files | data_manager/data_manager.py data_manager/ncbi_taxonomy_fetcher.xml data_manager_conf.xml tool-data/ncbi_taxonomy.loc.sample tool_data_table_conf.xml.sample |
diffstat | 4 files changed, 119 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
"""Galaxy data manager script that fetches the NCBI taxonomy dump.

Source file: data_manager/data_manager.py

The script downloads an archive (by default the NCBI ``taxdump.tar.gz``),
unpacks it into ``./taxonomy``, moves the unpacked files into the
``extra_files_path`` named in the JSON parameter file given by ``--out``
and finally overwrites that file with the new ``ncbi_taxonomy`` data table
entry.

The code runs unchanged on Python 2.7 and Python 3.
"""
import argparse
import datetime
import json
import os
import shutil
import sys
import tarfile
import zipfile
from contextlib import closing

try:
    # Python 2
    from urllib2 import Request, urlopen
except ImportError:
    # Python 3
    from urllib.request import Request, urlopen

parser = argparse.ArgumentParser(description='Create data manager json.')
parser.add_argument('--out', dest='output', action='store', help='JSON filename')
parser.add_argument('--name', dest='name', action='store', default=str(datetime.date.today()), help='Data table entry unique ID')
parser.add_argument('--url', dest='url', action='store', default='ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz', help='Download URL')


def _check_member_paths(names, workdir):
    """Raise ValueError if any archive member would land outside *workdir*.

    Only member names are checked; links stored inside an archive are not
    resolved here.
    """
    root = os.path.realpath(workdir)
    for name in names:
        target = os.path.realpath(os.path.join(root, name))
        if target != root and not target.startswith(root + os.sep):
            raise ValueError(
                'Refusing to extract %r outside of %s' % (name, workdir))


def _extract_archive(file_path, workdir):
    """Unpack *file_path* into *workdir*; return False if it is no tar/zip."""
    if tarfile.is_tarfile(file_path):
        archive = tarfile.open(file_path, 'r:*')
        names = archive.getnames()
    elif zipfile.is_zipfile(file_path):
        archive = zipfile.ZipFile(file_path, 'r')
        names = archive.namelist()
    else:
        return False
    # closing() releases the archive handle even if extraction fails.
    with closing(archive):
        _check_member_paths(names, workdir)
        archive.extractall(workdir)
    return True


def url_download(url, workdir):
    """Download *url* into *workdir* and unpack it when it is an archive.

    The payload is first stored as ``download.dat`` inside *workdir*
    (created if missing). A tar (any compression) or zip payload is
    extracted into *workdir* and ``download.dat`` is removed afterwards.
    Any other payload is left in place as ``download.dat``.

    Args:
        url: Location of the file to fetch.
        workdir: Directory that receives the download and extracted files.

    Raises:
        Exception: Whatever the download raised; it is reported on stderr
            and re-raised so that the process exits with a non-zero status
            instead of registering an empty or partial download.
        ValueError: If an archive member would be written outside
            *workdir*.
    """
    file_path = os.path.join(workdir, 'download.dat')
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    try:
        with closing(urlopen(Request(url))) as src, open(file_path, 'wb') as dst:
            shutil.copyfileobj(src, dst)
    except Exception as e:
        sys.stderr.write('Failed to download %s: %s\n' % (url, e))
        raise
    if _extract_archive(file_path, workdir):
        os.remove(file_path)


def main(args):
    """Fetch the taxonomy files and write the data manager JSON.

    Args:
        args: Parsed command line namespace providing ``url`` (download
            location), ``name`` (data table entry name; its lower-cased
            form becomes the entry value) and ``output`` (path of the JSON
            file that is read for parameters and then overwritten with the
            result).
    """
    workdir = os.path.join(os.getcwd(), 'taxonomy')
    url_download(args.url, workdir)
    data_manager_entry = {
        'value': args.name.lower(),
        'name': args.name,
        'path': '.',
    }
    data_manager_json = dict(data_tables=dict(ncbi_taxonomy=data_manager_entry))
    with open(args.output) as handle:
        params = json.load(handle)
    target_directory = params['output_data'][0]['extra_files_path']
    # Tolerate an already existing target and create missing parents.
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)
    for filename in os.listdir(workdir):
        shutil.move(os.path.join(workdir, filename), target_directory)
    with open(args.output, 'w') as handle:
        json.dump(data_manager_json, handle)


if __name__ == '__main__':
    # Parse the command line only when run as a script so that importing
    # this module has no side effects.
    main(parser.parse_args())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/ncbi_taxonomy_fetcher.xml Fri Nov 06 14:16:28 2015 -0500 @@ -0,0 +1,25 @@ +<?xml version="1.0"?> <!-- Tool definition of type manage_data that runs data_manager.py and produces a data_manager_json output named out_file. --> +<tool id="ncbi_taxonomy_fetcher" name="NCBI" tool_type="manage_data" version="1.0.0"> + <description>taxonomy downloader</description> <!-- Exit codes of 1 and above are declared as fatal errors. --> + <stdio> + <exit_code description="Error" level="fatal" range="1:" /> + </stdio> <!-- The url and name options are only added to the command line when the matching input passes the template's if test; otherwise data_manager.py falls back to its own defaults. --> + <command interpreter="python"> + <![CDATA[ + data_manager.py --out "${out_file}" + #if $taxonomy_url: + --url "${taxonomy_url}" + #end if + #if $database_name: + --name "${database_name}" + #end if + ]]> + </command> <!-- Two free-text inputs: an optional database name and the URL of the taxonomy files. --> + <inputs> + <param help="Enter a unique identifier, or leave blank for today's date" label="Name for this database" name="database_name" type="text" optional="True" /> + <param label="Enter URL for taxonomy files" name="taxonomy_url" type="text" /> + </inputs> + <outputs> + <data format="data_manager_json" name="out_file" /> + </outputs> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Fri Nov 06 14:16:28 2015 -0500 @@ -0,0 +1,18 @@ +<?xml version="1.0"?> <!-- Associates the ncbi_taxonomy_fetcher tool file with the ncbi_taxonomy data table and its value, name and path columns. --> +<data_managers> + <data_manager tool_file="data_manager/ncbi_taxonomy_fetcher.xml" id="ncbi_taxonomy_fetcher" version="1.0.0"> + <data_table name="ncbi_taxonomy"> + <output> + <column name="value" /> + <column name="name" /> <!-- The path column refers to the out_file output: its directory is moved to ncbi_taxonomy/${value} below GALAXY_DATA_MANAGER_DATA_PATH and the column value is rewritten to that location. NOTE(review): presumably the abspath function translation then makes the stored path absolute; confirm against the Galaxy data manager documentation. --> + <column name="path" output_ref="out_file"> + <move type="directory" relativize_symlinks="True"> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">ncbi_taxonomy/${value}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/ncbi_taxonomy/${value}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + </data_manager> +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Nov 06 14:16:28 2015 -0500 @@ -0,0 +1,8 @@ +<?xml version="1.0"?> <!-- Sample data table configuration: declares the ncbi_taxonomy table with the columns value, name and path, read from tool-data/ncbi_taxonomy.loc. --> +<tables> + <!-- Locations of taxonomy data downloaded from NCBI --> + <table name="ncbi_taxonomy" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/ncbi_taxonomy.loc" /> + </table> +</tables>