Mercurial > repos > matthias > data_manager_megan_tools
changeset 0:1f839ba466da draft
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_megan_tools commit d7a7a198e8f8c9b95491f1520d478e7400a1f59c-dirty
author | matthias |
---|---|
date | Thu, 01 Nov 2018 12:25:53 -0400 |
parents | |
children | f7ad11d31098 |
files | data_manager/.megan_tools_fetcher.xml.swp data_manager/data_manager.py data_manager/megan_tools_fetcher.xml data_manager_conf.xml test-data/SSURef_Nr99_132_tax_silva_to_NCBI_synonyms_json tool-data/megan_tools.loc.sample tool_data_table_conf.xml.sample |
diffstat | 5 files changed, 192 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# data_manager/data_manager.py
"""Galaxy data manager script that fetches MEGAN mapping files.

The script downloads one of the files listed in ``FILE2NAME`` from the MEGAN6
download server, unpacks it if it is a zip archive, moves the result into the
``extra_files_path`` announced by Galaxy and finally overwrites the Galaxy
parameter file with the JSON describing the new ``megan_tools`` data table
entry.
"""
import argparse
import datetime
import json
import os
import shutil
import tarfile
import zipfile

try:
    # For Python 3.0 and later
    from urllib.request import Request, urlopen
except ImportError:
    # Fall back to Python 2 imports
    from urllib2 import Request, urlopen

# Location all mapping files are fetched from (file name is appended).
DOWNLOAD_URL = "http://ab.inf.uni-tuebingen.de/data/software/megan6/download/"

# Download file name -> human readable label stored in the "name" column.
FILE2NAME = {
    "prot_acc2tax-June2018X1.abin.zip": "Protein accession to NCBI-taxonomy (June2018X1)",
    "nucl_acc2tax-June2018.abin.zip": "Nucleotide accession to NCBI-taxonomy (June2018)",
    "acc2interpro-June2018X.abin.zip": "Protein accession to InterPro (June2018X)",
    "acc2eggnog-Oct2016X.abin.zip": "Protein accession to eggNOG (Oct2016X)",
    "acc2seed-May2015XX.abin.zip": "Protein accession to SEED (May2015XX)",
    "acc2kegg-Dec2017X1-ue.abin.zip": "Protein accession to KEGG (Dec2017X1). Only for use with the Ultimate Edition of MEGAN.",
    "SSURef_Nr99_132_tax_silva_to_NCBI_synonyms.map.gz": "SSURef_Nr99_132_tax_silva_to_NCBI_synonyms.map.gz",
    "SSURef_NR99_128_tax_silva_to_NCBI_synonyms.map.gz": "SSURef_NR99_128_tax_silva_to_NCBI_synonyms.map.gz",
    "prot_gi2tax-Aug2016X.bin.zip": "Protein accession to NCBI-taxonomy (Aug2016X)",
    "nucl_gi2tax-Aug2016.bin.zip": "Nucleotide accession to NCBI-taxonomy (Aug2016)",
    # The eggNOG / InterPro labels were swapped relative to the file names.
    "gi2eggnog-June2016X.bin.zip": "Protein accession to eggNOG (June2016X)",
    "gi2interpro-June2016X.bin.zip": "Protein accession to InterPro (June2016X)",
    "gi2seed-May2015X.bin.zip": "Protein accession to SEED (May2015X)",
    "gi2kegg-Aug2016X-ue.bin.zip": "Protein accession to KEGG (Aug2016X). Only for use with the Ultimate Edition of MEGAN.",
}

# Download file name -> mapping type stored in the "type" column.
FILE2TYPE = {
    "prot_acc2tax-June2018X1.abin.zip": "acc2tax",
    "nucl_acc2tax-June2018.abin.zip": "acc2tax",
    "acc2interpro-June2018X.abin.zip": "acc2interpro",
    "acc2eggnog-Oct2016X.abin.zip": "acc2eggnog",
    "acc2seed-May2015XX.abin.zip": "acc2seed",
    "acc2kegg-Dec2017X1-ue.abin.zip": "acc2kegg",
    "SSURef_Nr99_132_tax_silva_to_NCBI_synonyms.map.gz": "syn2taxa",
    "SSURef_NR99_128_tax_silva_to_NCBI_synonyms.map.gz": "syn2taxa",
    "prot_gi2tax-Aug2016X.bin.zip": "gi2tax",
    "nucl_gi2tax-Aug2016.bin.zip": "gi2tax",
    "gi2eggnog-June2016X.bin.zip": "gi2eggnog",
    "gi2interpro-June2016X.bin.zip": "gi2interpro",
    "gi2seed-May2015X.bin.zip": "gi2seed",
    "gi2kegg-Aug2016X-ue.bin.zip": "gi2kegg",
}


def url_download(fname, workdir, base_url=DOWNLOAD_URL):
    """Download ``base_url + fname`` into ``workdir`` and unpack zip archives.

    The payload is first written to ``<workdir>/download.dat``. If that file
    is a zip archive its members are extracted into ``workdir`` and the
    archive is deleted; otherwise the file is left in place untouched.

    :param fname: file name to append to ``base_url``
    :param workdir: directory receiving the data (created when missing)
    :param base_url: URL prefix to download from; defaults to the MEGAN6
        download server
    """
    file_path = os.path.join(workdir, 'download.dat')
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    src = None
    try:
        src = urlopen(Request(base_url + fname))
        with open(file_path, 'wb') as dst:
            shutil.copyfileobj(src, dst)
    finally:
        # Python 2 responses are not context managers, so close explicitly.
        if src is not None:
            src.close()

    if not zipfile.is_zipfile(file_path):
        # NOTE(review): non-zip downloads (the *.map.gz files) stay on disk
        # as 'download.dat', still compressed. Confirm that consumers of the
        # data table expect this name before changing it.
        return
    with zipfile.ZipFile(file_path, 'r') as archive:
        archive.extractall(workdir)
    os.remove(file_path)


def main(fname, outjson, base_url=DOWNLOAD_URL):
    """Fetch ``fname`` and write the data manager JSON to ``outjson``.

    ``outjson`` initially holds the parameters passed in by Galaxy; its
    ``output_data[0].extra_files_path`` names the directory that receives the
    downloaded files. After the files are moved there, ``outjson`` is
    overwritten with the ``megan_tools`` data table entry.

    :param fname: key of ``FILE2NAME`` / ``FILE2TYPE`` naming the download
    :param outjson: path of the Galaxy parameter / result JSON file
    :param base_url: URL prefix handed on to :func:`url_download`
    :raises KeyError: if ``fname`` is not a known mapping file
    """
    # Look the labels up first so an unknown name fails before any download.
    data_manager_entry = {
        'value': fname.split(".")[0],
        'name': FILE2NAME[fname],
        'type': FILE2TYPE[fname],
        'path': '.',
    }

    workdir = os.path.join(os.getcwd(), 'megan_tools')
    url_download(fname, workdir, base_url)

    data_manager_json = dict(data_tables=dict(megan_tools=data_manager_entry))

    with open(outjson) as handle:
        params = json.load(handle)
    target_directory = params['output_data'][0]['extra_files_path']
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)
    for filename in os.listdir(workdir):
        shutil.move(os.path.join(workdir, filename), target_directory)

    # open() instead of the Python 2 only file() builtin.
    with open(outjson, 'w') as handle:
        handle.write(json.dumps(data_manager_json))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Create data manager json.')
    parser.add_argument('--out', action='store', help='JSON filename')
    parser.add_argument('--file', action='store', help='Download filename')
    args = parser.parse_args()

    main(args.file, args.out)
<?xml version="1.0"?>
<!-- data_manager/megan_tools_fetcher.xml

     Galaxy data manager tool that runs data_manager.py to download one MEGAN
     mapping file. Every option value of the "database_name" selects is passed
     to the script via the file argument and must therefore be a key of the
     FILE2NAME / FILE2TYPE dictionaries in data_manager.py. -->
<tool id="megan_tools_fetcher" name="megan tools" tool_type="manage_data" version="0.0.1">
    <description>Mapping files downloader</description>
    <command detect_errors="exit_code">
    <![CDATA[
        python '$__tool_directory__/data_manager.py'
        --out '${out_file}'
        --file '$type_cond.database_name'
    ]]>
    </command>
    <inputs>
        <!-- First choose the family of mapping files, then the concrete file. -->
        <conditional name="type_cond">
            <param name="type_select" type="select" label="mapping type">
                <option value="ncbi_current">Mapping files for current NCBI-nr protein database (not containing GI numbers)</option>
                <option value="silva">Mapping file for working with the SILVA SSU database</option>
                <option value="ncbi_deprecated">Deprecated mapping files for NCBI-nr protein database (releases until August 2016, containing GI numbers):</option>
            </param>
            <when value="ncbi_current">
                <param name="database_name" type="select" label="mapping data">
                    <option value="prot_acc2tax-June2018X1.abin.zip">Protein accession to NCBI-taxonomy (June2018X1)</option>
                    <option value="nucl_acc2tax-June2018.abin.zip">Nucleotide accession to NCBI-taxonomy (June2018)</option>
                    <option value="acc2interpro-June2018X.abin.zip">Protein accession to InterPro (June2018X)</option>
                    <option value="acc2eggnog-Oct2016X.abin.zip">Protein accession to eggNOG (Oct2016X)</option>
                    <option value="acc2seed-May2015XX.abin.zip">Protein accession to SEED (May2015XX)</option>
                    <option value="acc2kegg-Dec2017X1-ue.abin.zip">Protein accession to KEGG (Dec2017X1). Only for use with the Ultimate Edition of MEGAN.</option>
                </param>
            </when>
            <when value="silva">
                <param name="database_name" type="select" label="mapping data">
                    <option value="SSURef_Nr99_132_tax_silva_to_NCBI_synonyms.map.gz">SSURef_Nr99_132_tax_silva_to_NCBI_synonyms.map.gz</option>
                    <option value="SSURef_NR99_128_tax_silva_to_NCBI_synonyms.map.gz">SSURef_NR99_128_tax_silva_to_NCBI_synonyms.map.gz</option>
                </param>
            </when>
            <when value="ncbi_deprecated">
                <param name="database_name" type="select" label="mapping data">
                    <option value="prot_gi2tax-Aug2016X.bin.zip">Protein accession to NCBI-taxonomy (Aug2016X)</option>
                    <option value="nucl_gi2tax-Aug2016.bin.zip">Nucleotide accession to NCBI-taxonomy (Aug2016)</option>
                    <!-- Labels follow the file names: gi2eggnog is eggNOG, gi2interpro is InterPro. -->
                    <option value="gi2eggnog-June2016X.bin.zip">Protein accession to eggNOG (June2016X)</option>
                    <option value="gi2interpro-June2016X.bin.zip">Protein accession to InterPro (June2016X)</option>
                    <option value="gi2seed-May2015X.bin.zip">Protein accession to SEED (May2015X)</option>
                    <option value="gi2kegg-Aug2016X-ue.bin.zip">Protein accession to KEGG (Aug2016X). Only for use with the Ultimate Edition of MEGAN.</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="out_file" format="data_manager_json" />
    </outputs>
    <tests>
        <test>
            <conditional name="type_cond">
                <param name="type_select" value="silva"/>
                <param name="database_name" value="SSURef_Nr99_132_tax_silva_to_NCBI_synonyms.map.gz"/>
            </conditional>
            <output name="out_file" file="SSURef_Nr99_132_tax_silva_to_NCBI_synonyms_json"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Downloads the selected MEGAN mapping file from
http://ab.inf.uni-tuebingen.de/data/software/megan6/download/ and adds it to
the ``megan_tools`` data table. Zip archives are unpacked after the download.
    ]]></help>
</tool>
<?xml version="1.0"?>
<!-- data_manager_conf.xml

     Connects the megan_tools_fetcher tool to the "megan_tools" data table. -->
<data_managers>
    <data_manager id="megan_tools_fetcher"
                  version="0.0.1"
                  tool_file="data_manager/megan_tools_fetcher.xml">
        <data_table name="megan_tools">
            <output>
                <!-- Columns are filled from the JSON the tool writes. -->
                <column name="value"/>
                <column name="name"/>
                <column name="type"/>
                <!-- "path" refers to the tool output "out_file": the directory
                     is moved below GALAXY_DATA_MANAGER_DATA_PATH and the column
                     value is translated to that location, then passed through
                     abspath. -->
                <column name="path" output_ref="out_file">
                    <move type="directory" relativize_symlinks="True">
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">megan_tools/${value}</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/megan_tools/${value}</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
            </output>
        </data_table>
    </data_manager>
</data_managers>
<?xml version="1.0"?>
<!-- tool_data_table_conf.xml.sample

     Sample declaration of the "megan_tools" data table. -->
<tables>
    <!-- Locations of data downloaded for the megan tools -->
    <table name="megan_tools" comment_char="#">
        <!-- Column order of the entries in the .loc file. -->
        <columns>value, name, type, path</columns>
        <file path="tool-data/megan_tools.loc"/>
    </table>
</tables>