changeset 4:48c524dc6922 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_qiime_database_downloader commit 9a8e5333c047c6a5c18ca8f139a979704644ad87-dirty
author bebatut
date Wed, 03 May 2017 12:09:20 -0400
parents 80cfdc6e9ea6
children 6c2db7877763
files data_manager/data_manager_qiime_download.py data_manager/data_manager_qiime_download.xml tool-data/qiime_rep_set.loc.sample tool-data/qiime_rep_set_aligned.loc.sample tool-data/qiime_taxonomy.loc.sample tool-data/qiime_trees.loc.sample tool_data_table_conf.xml.sample
diffstat 7 files changed, 0 insertions(+), 506 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/data_manager_qiime_download.py	Wed May 03 12:01:04 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,368 +0,0 @@
-#!/usr/bin/env python
-#
-# Data manager for reference data for the QIIME Galaxy tools
-import json
-import argparse
-import os
-import subprocess
-import sys
-import tarfile
-import zipfile
-import requests
-import ftplib
-
-
-protocol = {
-    "unite": "http",
-    "greengenes": "ftp",
-    "silva": "http",
-    "img": "ftp"
-}
-baseUrl = {
-    "unite": "http://unite.ut.ee/sh_files/sh_qiime_release_",
-    "greengenes": "greengenes.microbio.me",
-    "silva": "http://www.arb-silva.de/fileadmin/silva_databases/qiime/Silva_",
-    "img": "ftp.microbio.me"
-}
-ftp_dir = {
-    "greengenes": "/greengenes_release/gg_",
-    "img": ""
-}
-ftp_file_prefix = {
-    "greengenes": "gg_",
-    "img": ""
-}
-ftp_file_suffix = {
-    "greengenes": "_otus",
-    "img": ""
-}
-extension = {
-    "unite": "zip",
-    "greengenes": "tar.gz",
-    "silva": {
-        "104_release": "tgz",
-        "108_release": "tgz",
-        "108_release_curated": "tgz",
-        "111_release": "tgz",
-        "119_consensus_majority_taxonomy": "zip",
-        "119_release": "zip",
-        "119_release_aligned_rep_files": "tar.gz",
-        "123_release": "zip",
-        "128_release": "tgz"},
-    "img": "tgz"
-}
-filetypes = ["rep_set", "rep_set_aligned", "taxonomy", "trees"]
-
-
-# Utility functions for interacting with Galaxy JSON
-def read_input_json(jsonfile):
-    """Read the JSON supplied from the data manager tool
-
-    Returns a tuple (param_dict,extra_files_path)
-
-    'param_dict' is an arbitrary dictionary of parameters
-    input into the tool; 'extra_files_path' is the path
-    to a directory where output files must be put for the
-    receiving data manager to pick them up.
-
-    NB the directory pointed to by 'extra_files_path'
-    doesn't exist initially, it is the job of the script
-    to create it if necessary.
-
-    """
-    params = json.loads(open(jsonfile).read())
-    return (params['param_dict'],
-            params['output_data'][0]['extra_files_path'])
-
-
-# Utility functions for creating data table dictionaries
-#
-# Example usage:
-# >>> d = create_data_tables_dict()
-# >>> add_data_table(d,'my_data')
-# >>> add_data_table_entry(dict(dbkey='hg19',value='human'))
-# >>> add_data_table_entry(dict(dbkey='mm9',value='mouse'))
-# >>> print str(json.dumps(d))
-def create_data_tables_dict():
-    """Return a dictionary for storing data table information
-
-    Returns a dictionary that can be used with 'add_data_table'
-    and 'add_data_table_entry' to store information about a
-    data table. It can be converted to JSON to be sent back to
-    the data manager.
-
-    """
-    d = {}
-    d['data_tables'] = {}
-    return d
-
-
-def add_data_table(d, table):
-    """Add a data table to the data tables dictionary
-
-    Creates a placeholder for a data table called 'table'.
-
-    """
-    d['data_tables'][table] = []
-
-
-def add_data_table_entry(d, table, entry):
-    """Add an entry to a data table
-
-    Appends an entry to the data table 'table'. 'entry'
-    should be a dictionary where the keys are the names of
-    columns in the data table.
-
-    Raises an exception if the named data table doesn't
-    exist.
-
-    """
-    try:
-        d['data_tables'][table].append(entry)
-    except KeyError:
-        raise Exception("add_data_table_entry: no table '%s'" % table)
-
-
-def get_ftp_file(ftp, filename):
-    """
-    """
-    try:
-        ftp.retrbinary("RETR " + filename, open(filename, 'wb').write)
-    except:
-        print("Error")
-
-
-def download_archive(db, version):
-    """
-
-    """
-    filepath = "archive"
-    if protocol[db] == "http":
-        url = "%s%s.%s" % (baseUrl[db], version, extension[db])
-        r = requests.get(url, stream=True)
-        r.raise_for_status()
-        with open(filepath, "wb") as fd:
-            for chunk in r.iter_content(chunk_size=128):
-                fd.write(chunk)
-    elif protocol[db] == "ftp":
-        ftp = ftplib.FTP(baseUrl[db])
-        ftp.login("anonymous", "ftplib-example-1")
-        ftp.cwd("%s%s" % (ftp_dir[db], version))
-        filepath = "%s%s%s.%s" % (
-            ftp_file_prefix[db],
-            version,
-            ftp_file_suffix[db],
-            extension[db])
-        get_ftp_file(ftp, filepath)
-        ftp.quit()
-    return filepath
-
-
-def extract_archive(filepath, ext):
-    """
-    """
-    archive_content_path = "tmp"
-    if ext == "tar.gz" or ext == "tgz":
-        tar = tarfile.open(filepath)
-        tar.extractall(path=archive_content_path)
-        tar.close()
-        content = os.listdir(archive_content_path)
-        archive_content = []
-        for x in content:
-            if not x.startswith("."):
-               archive_content.append(x) 
-        if len(archive_content) == 1:
-            archive_content_path = os.path.join(
-                archive_content_path,
-                archive_content[0])
-    elif ext == "zip":
-        zip_ref = zipfile.ZipFile(filepath, 'r')
-        zip_ref.extractall(archive_content_path)
-        zip_ref.close()
-    return archive_content_path
-
-
-def move_unite_files(archive_content_path, filename_prefix, 
-name_prefix, data_tables, target_dir):
-    """
-
-    """
-    archive_content = os.listdir(archive_content_path)
-    for content in archive_content:
-        content_filepath = os.path.join(archive_content_path, content)
-        content_name_prefix = "%s - %s" % (name_prefix, content.split(".")[0])
-        content_filename_prefix = "%s_%s" % (filename_prefix, content)
-        if content.find("refs") != -1:
-            move_file(
-                content_filepath,
-                content_filename_prefix,
-                content_name_prefix,
-                data_tables,
-                os.path.join(target_dir, "rep_set"),
-                "rep_set")
-        elif content.find("taxonomy") != -1:
-            move_file(
-                content_filepath,
-                content_filename_prefix,
-                content_name_prefix,
-                data_tables,
-                os.path.join(target_dir, "taxonomy"),
-                "taxonomy")
-
-
-def move_file(input_filepath, filename, name, data_tables, target_dir,
-filetype):
-    """
-    """
-    output_filepath = os.path.join(target_dir, filename)
-    os.rename(input_filepath, output_filepath)
-    add_data_table_entry(
-        data_tables,
-        "qiime_%s" % (filetype),
-        dict(
-            dbkey=filename.split(".")[0],
-            value="1.0",
-            name=name,
-            path=output_filepath))
-
-
-def move_dir_content(input_path, filename_prefix, name_prefix, data_tables,
-target_dir, filetype):
-    """
-    """
-    for content in os.listdir(input_path):
-        if content.startswith("."):
-            continue
-        content_path = os.path.join(input_path, content)
-        content_name_prefix = "%s - %s" % (name_prefix, content.split(".")[0])
-        content_filename_prefix = "%s_%s" % (filename_prefix, content)
-        if os.path.isdir(content_path):
-            move_dir_content(
-                content_path,
-                content_filename_prefix,
-                content_name_prefix,
-                data_tables,
-                target_dir,
-                filetype)
-        else:
-            move_file(
-                content_path,
-                content_filename_prefix,
-                content_name_prefix,
-                data_tables,
-                target_dir,
-                filetype)
-
-
-def move_files(archive_content_path, filename_prefix, 
-name_prefix, data_tables, target_dir, db):
-    """
-    """
-    for filetype in filetypes:
-        filetype_target_dir = os.path.join(
-            target_dir,
-            filetype)
-        filetype_path = os.path.join(
-            archive_content_path,
-            filetype)
-        move_dir_content(
-            filetype_path,
-            filename_prefix,
-            name_prefix,
-            data_tables,
-            filetype_target_dir,
-            filetype)
-
-
-def download_db(data_tables, db, version, target_dir):
-    """Download QIIME database
-
-    Creates references to the specified file(s) on the Galaxy
-    server in the appropriate data table (determined from the
-    file extension).
-
-    The 'data_tables' dictionary should have been created using
-    the 'create_data_tables_dict' and 'add_data_table' functions.
-
-    Arguments:
-      data_tables: a dictionary containing the data table info
-      db: name of the database
-      version: version of the database
-      table_name: name of the table
-      target_dir: directory to put copy or link to the data file
-
-    """
-    ext = extension[db]
-    if db == "silva":
-        ext = ext[version]
-
-    print("Download archive")
-    filepath = download_archive(db, version)
-
-    print("Extract archive %s" % filepath)
-    archive_content_path = extract_archive(filepath, ext)
-
-    print("Moving file from %s" % archive_content_path)
-    filename_prefix = "%s_%s" % (db, version)
-    name_prefix = "%s (%s)" % (db, version)
-    if db == "greengenes" or db == "silva":
-        move_files(
-            archive_content_path,
-            filename_prefix,
-            name_prefix, 
-            data_tables,
-            target_dir,
-            db)
-    elif db == "unite":
-        move_unite_files(
-            archive_content_path,
-            filename_prefix,
-            name_prefix,
-            data_tables,
-            target_dir)
-
-
-if __name__ == "__main__":
-    print("Starting...")
-
-    # Read command line
-    parser = argparse.ArgumentParser(
-        description='Download QIIME reference database')
-    parser.add_argument('--database', help="Database name")
-    parser.add_argument('--version', help="Database version")
-    parser.add_argument('--jsonfile', help="Output JSON file")
-    args = parser.parse_args()
-
-    jsonfile = args.jsonfile
-
-    # Read the input JSON
-    params, target_dir = read_input_json(jsonfile)
-
-    # Make the target directory
-    print("Making %s" % target_dir)
-    os.mkdir(target_dir)
-    os.mkdir(os.path.join(target_dir, "rep_set"))
-    os.mkdir(os.path.join(target_dir, "rep_set_aligned"))
-    os.mkdir(os.path.join(target_dir, "taxonomy"))
-    os.mkdir(os.path.join(target_dir, "trees"))
-
-    # Set up data tables dictionary
-    data_tables = create_data_tables_dict()
-    add_data_table(data_tables, "qiime_rep_set")
-    add_data_table(data_tables, "qiime_rep_set_aligned")
-    add_data_table(data_tables, "qiime_taxonomy")
-    add_data_table(data_tables, "qiime_trees")
-
-    # Fetch data from specified data sources
-    download_db(
-        data_tables,
-        args.database,
-        args.version,
-        target_dir)
-
-    # Write output JSON
-    print("Outputting JSON")
-    print(str(json.dumps(data_tables)))
-    with open(jsonfile, 'w') as out:
-        json.dump(data_tables, out)
-    print("Done.")
\ No newline at end of file
--- a/data_manager/data_manager_qiime_download.xml	Wed May 03 12:01:04 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-<tool id="data_manager_qiime_download" name="Download QIIME" version="1.9.1" tool_type="manage_data">
-    <description>reference databases</description>
-    <requirements>
-        <requirement type="package" version="2.13.0">requests</requirement>
-    </requirements>
-    <stdio>
-        <exit_code range=":-1" level="fatal" description="Error: Cannot open file" />
-        <exit_code range="1:" level="fatal" description="Error" />
-    </stdio>
-    <command><![CDATA[
-        python '$__tool_directory__/data_manager_qiime_download.py'
-            --database '$db.database'
-            --version '$db.version'
-            --jsonfile '${out_file}'
-    ]]></command>
-    <inputs>
-        <conditional name="db">
-            <param name="database" type="select" label="Database to download">
-                <option value="greengenes" selected="true">Greengenes OTUs</option>
-                <option value="silva">SILVA OTUs (16S/18S)</option>
-                <option value="unite">UNITE OTUs (ITS)</option>
-                <!--<option value="img">IMG/QIIME reference protein sequences</option>-->
-            </param>
-            <when value="greengenes">
-                <param name="version" type="select" label="Version of Greengenes">
-                    <option value="13_8" selected="true">13.8</option>
-                    <option value="13_5">13.5</option>
-                    <option value="12_10">12.10</option>
-                </param>
-            </when>
-            <when value="silva">
-                <param name="version" type="select" label="Version of SILVA OTUs">
-                    <option value="128_release" selected="true">128</option>
-                    <option value="123_release">123</option>
-                    <option value="119_release_aligned_rep_files">119 (aligned rep)</option>
-                    <option value="119_release">119</option>
-                    <option value="119_consensus_majority_taxonomy">119 (consensus majority taxonomy)</option>
-                    <option value="111_release">111</option>
-                    <option value="108_release">108</option>
-                    <option value="108_release_curated">108 (curated)</option>
-                    <option value="108_release">108</option>
-                    <option value="104_release">104</option>
-                </param>
-            </when>
-            <when value="unite">
-                <param name="version" type="select" label="Version of UNITE OTUs">
-                    <option value="20.11.2016">7.1 (2016-11-20, with singletons set as RefS)</option>
-                    <option value="s_20.11.2016">7.1 (2016-11-20, with global and 97% singletons)</option>
-                    <option value="22.08.2016">7.1 (2016-08-22, with singletons set as RefS)</option>
-                    <option value="s_22.08.2016">7.1 (2016-08-22, with global and 97% singletons)</option>
-                    <option value="31.01.2016">7.0 (2016-01-31, with singletons set as RefS)</option> 
-                    <option value="s_31.01.2016">7.0 (2016-01-31, with global and 97% singletons)</option> 
-                    <option value="01.08.2015">7.0 (2015-08-01, with singletons set as RefS)</option>
-                    <option value="s_01.08.2015">7.0 (2015-08-01, with global and 97% singletons)</option>
-                    <option value="02.03.2015">7.0 (2015-03-02, with singletons set as RefS)</option>
-                    <option value="s_02.03.2015">7.0 (2015-03-02, with global and 97% singletons)</option>
-                    <option value="30.12.2014">6.0 (2014-12-30, with singletons set as RefS)</option>
-                    <option value="s_30.12.2014">6.0 (2014-12-30, with global and 97% singletons)</option>
-                    <option value="10.09.2014">6.0 (2014-09-10, with singletons set as RefS)</option>
-                    <option value="s_10.09.2014">6.0 (2014-09-10, with global and 97% singletons)</option>
-                    <option value="04.07.2014">6.0 (2014-07-04, with singletons set as RefS)</option>
-                    <option value="s_04.07.2014">6.0 (2014-07-04, with global and 97% singletons)</option>
-                    <option value="13.05.2014">6.0 (2014-05-13, with singletons set as RefS)</option> 
-                    <option value="s_13.05.2014">6.0 (2014-05-13, with global and 97% singletons)</option>
-                    <option value="09.02.2014">6.0 (2014-02-09, with singletons set as RefS)</option>
-                    <option value="s_09.02.2014">6.0 (2014-02-09, with global and 97% singletons)</option>
-                    <option value="15.01.2014">6.0 (2014-01-15, with singletons set as RefS)</option>
-                    <option value="s_15.01.2014">6.0 (2014-01-15, with global and 97% singletons)</option>
-                    <option value="19.12.2013">6.0 (2013-12-19, with singletons set as RefS)</option>
-                    <option value="s_19.12.2013">6.0 (2013-12-19, with global and 97% singletons)</option>
-                    <option value="08.12.2013">6.0 (2013-12-08, with singletons set as RefS)</option>
-                    <option value="s_08.12.2013">6.0 (2013-12-08, with global and 97% singletons)</option>
-                    <option value="15.10.2013">5.0 (2013-10-15, with singletons set as RefS)</option>   
-                    <option value="s_15.10.2013">5.0 (2013-10-15, with global and 97% singletons)</option>
-                </param>
-            </when>
-            <!--<when value="img">
-                <param name="version" type="select" label="Version of IMG/QIIME reference protein sequences">
-                    <option value="img-qiime-25oct2012" selected="true">img-qiime-25oct2012</option>
-                </param>
-            </when>-->
-        </conditional>
-    </inputs>
-    <outputs>
-           <data name="out_file" format="data_manager_json" label="${tool.name}"/>
-    </outputs>
-    <tests>
-    </tests>
-    <help><![CDATA[
-This tool downloads the reference databases for QIIME
-    ]]></help>
-    <citations>
-        <citation type="doi"></citation>
-        <yield />
-    </citations>
-</tool>
\ No newline at end of file
--- a/tool-data/qiime_rep_set.loc.sample	Wed May 03 12:01:04 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-#<unique_id>  <name>  <database_caption>  <fasta_file_path>
-#
-#For each reference database, you need to download the fasta file in qiime path
-#
-#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html
-#
\ No newline at end of file
--- a/tool-data/qiime_rep_set_aligned.loc.sample	Wed May 03 12:01:04 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-#<unique_id>  <name>  <database_caption>  <fasta_file_path>
-#
-#For each reference database, you need to download the fasta file in qiime path
-#
-#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html
-#
\ No newline at end of file
--- a/tool-data/qiime_taxonomy.loc.sample	Wed May 03 12:01:04 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-#<unique_id>  <name>  <database_caption>  <fasta_file_path>
-#
-#For each reference database, you need to download the fasta file in qiime path
-#
-#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html
-#
\ No newline at end of file
--- a/tool-data/qiime_trees.loc.sample	Wed May 03 12:01:04 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-#<unique_id>  <name>  <database_caption>  <fasta_file_path>
-#
-#For each reference database, you need to download the fasta file in qiime path
-#
-#List of useful db for qiime: http://qiime.org/home_static/dataFiles.html
-#
\ No newline at end of file
--- a/tool_data_table_conf.xml.sample	Wed May 03 12:01:04 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-<tables>
-    <table name="qiime_rep_set" comment_char="#">
-        <columns>value, name, dbkey, path</columns>
-        <file path="tool-data/qiime_rep_set.loc" />
-    </table>
-    <table name="qiime_rep_set_aligned" comment_char="#">
-        <columns>value, name, dbkey, path</columns>
-        <file path="tool-data/qiime_rep_set_aligned.loc" />
-    </table>
-    <table name="qiime_taxonomy" comment_char="#">
-        <columns>value, name, dbkey, path</columns>
-        <file path="tool-data/qiime_taxonomy.loc" />
-    </table>
-    <table name="qiime_trees" comment_char="#">
-        <columns>value, name, dbkey, path</columns>
-        <file path="tool-data/qiime_trees.loc" />
-    </table>
-</tables>
\ No newline at end of file