changeset 0:4ea70130da21 draft

Uploaded
author iuc
date Mon, 12 Mar 2018 13:07:43 -0400
parents
children c8f5faccb7ae
files .shed.yml data_manager/data_manager_meme_motif_databases_downloader.py data_manager/data_manager_meme_motif_databases_downloader.xml data_manager_conf.xml tool-data/meme_motif_database.loc.sample tool_data_table_conf.xml.sample
diffstat 6 files changed, 212 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Mon Mar 12 13:07:43 2018 -0400
@@ -0,0 +1,19 @@
+categories:
+- Data Managers
+description: |
+  Motif database downloader for the MEME Suite, which allows the biologist to discover novel motifs in collections
+  of unaligned nucleotide or protein sequences, and to perform a wide variety of other motif-based analyses.
+homepage_url: http://meme-suite.org/meme_4.11.3/db/motifs
+long_description: |
+  Motif database downloader for the MEME Suite, which supports motif-based analysis of DNA, RNA and protein sequences.
+  It provides motif discovery algorithms using both probabilistic (MEME) and discrete models (DREME), which have
+  complementary strengths.  It also allows discovery of motifs with arbitrary insertions and deletions (GLAM2).
+  In addition to motif discovery, the MEME Suite provides tools for scanning sequences for matches to motifs (FIMO,
+  MAST and GLAM2Scan), scanning for clusters of motifs (MCAST), comparing motifs to known motifs (Tomtom), finding
+  preferred spacings between motifs (SpaMo), predicting the biological roles of motifs (GOMo), measuring the positional
+  enrichment of sequences for known motifs (CentriMo), and analyzing ChIP-seq and other large datasets (MEME-ChIP).
+  The MEME Suite is comprised of a collection of tools that work together.
+name: data_manager_meme_motif_database_downloader
+owner: iuc
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_meme_motif_database_downloader
+type: unrestricted
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_meme_motif_databases_downloader.py	Mon Mar 12 13:07:43 2018 -0400
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+#
+# Data manager for downloading MEME Motif Databases.
+import argparse
+import json
+import os
+import shutil
+import sys
+import tarfile
+import urllib2
+import zipfile
+
+DEFAULT_DATA_TABLE_NAMES = ["meme_motif_databases"]
+
+# Command line interface.  argparse derives each attribute name from its flag
+# and leaves options that are not supplied set to None.
+parser = argparse.ArgumentParser()
+parser.add_argument('--description', help='Description')
+parser.add_argument('--name', help='Data table entry unique ID')
+parser.add_argument('--out_file', help='JSON output file')
+parser.add_argument('--web_url', help='URL for downloading MEME motif databases')
+
+args = parser.parse_args()
+
+def add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
+    """Append data_table_entry to the named table and return data_manager_dict.
+
+    The 'data_tables' mapping and the list for data_table_name are created
+    on first use; data_manager_dict is updated in place.
+    """
+    tables = data_manager_dict.setdefault('data_tables', {})
+    tables.setdefault(data_table_name, []).append(data_table_entry)
+    return data_manager_dict
+
+
+def make_directory(dir):
+    """Create dir, including missing parent directories, unless it exists."""
+    if os.path.exists(dir):
+        return
+    os.makedirs(dir)
+
+
+def remove_directory(dir):
+    """Delete dir and everything below it; do nothing when it is absent."""
+    if not os.path.exists(dir):
+        return
+    shutil.rmtree(dir)
+
+
+def extract_archive(file_path, work_directory):
+    """Unpack the archive at file_path into work_directory.
+
+    Tar archives (any compression tarfile can detect) and zip archives are
+    supported.  A file that is neither is left untouched and the function
+    returns without extracting anything.
+    """
+    if tarfile.is_tarfile(file_path):
+        archive = tarfile.open(file_path, 'r:*')
+    elif zipfile.is_zipfile(file_path):
+        archive = zipfile.ZipFile(file_path, 'r')
+    else:
+        return
+    try:
+        # NOTE(review): member paths are not checked before extraction, so
+        # this should only be used with archives from a trusted source.
+        archive.extractall(work_directory)
+    finally:
+        # Release the archive handle even when extraction fails.
+        archive.close()
+
+
+def move_files(source_directory, target_directory):
+    """Move every entry found in source_directory into target_directory."""
+    entries = [os.path.join(source_directory, name) for name in os.listdir(source_directory)]
+    for entry in entries:
+        shutil.move(entry, target_directory)
+
+
+def url_download(url, work_directory):
+    """Download url into work_directory and return the path of the saved file.
+
+    The local file is named after the last path component of url.  Any
+    failure is reported on stderr and then re-raised, so the caller never
+    carries on with a missing or truncated file.
+    """
+    file_path = os.path.join(work_directory, os.path.basename(url))
+    src = None
+    dst = None
+    try:
+        req = urllib2.Request(url)
+        src = urllib2.urlopen(req)
+        dst = open(file_path, 'wb')
+        # Copy in 1 KiB chunks rather than reading the whole response at once.
+        while True:
+            chunk = src.read(2**10)
+            if not chunk:
+                break
+            dst.write(chunk)
+    except Exception as e:
+        sys.stderr.write('%s\n' % str(e))
+        raise
+    finally:
+        # Close whichever handles were opened before the failure (if any).
+        if src:
+            src.close()
+        if dst:
+            dst.close()
+    return file_path
+
+
+def download(target_directory, web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
+    """Fetch the motif database archive and describe its contents.
+
+    The archive at web_url is downloaded and unpacked in a scratch directory
+    below the current working directory, and the unpacked entries are then
+    moved into target_directory.
+
+    Returns a dictionary holding, for every name in data_table_names, one
+    data table entry (value, name, path, description) per entry found in
+    target_directory.
+    """
+    data_manager_dict = {}
+    # Download and unpack the databases in a scratch directory.
+    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'meme_motif_databases'))
+    make_directory(work_directory)
+    archive_path = url_download(web_url, work_directory)
+    extract_archive(archive_path, work_directory)
+    os.remove(archive_path)
+    # Move the database files into the defined output directory.
+    move_files(work_directory, target_directory)
+    remove_directory(work_directory)
+    # Populate the data_manager_dict with one entry per database.
+    for entry_name in os.listdir(target_directory):
+        # Build a fresh dictionary on every pass: appending one shared
+        # dictionary would make all rows alias the last entry processed.
+        data_table_entry = {
+            'value': entry_name,
+            'name': entry_name,
+            'path': os.path.abspath(os.path.join(target_directory, entry_name)),
+            'description': description,
+        }
+        for data_table_name in data_table_names:
+            data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
+    return data_manager_dict
+
+# The output file initially holds JSON parameters, including the directory
+# that receives the downloaded files; read it before it is overwritten below.
+with open(args.out_file) as fh:
+    params = json.load(fh)
+target_directory = params['output_data'][0]['extra_files_path']
+make_directory(target_directory)
+
+# The description is optional; fall back to an empty string.
+description = '' if args.description is None else args.description.strip()
+
+# Get the databases.
+data_manager_dict = download(target_directory, args.web_url, description)
+# Write the JSON output dataset, closing the file even if the write fails.
+with open(args.out_file, 'wb') as fh:
+    fh.write(json.dumps(data_manager_dict))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_meme_motif_databases_downloader.xml	Mon Mar 12 13:07:43 2018 -0400
@@ -0,0 +1,45 @@
+<tool id="data_manager_meme_motif_databases_downloader" name="MEME Motif Databases Download" version="1.1.0" tool_type="manage_data">
+    <description></description>
+    <stdio>
+        <exit_code range=":-1" level="fatal" description="Error: Cannot open file" />
+        <exit_code range="1:" level="fatal" description="Error" />
+    </stdio>
+    <command>
+        <![CDATA[
+            python $__tool_directory__/data_manager_meme_motif_databases_downloader.py
+            --name '$name'
+            --description '$description'
+            --web_url '$web_url'
+            --out_file '$out_file'
+        ]]>
+    </command>
+    <inputs>
+        <param name="name" type="text" value="" label="Data table entry unique ID"/>
+        <param name="description" type="text" value="" label="Description of the data" help="Value is optional"/>
+        <param name="web_url" type="text" value="" label="URL for downloading MEME motif databases"/>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" />
+    </outputs>
+    <tests>
+    </tests>
+    <help>
+.. class:: infomark
+
+**What it does**
+
+This tool fetches MEME motif databases used by some of the MEME Galaxy tools and populates the meme_motif_databases
+data table.  The databases can be imported using a URL, and an optional description can be provided that will appear
+next to the database file name in the data table entry.
+
+MEME Motif Databases can be downloaded using this URL:
+
+ * December, 2017:        http://meme-suite.org/meme-software/Databases/motifs/motif_databases.12.17.tgz
+
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btr064</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Mon Mar 12 13:07:43 2018 -0400
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/data_manager_meme_motif_databases_downloader.xml" id="data_manager_meme_motif_databases_download" >
+        <data_table name="meme_motif_databases">
+            <output>
+                <column name="value" />
+                <column name="name"/>
+                <column name="path" output_ref="out_file">
+                    <move type="file">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">meme_motif_databases/${value}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/meme_motif_databases/${value}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+                <column name="description" />
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/meme_motif_database.loc.sample	Mon Mar 12 13:07:43 2018 -0400
@@ -0,0 +1,3 @@
+## MEME Motif Databases
+#Value	Name	Path	Description
+#2017_12	2017_12	/meme_motif_databases/2017_12	December 2017
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Mon Mar 12 13:07:43 2018 -0400
@@ -0,0 +1,6 @@
+<tables>
+    <table name="meme_motif_databases" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="tool-data/meme_motif_databases.loc" />
+    </table>
+</tables>