# HG changeset patch # User iuc # Date 1520874463 14400 # Node ID 4ea70130da21a3b0e96bcab1555d8c1034901e89 Uploaded diff -r 000000000000 -r 4ea70130da21 .shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Mon Mar 12 13:07:43 2018 -0400 @@ -0,0 +1,19 @@ +categories: +- Data Managers +description: | + Motif database downloader for the MEME Suite, which allows the biologist to discover novel motifs in collections + of unaligned nucleotide or protein sequences, and to perform a wide variety of other motif-based analyses. +homepage_url: http://meme-suite.org/meme_4.11.3/db/motifs +long_description: | + Motif database downloader for the MEME Suite, which supports motif-based analysis of DNA, RNA and protein sequences. + It provides motif discovery algorithms using both probabilistic (MEME) and discrete models (DREME), which have + complementary strengths. It also allows discovery of motifs with arbitrary insertions and deletions (GLAM2). + In addition to motif discovery, the MEME Suite provides tools for scanning sequences for matches to motifs (FIMO, + MAST and GLAM2Scan), scanning for clusters of motifs (MCAST), comparing motifs to known motifs (Tomtom), finding + preferred spacings between motifs (SpaMo), predicting the biological roles of motifs (GOMo), measuring the positional + enrichment of sequences for known motifs (CentriMo), and analyzing ChIP-seq and other large datasets (MEME-ChIP). + The MEME Suite is comprised of a collection of tools that work together. 
+name: data_manager_meme_motif_database_downloader +owner: iuc +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_meme_motif_database_downloader +type: unrestricted diff -r 000000000000 -r 4ea70130da21 data_manager/data_manager_meme_motif_databases_downloader.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/data_manager_meme_motif_databases_downloader.py Mon Mar 12 13:07:43 2018 -0400 @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# +# Data manager for downloading MEME Motif Databases. +import argparse +import json +import os +import shutil +import sys +import tarfile +import urllib2 +import zipfile + +DEFAULT_DATA_TABLE_NAMES = ["meme_motif_databases"] + +parser = argparse.ArgumentParser() +parser.add_argument('--description', dest='description', default=None, help='Description') +parser.add_argument('--name', dest='name', help='Data table entry unique ID') +parser.add_argument('--out_file', dest='out_file', help='JSON output file') +parser.add_argument('--web_url', dest='web_url', help='URL for downloading MEME motif databases') + +args = parser.parse_args() + +def add_data_table_entry(data_manager_dict, data_table_name, data_table_entry): + data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) + data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, []) + data_manager_dict['data_tables'][data_table_name].append(data_table_entry) + return data_manager_dict + + +def make_directory(dir): + if not os.path.exists(dir): + os.makedirs(dir) + + +def remove_directory(dir): + if os.path.exists(dir): + shutil.rmtree(dir) + + +def extract_archive(file_path, work_directory): + if tarfile.is_tarfile(file_path): + fh = tarfile.open(file_path, 'r:*') + elif zipfile.is_zipfile(file_path): + fh = zipfile.ZipFile(file_path, 'r') + else: + return + fh.extractall(work_directory) + + +def move_files(source_directory, target_directory): + # Move 
the files into defined output directory. + for filename in os.listdir(source_directory): + shutil.move(os.path.join(source_directory, filename), target_directory) + + +def url_download(url, work_directory): + file_path = os.path.join(work_directory, os.path.basename(url)) + src = None + dst = None + try: + req = urllib2.Request(url) + src = urllib2.urlopen(req) + dst = open(file_path, 'wb') + while True: + chunk = src.read(2**10) + if chunk: + dst.write(chunk) + else: + break + except Exception, e: + print >>sys.stderr, str(e) + finally: + if src: + src.close() + if dst: + dst.close() + return file_path + + +def download(target_directory, web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES): + data_manager_dict = {} + data_table_entry = {} + # Download the databases. + work_directory = os.path.abspath(os.path.join(os.getcwd(), 'meme_motif_databases')) + make_directory(work_directory) + file_path = url_download(web_url, work_directory) + extract_archive(file_path, work_directory) + os.remove(file_path) + # Move the database files into the defined output directory. + move_files(work_directory, target_directory) + remove_directory(work_directory) + # Populate the data_manager_dict with the database data entry. + for file_path in os.listdir(target_directory): + full_path = os.path.abspath(os.path.join(target_directory, file_path)) + entry_name = "%s" % os.path.basename(file_path) + data_table_entry['value'] = entry_name + data_table_entry['name'] = entry_name + data_table_entry['path'] = full_path + data_table_entry['description'] = description + # Populate the data_manager_dict. 
+ for data_table_name in data_table_names: + data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) + return data_manager_dict + +params = json.loads(open(args.out_file).read()) +target_directory = params['output_data'][0]['extra_files_path'] +make_directory(target_directory) + +if args.description is None: + description = '' +else: + description = args.description.strip() + +# Get the databases. +data_manager_dict = download(target_directory, args.web_url, description) +# Write the JSON output dataset. +fh = open(args.out_file, 'wb') +fh.write(json.dumps(data_manager_dict)) +fh.close() diff -r 000000000000 -r 4ea70130da21 data_manager/data_manager_meme_motif_databases_downloader.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/data_manager_meme_motif_databases_downloader.xml Mon Mar 12 13:07:43 2018 -0400 @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What it does** + +This tool fetches MEME motif databases used by some of the MEME Galaxy tools and populates the meme_motif_databases +data table. The databases can be imported using a URL, and an optional description can be provided that will appear +next to the database file name in the data table entry. 
+ +MEME Motif Databases can be downloaded using this URL: + + * December, 2017: http://meme-suite.org/meme-software/Databases/motifs/motif_databases.12.17.tgz + + + + + 10.1093/bioinformatics/btr064 + + + diff -r 000000000000 -r 4ea70130da21 data_manager_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Mon Mar 12 13:07:43 2018 -0400 @@ -0,0 +1,20 @@ + + + + + + + + + + ${path} + meme_motif_databases/${value} + + ${GALAXY_DATA_MANAGER_DATA_PATH}/meme_motif_databases/${value} + abspath + + + + + + diff -r 000000000000 -r 4ea70130da21 tool-data/meme_motif_database.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/meme_motif_database.loc.sample Mon Mar 12 13:07:43 2018 -0400 @@ -0,0 +1,3 @@ +## MEME Motif Databases +#Value Name Path Description +#2017_12 2017_12 /meme_motif_databases/2017_12 December 2017 diff -r 000000000000 -r 4ea70130da21 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon Mar 12 13:07:43 2018 -0400 @@ -0,0 +1,6 @@ + + + value, name, path, description + +
+