# HG changeset patch # User chrisw # Date 1573532146 18000 # Node ID 87dd7ef3887729f268f19965cc429c3997fb09b0 Uploaded diff -r 000000000000 -r 87dd7ef38877 data_manager_monorail_index/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_monorail_index/README.md Mon Nov 11 23:15:46 2019 -0500 @@ -0,0 +1,2 @@ +# monorail_dm_galaxy +Index DataManager for Monorail pipeline diff -r 000000000000 -r 87dd7ef38877 data_manager_monorail_index/data_manager/data_manager.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_monorail_index/data_manager/data_manager.py Mon Nov 11 23:15:46 2019 -0500 @@ -0,0 +1,37 @@ +#based off of "Gene Annotation Fetcher" https://testtoolshed.g2.bx.psu.edu/repository?repository_id=c0f5466df187cc04&changeset_revision=444300ec9185 +import argparse +import datetime +import json +import os +import shutil +import sys +import tarfile +import urllib2 +import zipfile + +parser = argparse.ArgumentParser(description='Create data manager json.') +parser.add_argument('--out', dest='output', action='store', help='JSON filename') +parser.add_argument('--name', dest='name', action='store', default=None, help='Data table entry unique ID') +parser.add_argument('--url', dest='url', action='store', help='Monorail Indexes Base URL', default="http://snaptron.cs.jhu.edu/data/monorail/ath10") + +args = parser.parse_args() + +def main(args): + workdir = os.path.join(os.getcwd(), 'monorail_index') + data_manager_entry = {} + if args.name is None: + args.name = args.url.split('/')[-1] + data_manager_entry['value'] = args.name.lower() + data_manager_entry['name'] = args.name + data_manager_entry['path'] = args.output + data_manager_json = dict(data_tables=dict(monorail_index=data_manager_entry)) + params = json.loads(open(args.output).read()) + target_directory = params['output_data'][0]['extra_files_path'] + os.mkdir(target_directory) + output_path = os.path.abspath(os.path.join(os.getcwd(), 'monorail_index')) + for filename in os.listdir(workdir): + shutil.move(os.path.join(output_path, filename), target_directory) + file(args.output, 'w').write(json.dumps(data_manager_json)) + +if __name__ == '__main__': + main(args) diff -r 000000000000 -r 87dd7ef38877 data_manager_monorail_index/data_manager/download_and_extract_monorail_index.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_monorail_index/data_manager/download_and_extract_monorail_index.sh Mon Nov 11 23:15:46 2019 -0500 @@ -0,0 +1,20 @@ +#!/bin/bash + +target_dir=$1 +#e.g. http://snaptron.cs.jhu.edu/data/monorail/ath10 or http://snaptron.cs.jhu.edu/data/monorail/hg38 +ref=$2 + +if [[ -z $ref ]]; then + ref='http://snaptron.cs.jhu.edu/data/monorail/ath10' +fi + +url=$ref +ref=$(basename $url) + +mkdir -p ${target_dir}/${ref} +for f in gtf.tar.gz unmapped_hisat2_idx.tar.gz salmon_index.tar.gz star_idx.tar.gz ; do + curl ${url}/${f} > ${target_dir}/${ref}/${f} + pushd ${target_dir}/${ref} + tar -zxvf ${f} + popd +done diff -r 000000000000 -r 87dd7ef38877 data_manager_monorail_index/data_manager/monorail_index_fetcher.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_monorail_index/data_manager/monorail_index_fetcher.xml Mon Nov 11 23:15:46 2019 -0500 @@ -0,0 +1,28 @@ + + + monorail indexes fetcher + + + + + + + + + + + + diff -r 000000000000 -r 87dd7ef38877 data_manager_monorail_index/data_manager_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_monorail_index/data_manager_conf.xml Mon Nov 11 23:15:46 2019 -0500 @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff -r 000000000000 -r 87dd7ef38877 data_manager_monorail_index/tool-data/monorail_index.loc.sample diff -r 000000000000 -r 87dd7ef38877 data_manager_monorail_index/tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_monorail_index/tool_data_table_conf.xml.sample Mon Nov 11 23:15:46 2019 -0500 @@ -0,0 +1,8 @@ + + + + + value, dbkey, name, path + +
+
\ No newline at end of file