Mercurial > repos > chrisw > data_manager_monorail_index_fetcher
changeset 0:87dd7ef38877 draft
Uploaded
author | chrisw |
---|---|
date | Mon, 11 Nov 2019 23:15:46 -0500 |
parents | |
children | 014bed8546e9 |
files | data_manager_monorail_index/README.md data_manager_monorail_index/data_manager/data_manager.py data_manager_monorail_index/data_manager/download_and_extract_monorail_index.sh data_manager_monorail_index/data_manager/monorail_index_fetcher.xml data_manager_monorail_index/data_manager_conf.xml data_manager_monorail_index/tool-data/monorail_index.loc.sample data_manager_monorail_index/tool_data_table_conf.xml.sample |
diffstat | 6 files changed, 110 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_monorail_index/README.md Mon Nov 11 23:15:46 2019 -0500 @@ -0,0 +1,2 @@ +# monorail_dm_galaxy +Index DataManager for Monorail pipeline
# data_manager_monorail_index/data_manager/data_manager.py
# based off of "Gene Annotation Fetcher" https://testtoolshed.g2.bx.psu.edu/repository?repository_id=c0f5466df187cc04&changeset_revision=444300ec9185
"""Write the Galaxy data manager JSON describing one Monorail index set."""
import argparse
import datetime
import json
import os
import shutil
import sys
import tarfile
import zipfile

try:
    import urllib2  # Python 2 only; nothing below uses it
except ImportError:  # Python 3 has no urllib2 module
    urllib2 = None

parser = argparse.ArgumentParser(description='Create data manager json.')
parser.add_argument('--out', dest='output', action='store', help='JSON filename')
parser.add_argument('--name', dest='name', action='store', default=None, help='Data table entry unique ID')
parser.add_argument('--url', dest='url', action='store', help='Monorail Indexes Base URL', default="http://snaptron.cs.jhu.edu/data/monorail/ath10")


def main(args):
    """Build the ``monorail_index`` data table entry and write it as JSON.

    The file at ``args.output`` is first read as JSON parameters (the
    target directory is taken from ``output_data[0].extra_files_path``)
    and is then overwritten with the data manager JSON.

    Args:
        args: Namespace with ``output`` (JSON file path), ``name`` (entry
            ID, or None to use the last component of the URL) and ``url``
            (Monorail index base URL).

    Raises:
        KeyError, IndexError: If the parameter JSON has no
            ``output_data[0]['extra_files_path']``.
        IOError / OSError: If ``args.output`` cannot be read or written.
    """
    workdir = os.path.join(os.getcwd(), 'monorail_index')

    if args.name is None:
        # Strip trailing slashes so ".../ath10/" still yields "ath10".
        args.name = args.url.rstrip('/').split('/')[-1]

    data_manager_entry = {
        'value': args.name.lower(),
        'name': args.name,
        # NOTE(review): this records the JSON file itself as the entry path;
        # presumably it should be the directory holding the indexes - confirm.
        'path': args.output,
    }
    data_manager_json = {'data_tables': {'monorail_index': data_manager_entry}}

    with open(args.output) as handle:
        params = json.load(handle)
    target_directory = params['output_data'][0]['extra_files_path']

    # The directory may already have been created by an earlier step, so
    # only create it when it is missing instead of failing on "exists".
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)

    # Move staged files only when a staging directory is actually present.
    if os.path.isdir(workdir):
        for filename in os.listdir(workdir):
            shutil.move(os.path.join(workdir, filename), target_directory)

    with open(args.output, 'w') as handle:
        handle.write(json.dumps(data_manager_json))


if __name__ == '__main__':
    # Parse only when run as a script so importing the module has no side effects.
    args = parser.parse_args()
    main(args)
#!/bin/bash
# data_manager_monorail_index/data_manager/download_and_extract_monorail_index.sh
#
# Download and unpack the Monorail index tarballs for one reference.
#
# Usage: download_and_extract_monorail_index.sh TARGET_DIR [BASE_URL]
#   TARGET_DIR  directory under which a <ref>/ subdirectory is created,
#               where <ref> is the last path component of BASE_URL
#   BASE_URL    e.g. http://snaptron.cs.jhu.edu/data/monorail/ath10 or
#               http://snaptron.cs.jhu.edu/data/monorail/hg38
#               (the ath10 URL is used when this is empty or omitted)

# Stop at the first failing command so the caller gets a non-zero exit
# status instead of the status of whatever happened to run last.
set -euo pipefail

target_dir=${1:?usage: $0 TARGET_DIR [BASE_URL]}
url=${2:-}

if [[ -z $url ]]; then
    url='http://snaptron.cs.jhu.edu/data/monorail/ath10'
fi

# Drop a trailing slash so the download URLs do not contain "//".
url=${url%/}
ref=$(basename "$url")
dest="${target_dir}/${ref}"

mkdir -p "$dest"
for f in gtf.tar.gz unmapped_hisat2_idx.tar.gz salmon_index.tar.gz star_idx.tar.gz ; do
    # -f makes curl fail on HTTP errors rather than saving the error page
    # as if it were the tarball; -L follows redirects.
    curl -fL -o "${dest}/${f}" "${url}/${f}"
    # Extract next to the tarball (same effect as cd-ing into the directory).
    tar -zxvf "${dest}/${f}" -C "$dest"
done
<?xml version="1.0"?>
<!-- data_manager_monorail_index/data_manager/monorail_index_fetcher.xml -->
<!--
    Galaxy data manager tool. Runs download_and_extract_monorail_index.sh to
    fetch the index tarballs into the output's extra files directory, then
    data_manager.py to write the data manager JSON into the output file.
-->
<tool id="monorail_index_fetcher" name="Monorail Index Fetch" tool_type="manage_data" version="1.0.0">
    <description>monorail indexes fetcher</description>
    <stdio>
        <!-- Any non-zero exit code from the command chain is a fatal error. -->
        <exit_code description="Error" level="fatal" range="1:" />
    </stdio>
    <command><![CDATA[
        #set $target_directory = str($out_file.extra_files_path)
        mkdir -p '${target_directory}' &&
        bash '${__tool_directory__}/download_and_extract_monorail_index.sh' '${target_directory}' '${monorail_index_url}' &&
        ## Both helper scripts live next to this wrapper, not in the job working directory.
        python '${__tool_directory__}/data_manager.py' --out '${out_file}'
        #if $monorail_index_url:
            --url '${monorail_index_url}'
        #end if
        #if $index_name:
            --name '${index_name}'
        #end if
    ]]></command>
    <inputs>
        <param help="Enter a unique identifier, or leave blank to use the reference ID at end of base URL below" label="Name for this Monorail index set" name="index_name" type="text" optional="True" />
        <param label="Enter base URL for Monorail index directories (e.g. http://snaptron.cs.jhu.edu/data/monorail/ath10)" name="monorail_index_url" type="text" />
    </inputs>
    <outputs>
        <data format="data_manager_json" name="out_file" />
    </outputs>
</tool>
<?xml version="1.0"?>
<!-- data_manager_monorail_index/data_manager_conf.xml -->
<!--
    Registers the monorail_index_fetcher tool as a data manager and lists the
    columns it writes to the monorail_index data table.
-->
<data_managers>
    <data_manager id="monorail_index_fetcher"
                  version="1.0.0"
                  tool_file="data_manager/monorail_index_fetcher.xml">
        <data_table name="monorail_index">
            <output>
                <column name="value"/>
                <column name="dbkey"/>
                <column name="name"/>
                <column name="path"/>
            </output>
        </data_table>
    </data_manager>
</data_managers>
<?xml version="1.0"?>
<!-- data_manager_monorail_index/tool_data_table_conf.xml.sample -->
<tables>
    <!-- Locations of Monorail index data -->
    <!-- The table name must match the data_table name used in data_manager_conf.xml. -->
    <table name="monorail_index" comment_char="#">
        <columns>value, dbkey, name, path</columns>
        <file path="tool-data/monorail_index.loc" />
    </table>
</tables>