changeset 0:87dd7ef38877 draft

Uploaded
author chrisw
date Mon, 11 Nov 2019 23:15:46 -0500
parents
children 014bed8546e9
files data_manager_monorail_index/README.md data_manager_monorail_index/data_manager/data_manager.py data_manager_monorail_index/data_manager/download_and_extract_monorail_index.sh data_manager_monorail_index/data_manager/monorail_index_fetcher.xml data_manager_monorail_index/data_manager_conf.xml data_manager_monorail_index/tool-data/monorail_index.loc.sample data_manager_monorail_index/tool_data_table_conf.xml.sample
diffstat 6 files changed, 110 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_monorail_index/README.md	Mon Nov 11 23:15:46 2019 -0500
@@ -0,0 +1,2 @@
+# monorail_dm_galaxy
+Index DataManager for Monorail pipeline
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_monorail_index/data_manager/data_manager.py	Mon Nov 11 23:15:46 2019 -0500
@@ -0,0 +1,37 @@
+#based off of "Gene Annotation Fetcher" https://testtoolshed.g2.bx.psu.edu/repository?repository_id=c0f5466df187cc04&changeset_revision=444300ec9185
+import argparse
+import datetime
+import json
+import os
+import shutil
+import sys
+import tarfile
+import urllib2
+import zipfile
+
+parser = argparse.ArgumentParser(description='Create data manager json.')
+parser.add_argument('--out', dest='output', action='store', help='JSON filename')
+parser.add_argument('--name', dest='name', action='store', default=None, help='Data table entry unique ID')
+parser.add_argument('--url', dest='url', action='store', help='Monorail Indexes Base URL', default="http://snaptron.cs.jhu.edu/data/monorail/ath10")
+
+args = parser.parse_args()
+
+def main(args):
+    workdir = os.path.join(os.getcwd(), 'monorail_index')
+    data_manager_entry = {}
+    if args.name is None:
+        args.name = args.url.split('/')[-1]
+    data_manager_entry['value'] = args.name.lower()
+    data_manager_entry['name'] = args.name
+    data_manager_entry['path'] = args.output
+    data_manager_json = dict(data_tables=dict(monorail_index=data_manager_entry))
+    params = json.loads(open(args.output).read())
+    target_directory = params['output_data'][0]['extra_files_path']
+    os.mkdir(target_directory)
+    output_path = os.path.abspath(os.path.join(os.getcwd(), 'monorail_index'))
+    for filename in os.listdir(workdir):
+        shutil.move(os.path.join(output_path, filename), target_directory)
+    file(args.output, 'w').write(json.dumps(data_manager_json))
+
+if __name__ == '__main__':
+    main(args)  # args: the module-level namespace returned by parser.parse_args()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_monorail_index/data_manager/download_and_extract_monorail_index.sh	Mon Nov 11 23:15:46 2019 -0500
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+target_dir=$1
+#e.g. http://snaptron.cs.jhu.edu/data/monorail/ath10 or http://snaptron.cs.jhu.edu/data/monorail/hg38
+ref=$2
+
+if [[ -z $ref ]]; then
+   ref='http://snaptron.cs.jhu.edu/data/monorail/ath10'
+fi 
+
+url=$ref
+ref=$(basename $url)
+
+mkdir -p ${target_dir}/${ref}
+for f in gtf.tar.gz unmapped_hisat2_idx.tar.gz salmon_index.tar.gz star_idx.tar.gz ; do
+    curl ${url}/${f} > ${target_dir}/${ref}/${f}
+    pushd ${target_dir}/${ref}
+    tar -zxvf ${f}
+    popd
+done 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_monorail_index/data_manager/monorail_index_fetcher.xml	Mon Nov 11 23:15:46 2019 -0500
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<tool id="monorail_index_fetcher" name="Monorail Index Fetch" tool_type="manage_data" version="1.0.0">
+    <description>monorail indexes fetcher</description>
+    <stdio>
+        <exit_code description="Error" level="fatal" range="1:" />
+    </stdio>
+    <command><![CDATA[
+        ## Download and unpack the indexes, then write the data table JSON.
+        #set $target_directory = str($out_file.extra_files_path)
+        ## -p: do not fail when the directory already exists.
+        mkdir -p '${target_directory}' &&
+        bash '${__tool_directory__}/download_and_extract_monorail_index.sh' '${target_directory}' '${monorail_index_url}' &&
+        python '${__tool_directory__}/data_manager.py' --out '${out_file}'
+        #if $monorail_index_url:
+            --url '${monorail_index_url}'
+        #end if
+        #if $index_name:
+            --name '${index_name}'
+        #end if
+        ]]></command>
+    <inputs>
+        <param help="Enter a unique identifier, or leave blank to use the reference ID at end of base URL below" label="Name for this Monorail index set" name="index_name" type="text" optional="True" />
+        <param label="Enter base URL for Monorail index directories (e.g. http://snaptron.cs.jhu.edu/data/monorail/ath10)" name="monorail_index_url" type="text" />
+    </inputs>
+    <outputs>
+        <data format="data_manager_json" name="out_file" />
+    </outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_monorail_index/data_manager_conf.xml	Mon Nov 11 23:15:46 2019 -0500
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<data_managers>
+    <!-- Declares which monorail_index data table columns the fetcher tool's output fills. -->
+    <data_manager tool_file="data_manager/monorail_index_fetcher.xml" id="monorail_index_fetcher" version="1.0.0">
+        <data_table name="monorail_index">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+    </data_manager>
+
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_monorail_index/tool_data_table_conf.xml.sample	Mon Nov 11 23:15:46 2019 -0500
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of Monorail index sets; the table name must match data_manager_conf.xml -->
+    <table name="monorail_index" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/monorail_index.loc" />
+    </table>
+</tables>
\ No newline at end of file