changeset 18:402c6528fb6f draft

Uploaded
author chrisw
date Thu, 14 Nov 2019 02:12:39 +0000
parents aa1eda63b53d
children e7d440a3b439
files data_manager_monorail_index/data_manager/data_manager.py data_manager_monorail_index/data_manager/monorail_index_fetcher.xml data_manager_monorail_index/data_manager_conf.xml data_manager_monorail_index/tool-data/monorail_index.loc.sample
diffstat 4 files changed, 52 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager_monorail_index/data_manager/data_manager.py	Wed Nov 13 23:15:07 2019 +0000
+++ b/data_manager_monorail_index/data_manager/data_manager.py	Thu Nov 14 02:12:39 2019 +0000
@@ -10,28 +10,33 @@
 import zipfile
 
 parser = argparse.ArgumentParser(description='Create data manager json.')
-parser.add_argument('--out', dest='output', action='store', help='JSON filename')
+parser.add_argument('--config', dest='config', action='store', help='JSON filename')
 parser.add_argument('--name', dest='name', action='store', default=None, help='Data table entry unique ID')
-parser.add_argument('--url', dest='url', action='store', help='Monorail Indexes Base URL', default="http://snaptron.cs.jhu.edu/data/monorail/ath10")
+parser.add_argument('--url', dest='url', action='store', help='Monorail Indexes Base URL', default="http://snaptron.cs.jhu.edu/data/monorail/1.0.0/ath10")
+parser.add_argument('--subdir', dest='subdir', action='store', help='subdirectory where the indexes are temporarily downloaded to')
+#no --version option: the Monorail version is parsed from the <version>/<ref> tail of --url in main()
 
 args = parser.parse_args()
 
 def main(args):
     workdir = os.path.join(os.getcwd(), 'monorail_index')
     data_manager_entry = {}
-    ref = args.url.split('/')[-1]
-    #data_manager_entry['dbkey'] = 'mrail.'+ref
+    #URL syntax assumes that the last two components of the URL are: <version>/<ref>
+    #e.g. 1.0.0/ath10 (a trailing '/' is stripped first so <ref> is never empty)
+    url_comps = args.url.rstrip('/').split('/')
+    ref = url_comps[-1]
+    version = url_comps[-2]
     data_manager_entry['dbkey'] = ref.lower()
-    data_manager_entry['value'] = 'v'+ref.lower()
-    jsonin = open(args.output).read()
+    with open(args.config) as cfg: jsonin = cfg.read()
     params = json.loads(jsonin)
-    target_directory = params['output_data'][0]['extra_files_path']
+    #params is no longer consulted for paths: 'path' and 'exons_path' both come from --subdir
     #data_manager_entry['path'] = params['output_data'][0]['extra_files_path']
-    data_manager_entry['path'] = target_directory
-    data_manager_entry['name'] = 'mrail.'+ref
+    data_manager_entry['path'] = args.subdir
+    data_manager_entry['exons_path'] = args.subdir
+    data_manager_entry['version'] = version
     #data_manager_entry['exons_path'] = data_manager_entry['path'] + os.sep + 'gtf' + os.sep + 'exons.bed'
-    data_manager_json = dict(data_tables={'monorail_index': [data_manager_entry]})
-    file(args.output, 'wb').write(json.dumps(data_manager_json))
+    data_manager_json = {'data_tables':{'monorail_index': [data_manager_entry]}}
+    with open(args.config, 'w') as out: out.write(json.dumps(data_manager_json))
 
 if __name__ == '__main__':
     main(args)
--- a/data_manager_monorail_index/data_manager/monorail_index_fetcher.xml	Wed Nov 13 23:15:07 2019 +0000
+++ b/data_manager_monorail_index/data_manager/monorail_index_fetcher.xml	Thu Nov 14 02:12:39 2019 +0000
@@ -10,13 +10,13 @@
         #set $subdir = os.path.basename($target_directory)
         mkdir '${target_directory}' &&
         bash '${__tool_directory__}/download_and_extract_monorail_index.sh' '${target_directory}' '${monorail_index_url}' &&
-        python '${__tool_directory__}/data_manager.py' --out "${out_file}"
+        python '${__tool_directory__}/data_manager.py' --config "${out_file}" --subdir '${subdir}'
         #if $monorail_index_url:
             --url "${monorail_index_url}"
         #end if
         ]]></command>
     <inputs>
-        <param label="Enter base URL for Monorail index directories (e.g. http://snaptron.cs.jhu.edu/data/monorail/ath10)" name="monorail_index_url" type="text" />
+        <param label="Enter base URL for Monorail index directories (e.g. http://snaptron.cs.jhu.edu/data/monorail/1.0.0/ath10)" name="monorail_index_url" type="text" />
     </inputs>
     <outputs>
         <data format="data_manager_json" name="out_file" />
--- a/data_manager_monorail_index/data_manager_conf.xml	Wed Nov 13 23:15:07 2019 +0000
+++ b/data_manager_monorail_index/data_manager_conf.xml	Thu Nov 14 02:12:39 2019 +0000
@@ -3,14 +3,19 @@
     <data_manager tool_file="data_manager/monorail_index_fetcher.xml" id="monorail_index_fetcher" version="1.0.0">
     <data_table name="monorail_index">
             <output>
-                <column name="value" />
-                <column name="dbkey" />
+                <column name="value"/>
+                <column name="dbkey"/>
+                <column name="version"/>
                 <column name="path" output_ref="out_file">
                     <move relative_symlinks="True" type="directory">
                         <!-- no need to set source, gets taken from out_dir.extra_files_path -->
-                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/mri/${value}</target>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">monorail/${version}/${dbkey}</target>
                     </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/{$dbkey}/mri/${value}/${path}</value_translation>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/monorail/${version}/${dbkey}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+                <column name="exons_path">
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/monorail/${version}/${dbkey}/gtf/exons.bed</value_translation>
                     <value_translation type="function">abspath</value_translation>
                 </column>
             </output>
--- a/data_manager_monorail_index/tool-data/monorail_index.loc.sample	Wed Nov 13 23:15:07 2019 +0000
+++ b/data_manager_monorail_index/tool-data/monorail_index.loc.sample	Thu Nov 14 02:12:39 2019 +0000
@@ -0,0 +1,25 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Monorail index data files. You will need to
+#create these data files and then create a monorail_index.loc file
+#(store it in this directory) that points to the directories in which
+#those files are stored. NOTE(review): the format described below (TAB-
+#separated) was copied from rnastar_index2.loc; confirm it for monorail_index:
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>	<with_gene_model>	<version>
+#
+#The <with_gene_model> column should be 1 or 0, indicating whether the index
+#was built with annotations (i.e., --sjdbGTFfile and --sjdbOverhang were used)
+#or not.
+#
+#The <version> column indicates the STAR version that introduced the format of
+#the index, i.e., the oldest STAR version that could make use of the index.
+#
+#Note that STAR indices can become quite large. Consequently, it is only
+#advisable to create indices with annotations if it's known ahead of time that
+#(A) the annotations won't be frequently updated and (B) the read lengths used
+#will also rarely vary. If either of these is not the case, it's advisable to
+#create indices without annotations and then specify an annotation file and
+#maximum read length (minus 1) when running STAR.
+#
+#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar	0	2.7.1a
+#hg19Ensembl   hg19Ensembl    hg19 full with Ensembl annotation   /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar	1	2.7.1a