changeset 1:d9032bb158b9 draft

Add in data manager for CEAS annotation databases.
author pjbriggs
date Wed, 28 Jan 2015 05:04:46 -0500
parents 8b25779ee261
children 695d61decd06
files data_manager/data_manager_ceas_fetch_annotations.py data_manager/data_manager_ceas_fetch_annotations.xml data_manager_conf.xml
diffstat 3 files changed, 170 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_ceas_fetch_annotations.py	Wed Jan 28 05:04:46 2015 -0500
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+#
+
+import sys
+import os
+import optparse
+import urllib2
+import gzip
+
+from galaxy.util.json import from_json_string, to_json_string
+
+# Download file from specified URL and put into local subdir
+
+if __name__ == '__main__':
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('--download',dest='url',action='store',
+                      type="string",default=None,help='URL to download')
+    options,args = parser.parse_args()
+    print "options: %s" % options
+    print "args   : %s" % args
+    if len(args) != 2:
+        p.error("Need to supply JSON file name and description text")
+
+    # Read the JSON supplied from the data manager tool
+    # Results from this program will be returned via the
+    # same file
+    jsonfile = args[0]
+    params = from_json_string(open(jsonfile).read() )
+    print "%s" % params
+
+    # Extract the data from the input JSON
+    # See https://wiki.galaxyproject.org/Admin/Tools/DataManagers/HowTo/Define?highlight=%28\bAdmin%2FTools%2FDataManagers\b%29
+    # for example of JSON
+    #
+    # We want the values set in the data manager XML
+    dbkey = params['param_dict']['dbkey']
+    description = params['param_dict']['description']
+    # Where to put the output file
+    # Nb we have to make this ourselves, it doesn't exist by default
+    target_dir = params['output_data'][0]['extra_files_path']
+    os.mkdir(target_dir)
+
+    # Dictionary for returning to data manager
+    data_manager_dict = {}
+
+    # Download from URL
+    if options.url is not None:
+        print "Downloading: %s" % options.url
+        annotation_file_name = os.path.basename(options.url)
+        annotation_file_path = os.path.join(target_dir,annotation_file_name)
+        print "Annotation file name: %s" % annotation_file_name
+        print "Annotation file path: %s" % annotation_file_path
+        open(annotation_file_path,'wb').write(urllib2.urlopen(options.url).read())
+        if annotation_file_name.endswith('.gz'):
+            # Uncompress
+            uncompressed_file = annotation_file_path[:-3]
+            open(uncompressed_file,'wb').write(gzip.open(annotation_file_path,'rb').read())
+            # Remove gzipped file
+            os.remove(annotation_file_path)
+            annotation_file_name = os.path.basename(uncompressed_file)
+            annotation_file_path = uncompressed_file
+        # Update the output dictionary
+        data_manager_dict['data_tables'] = dict()
+        data_manager_dict['data_tables']['ceas_annotations'] = {
+            'dbkey': dbkey,
+            'name': description,
+            'value': annotation_file_name,
+        }
+    else:
+        raise NotImplementedError("Non-download options not implemented")
+
+    #save info to json file
+    open(jsonfile,'wb').write(to_json_string(data_manager_dict))
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_ceas_fetch_annotations.xml	Wed Jan 28 05:04:46 2015 -0500
@@ -0,0 +1,73 @@
+<tool id="data_manager_fetch_ceas_annotations" name="Fetch CEAS annotation" version="0.0.1" tool_type="manage_data">
+    <description>Fetch and install annotation databases for CEAS</description>
+    <command interpreter="python">data_manager_ceas_fetch_annotations.py
+    #if str( $reference_source.reference_source_selector ) == "ceas_web"
+      --download=$reference_source.annotation_url
+    #end if
+    "${out_file}"
+    "${description}"</command>
+    <inputs>
+        <param name="dbkey" type="genomebuild" label="DBKEY to assign to data" />
+        <param type="text" name="description" value="" label="Description of annotation" />
+        <conditional name="reference_source">
+          <param name="reference_source_selector" type="select" label="Choose the source for the CEAS annotation">
+            <option value="ceas_web" selected="True">CEAS website</option>
+	    <!-- Not implemented for now
+            <option value="history">History</option>
+	    -->
+            <option value="directory">Directory on Server</option>
+          </param>
+          <when value="ceas_web">
+            <param type="select" name="annotation_url">
+	      <option value="http://liulab.dfci.harvard.edu/CEAS/src/hg18.refGene.gz">hg18</option>
+	      <option value="http://liulab.dfci.harvard.edu/CEAS/src/hg19.refGene.gz">hg19</option>
+	      <option value="http://liulab.dfci.harvard.edu/CEAS/src/mm8.refGene.gz">mm8</option>
+	      <option value="http://liulab.dfci.harvard.edu/CEAS/src/mm9.refGene.gz">mm9</option>
+	      <option value="http://liulab.dfci.harvard.edu/CEAS/src/dm2.refGene.gz">dm2</option>
+	      <option value="http://liulab.dfci.harvard.edu/CEAS/src/dm3.refGene.gz">dm3</option>
+	      <option value="http://liulab.dfci.harvard.edu/CEAS/src/ce4.refGene.gz">ce4</option>
+	      <option value="http://liulab.dfci.harvard.edu/CEAS/src/ce6.refGene.gz">ce6</option>
+	    </param>
+          </when>
+	  <!-- Not implemented for now
+          <when value="history">
+            <param name="input_annotation" type="data" format="fasta" label="Annotation File" multiple="False" optional="False" />
+          </when>
+	  -->
+          <when value="directory">
+            <param type="text" name="annotation_filename" value="" label="Full path to CEAS annotation table file on disk" optional="False" />
+            <param type="boolean" name="create_symlink" truevalue="create_symlink" falsevalue="copy_file" label="Create symlink to original data instead of copying" checked="False" />
+          </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="dbkey" value="anoGam1"/>
+            <param name="sequence_name" value=""/>
+            <param name="sequence_desc" value=""/>
+            <param name="sequence_id" value=""/>
+            <param name="reference_source_selector" value="history"/>
+            <param name="input_fasta" value="phiX174.fasta"/>
+            <param name="sort_selector" value="as_is"/>
+            <output name="out_file" file="phiX174_as_anoGam1.data_manager_json"/>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Fetches an annotation database from the CEAS website and populates the "ceas_annotations"
+data table. Fetching from a Galaxy history or a server directory is not implemented yet.
+
+------
+
+
+
+.. class:: infomark
+
+**Notice:** The description is used as the name of the new "ceas_annotations" entry, so it should not be left blank.
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Wed Jan 28 05:04:46 2015 -0500
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/data_manager_ceas_fetch_annotations.xml" id="ceas_fetch_annotations">
+        <data_table name="ceas_annotations">
+            <output>
+	      <!-- Plain form of the "value" column (disabled):
+              <column name="value" />
+	      the active "value" column below also moves the file and rewrites the value to its path -->
+              <column name="dbkey" />
+              <column name="name" />
+              <column name="value" output_ref="out_file" >
+                <move type="file">
+                  <source>${value}</source>
+                  <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">ceas/${dbkey}/${value}</target>
+                </move>
+                <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/ceas/${dbkey}/${value}</value_translation>
+                <value_translation type="function">abspath</value_translation>
+              </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>