changeset 0:8bb037f88ed2

Uploaded
author melissacline
date Tue, 13 Jan 2015 23:37:23 -0500
parents
children ca4510434eda
files runXena.py tool_dependencies.xml ucsc_xena_datapages.xml ucsc_xena_download.py ucsc_xena_hub.xml ucsc_xenabrowser.xml xena.jar xenaAdmin.xml xenaGetDataset.py xenaGetDataset.xml xena_backup.py xena_delete.py xena_delete.xml xena_import.py xena_import.xml xena_query.py xena_restore.py xena_utils.py xena_utils.pyc
diffstat 19 files changed, 1140 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/runXena.py	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+# To kick off the script, run the following from the python directory:
+#   PYTHONPATH=`pwd` python runXena.py start <outfile>
+
+#standard python libs
+import logging
+import os
+import signal
+import subprocess
+import sys
+import time
+import traceback
+import xena_utils as xena
+
+#third party libs
+from daemon import runner
+from lockfile import LockTimeout
+
+
+class App():
+    
+    def __init__(self):
+        xenaBaseDir = xena.baseDir()
+        if not os.path.exists(xenaBaseDir):
+            os.mkdir(xenaBaseDir)
+        self.pidfile_path =  xenaBaseDir + "/xena-daemon.pid"
+        
+        self.stdin_path = '/dev/null'
+        self.stdout_path = '/dev/null'
+        self.stderr_path = '/dev/null'
+        self.pidfile_timeout = 5
+            
+    def run(self):
+        while True:
+            #Main code goes here ...
+            #Note that logger level needs to be set to logging.DEBUG before this shows up in the logs
+            xenaCmdline = "java -jar %s -r %s/files -d %s/db -t %s/tmp --logfile %s/xena.log -p %s -H 0.0.0.0 --no-auto" 
+            xenaBaseDir = xena.baseDir()
+            xenaCmd = xenaCmdline % (xena.jarPath(), xenaBaseDir, 
+                                     xenaBaseDir, xenaBaseDir, 
+                                     xenaBaseDir, xena.port())
+            logger.debug("Invoking Xena VM with command %s" % (xenaCmd))
+            xenaVm = subprocess.call(xenaCmd, shell=True)
+            logger.info("Starting Xena VM")
+            #logger.warn("Warning message")
+            #logger.error("Error message")
+
+
+
+def processListeningOnPort(portID):
+    cmd = "lsof -t -i :%s -sTCP:LISTEN" % portID
+    pid = subprocess.check_output(cmd, shell=True).rstrip()
+    return(int(pid))
+
+
+fp = open(sys.argv[2], "w")
+    
+app = App()
+logger = logging.getLogger("DaemonLog")
+logger.setLevel(logging.DEBUG)
+formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+handler = logging.FileHandler(xena.baseDir() + "/xena-daemon.log")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+
+hostname = subprocess.check_output("hostname -f", shell=True).rstrip()
+
+#
+# Check if there has been a Xena running on this system, and  
+# what its port number is or was.
+#
+xenaPort = xena.port()
+if xenaPort == None:
+    # In this case, no Xena has been running on this sytem.
+    xenaIsRunning = False
+else:
+    xenaIsRunning = xena.isRunning(xenaPort)
+#rint "xena running", xenaIsRunning, "port", xenaPort
+
+
+if sys.argv[1] == "status":
+    #
+    # When checking status, if Xena is running, then report that it's running
+    # with hostname and port.  If it's not running, then report as such.
+    #
+    if xenaIsRunning:
+        fp.write("Xena VM currently running on %s:%s\n" % (hostname, xenaPort))
+        fp.write("You can add %s:%s to Xena Data Hub\n" % (hostname, xenaPort))
+    else:
+        fp.write("Xena VM is not currently running on %s\n" % (hostname))
+
+elif sys.argv[1] == "start":
+    #
+    # When a start command came in, allocate a new port and prepare to start
+    # xena if it's not already running.  If it is already running, then 
+    # report as such.  In either case, close the output file before this
+    # process goes into daemon mode.
+    #
+    if not xenaIsRunning:
+        xenaPort = xena.port(testIfAvailable=True, findNewPort=True)
+        fp.write("Starting Xena VM on %s:%s\n" % (hostname, xenaPort))
+        fp.write("You can add %s:%s to Xena Data Hub\n" % (hostname, xenaPort))
+    else:
+        fp.write("Xena VM already running on %s:%s\n" % (hostname, xenaPort))
+        fp.write("You can add %s:%s to Xena Data Hub\n" % (hostname, xenaPort))
+    fp.close()
+
+elif sys.argv[1] == "stop":
+    #
+    # When stopping Xena, if it's currently running, report that Xena
+    # is being terminated and clean up the port.  If it's not running,
+    # report as such.  
+    #
+    if xenaIsRunning:
+        fp.write("Terminating Xena VM on %s:%s\n" % (hostname, xenaPort))
+    else:
+        fp.write("Xena VM is not currently running on %s\n" % (hostname)) 
+
+else:
+    fp.write(("Error: Unexpected command %s" % sys.argv[1]))
+
+
+#
+# Here is where the starting and stopping of the Xena daemon takes place.
+#
+if sys.argv[1] == "start" or (sys.argv[1] == "stop" and xenaIsRunning):
+    daemon_runner = runner.DaemonRunner(app)
+    # This ensures that the logger file handle does not get closed during daemonization
+    daemon_runner.daemon_context.files_preserve=[handler.stream]
+    try:
+        daemon_runner.do_action()
+    except LockTimeout:
+        # Xena is already running.  No need to do anything special, but this
+        # should be separated from the other exceptions.
+        pass
+    except:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
+        allLines = ''.join('!! ' + line for line in lines)
+        fp.write("Unsuccessful: error %s\n" % allLines)
+    if sys.argv[1] == "stop":
+        #
+        # If the Xena stop command has been issued, then kill the Xena 
+        # process.
+        #
+        xenaPid = processListeningOnPort(xenaPort)
+        logger.debug("Attempting to kill process with PID %d" % xenaPid)
+        os.kill(xenaPid, signal.SIGTERM)
+if sys.argv[1] != "start":
+    fp.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="installXena" version="1.0">
+    <install version="1.0">
+      <actions>
+	<action type="setup_virtualenv">
+	  docutils
+	</action>
+	<action type="setup_virtualenv">
+	  python-daemon
+	</action>
+	<action type="set_environment">
+          <environment_variable name="XENA_JAR_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
+        </action>  
+	<action type="set_environment">
+	  <environment_variable name="PYTHONPATH" action="prepend_to">$REPOSITORY_INSTALL_DIR</environment_variable>
+	</action>
+	<action type="set_environment">
+          <environment_variable name="XENA_BASE_DIR" action="set_to">$INSTALL_DIR/xena</environment_variable>
+        </action>
+      </actions>
+    </install>
+  </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsc_xena_datapages.xml	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="Explore Data in Xena" id="ucsc_xenadatapages" tool_type="data_source">
+    <description>Browse the UCSC Xena Data Pages.</description>
+    <command interpreter="python">ucsc_xena_download.py $genomic $__app__.config.output_size_limit</command>   
+    <inputs action="https://tcga1.kilokluster.ucsc.edu/~jzhu/proj/site/hgHeatmap-cavm/datapages" check_values="false" method="get">
+        <display>Explore the data in Xena $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
+        <param name="tool_id" type="hidden" value="ucsc_xenadatapages" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="genomic" format="tabular" label="#echo $URL.rsplit('/',1)[1].rsplit('.',1)[0]+' genomic'" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsc_xena_download.py	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+import socket, urllib, sys, os
+from galaxy import eggs #eggs needs to be imported so that galaxy.util can find docutils egg...
+from galaxy.util.json import from_json_string, to_json_string
+from galaxy.util import get_charset_from_http_headers
+import galaxy.model # need to import model before sniff to resolve a circular import dependency
+from galaxy.datatypes import sniff
+import tarfile
+import re
+
+filemap = [
+        ('genomic', r'genomic(Segment|Matrix)$'),
+        ('clinical', r'clinicalMatrix$'),
+        ]
+
+files = {
+        'genomic': sys.argv[1],
+        'clinical': sys.argv[2]
+        }
+
+max_file_size = sys.argv[3]
+
+def file_type(file):
+    with open(file) as f:
+        return from_json_string(f.read())['type']
+
+def stop_err( msg ):
+    sys.stderr.write( msg )
+    sys.exit()
+
+def load_input_parameters( filename, erase_file = True ):
+    datasource_params = {}
+    try:
+        json_params = from_json_string( open( filename, 'r' ).read() )
+        datasource_params = json_params.get( 'param_dict' )
+    except:
+        json_params = None
+        for line in open( filename, 'r' ):
+            try:
+                line = line.strip()
+                fields = line.split( '\t' )
+                datasource_params[ fields[0] ] = fields[1]
+            except:
+                continue
+    if erase_file:
+        open( filename, 'w' ).close() #open file for writing, then close, removes params from file
+    return json_params, datasource_params
+
+def load_file(files):
+    filename = files['genomic']
+    job_params, params = load_input_parameters( filename, False )
+    URL = params.get( 'URL', None ) #using exactly URL indicates that only one dataset is being downloaded
+    URL_method = params.get( 'URL_method', None )
+    socket.setdefaulttimeout( 600 )
+    try:
+        if not URL_method or URL_method == 'get':
+            page = urllib.urlopen( URL )
+        elif URL_method == 'post':
+            page = urllib.urlopen( URL, urllib.urlencode( params ) )
+    except Exception, e:
+        stop_err( 'The remote data source application may be off line, please try again later. Error: %s' % str( e ) )
+    if max_file_size:
+        file_size = int( page.info().get( 'Content-Length', 0 ) )
+        if file_size > max_file_size:
+            stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, max_file_size ) )
+    try:
+        cur_filename, is_multi_byte = sniff.stream_to_open_named_file( page, os.open( filename, os.O_WRONLY | os.O_CREAT ), filename, source_encoding=get_charset_from_http_headers( page.headers ) )
+    except Exception, e:
+        stop_err( 'Unable to fetch %s:\n%s' % ( URL, e ) )
+
+load_file(files)
+
+tar = tarfile.open(files['genomic'])
+names = tar.getnames()
+metafiles = [n for n in names if n.endswith('.json')]
+tar.extractall()
+withtype = [(file_type(file), file[0:-len(".json")]) for file in metafiles]
+try:
+    renames = [((n for (t, n) in withtype if re.search(pat, t)).next(), name) for (name, pat) in filemap]
+except StopIteration:
+     stop_err( 'Missing required file type in tarball' )
+for (frm, to) in renames:
+    os.rename(frm, files[to])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsc_xena_hub.xml	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="Xena Data Hub" id="ucsc_xenadatahub" tool_type="data_source">
+    <description>Select your Xena Data Hub</description>
+    <command interpreter="python">ucsc_xena_download.py $genomic $__app__.config.output_size_limit</command>   
+    <inputs action="https://genome-cancer.ucsc.edu/proj/site/hgHeatmap-cavm/hub/" check_values="false" method="get">
+        <display>Select your Xena Data Hub $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
+        <param name="tool_id" type="hidden" value="ucsc_xenadatapages" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="genomic" format="tabular" label="#echo $URL.rsplit('/',1)[1].rsplit('.',1)[0]+' genomic'" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsc_xenabrowser.xml	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed.
+-->
+<tool name="UCSC Xena Browser" id="ucsc_xenabrowser" tool_type="data_source">
+    <description>Run the UCSC Xena Browser as a Galaxy tool.</description>
+    <command interpreter="python">ucsc_xena_download.py $genomic $clinical $__app__.config.output_size_limit</command>   
+    <inputs action="https://genome-cancer.ucsc.edu/proj/site/hgHeatmap-cavm/" check_values="false" method="get">
+        <display>go to UCSC Xena Browser $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
+        <param name="tool_id" type="hidden" value="ucsc_xenabrowser" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" />
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+    </request_param_translation>
+    <uihints minwidth="800"/>
+    <outputs>
+        <data name="genomic" format="tabular" label="#echo $URL.rsplit('/',1)[1].rsplit('.',1)[0]+' genomic'" />
+        <data name="clinical" format="tabular" label="#echo $URL.rsplit('/',1)[1].rsplit('.',1)[0]+' clinical'"/>
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
Binary file xena.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaAdmin.xml	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,52 @@
+<tool id="xenaAdmin" name="Xena Administration" version="0.0.2">
+  <description>Administer the Galaxy-embedded Xena: start, check status, stop, backup, restore</description>
+ <requirements>
+   <requirement type="package" version="1.0">installXena</requirement>
+ </requirements>
+  <command interpreter="python">
+    #if $adminAction.actionType == "backup":
+        xena_backup.py ${adminAction.backupDir} $outfile
+    #else:
+        #if $adminAction.actionType == "restore":
+	    xena_restore.py ${adminAction.restoreDir} $outfile
+	#else:
+            runXena.py ${adminAction.actionType} $outfile
+        #end if
+    #end if
+  </command>
+  <inputs>
+    <conditional name="adminAction">
+      <param format="str" type="select" name = "actionType" label="Action">
+	<option value="status">Check Status</option>
+	<option value="start">Start Xena</option>
+	<option value="stop">Stop Xena</option>
+	<option value="backup">Backup the Xena data to an external directory</option>
+	<option value="restore">Restore data to Xena from an external backup directory</option>
+      </param>
+      <when value="backup">
+	<param label="External Directory" type="text" name="backupDir" value="/tmp/xena_files"/>
+      </when>
+      <when value="restore">
+	<param label="Backup Directory" type="text" name="restoreDir" value="/tmp/xena_files"/>
+      </when>
+      <when value="status"/>
+      <when value="start"/>
+      <when value="stop"/>
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="txt" name="outfile"/>
+  </outputs>
+ <help>
+
+**Xena Administration**      
+
+Administer the Galaxy-embedded Xena by starting it, stopping it or checking its status.  If you start Xena, or if you check the status while Xena is running, the output will indicate a URL for the Xena server that you can enter in the Xena Data Hub tool.  After you add a new data hub with this URL, and select it, you will be able to see the data from this Xena in the Xena browser.
+
+With this tool, you can also back up the data from your Galaxy-embedded Xena to an external directory on the same computer, and can restore data from a previous backup into the Xena database.  If you wish to restore data, Xena must be running first.
+   
+  </help>
+</tool>
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaGetDataset.py	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+import argparse
+import re
+import urllib2
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("dataHub", type=str)
+    parser.add_argument("datasetId", type=str)
+    parser.add_argument("metadatafile", type=str)
+    parser.add_argument("datafile", type=str)
+    args = parser.parse_args()
+
+    datasetUrlHost = re.sub("/proj/", "/download/", args.dataHub)
+    datasetIdTokens = re.split("/", args.datasetId)
+    datasetUrl = datasetUrlHost + "/" + "/".join(datasetIdTokens[1:])
+    print datasetUrl
+    metadataUrl = datasetUrl + ".json"
+    mm = urllib2.urlopen(metadataUrl)
+    with open(args.metadatafile, "w") as metadata:
+        metadata.write(mm.read())
+    mm.close()
+    dd = urllib2.urlopen(datasetUrl)
+    with open(args.datafile, "w") as data:
+        data.write(dd.read())
+    dd.close()
+
+
+if __name__ == "__main__":
+    main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaGetDataset.xml	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,24 @@
+<tool id="xenaGetDataset" description="Get a specified dataset and metadata from the Xena database" name="Get Data in Xena" version="0.0.2">
+  <description>
+    Retrieve a dataset from Xena
+  </description>
+  <requirements>
+    <requirement type="package" version="1.0">installXena</requirement>
+  </requirements>
+  <command interpreter="python">
+    xenaGetDataset.py $dataHub $dataset $metadataFile $dataFile
+  </command>
+  <inputs>
+    <param type="select" name="dataHub" label="Data Hub" optional="false">
+      <option value="https://genome-cancer.ucsc.edu/proj/public/xena"/>
+    </param>
+    <param type="text" name="dataset" label="Dataset ID" optional="false"/>
+  </inputs>
+  <outputs>
+    <data format="txt" name="metadataFile" label="${dataset}.json"/>
+    <data format="tabular" name="dataFile" label="${dataset}" />
+  </outputs>
+  <help>
+    Given the URL of a Xena dataset, download the data into a Galaxy dataset.  Enter the URL of the dataset in the Dataset URL field.  Xena dataset URLs can be obtained through the Explore Data in Xena tool.  The accompanying metadata will be downloaded automatically along with the data, and will be stored in a second Galaxy dataset.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_backup.py	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+"""
+  xena_backup.py: back up the data from Xena
+
+  Back up the Xena data to a user-specified external directory.
+"""
+
+import argparse
+import os
+import shutil
+import subprocess
+import sys
+import traceback
+import xena_utils as xena
+
+def writeException(outFp, msg = None):
+    exc_type, exc_value, exc_traceback = sys.exc_info()
+    lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
+    allLines = ''.join('!! ' + line for line in lines)
+    if msg is None:
+        outFp.write("Unsuccessful: error %s\n" % allLines)
+    else:
+        outFp.write("%s\n%s" % (msg, allLines))
+    
+    
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("pathname", type=str)
+    parser.add_argument("outfile", type=str)
+    args = parser.parse_args()
+
+    outFp = open(args.outfile, "w")
+    xenaFileDir = xena.fileDir()
+
+    if not os.path.exists(args.pathname):
+        try:
+            os.mkdir(args.pathname)
+        except:
+            writeException(outFp, 
+                           msg="Error: cannot create %s" % args.pathname)
+            outFp.close()
+            sys.exit(-1)
+    for thisFile in os.listdir(xenaFileDir):
+        try:
+            shutil.copy(xenaFileDir + "/" + thisFile, args.pathname)
+        except:
+            writeException(outFp,
+                           msg="Error: cannot back up files from %s to %s" \
+                               % (xena.fileDir(), args.pathname))
+            outFp.close()
+            sys.exit(-1)
+    outFp.write("Backup complete\n")
+    outFp.close()
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_delete.py	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+"""
+  xena_delete.py: delete a dataset from Xena
+
+  Given the name of a Xena dataset, delete it from the local Xena database.
+"""
+
+import argparse
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+import traceback
+import xena_utils as xena
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("datasetName", type=str)
+    parser.add_argument("outfile", type=str)
+    args = parser.parse_args()
+
+    fp2 = open(args.outfile, "w")
+    xenaFileDir = xena.fileDir()
+
+    # Set up the xena delete comamnd and try to execute it.  If an exception
+    # is generated, output a traceback and exit with nonzero status.  If
+    # no exception was generated, indicate a successful import and exit
+    # with zero status.
+    xenaDeleteCmd = "java -jar %s --delete %s -p %s" % (xena.jarPath(),
+                                                        args.datasetName,
+                                                        xena.port())
+    try:
+        subprocess.call(xenaDeleteCmd, shell=True)
+    except:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
+        allLines = ''.join('!! ' + line for line in lines)
+        fp2.write("Unsuccessful: error %s\n" % allLines)
+        fp2.close()
+        sys.exit(-1)
+    else:
+        fp2.write( "Dataset %s deleted\n" % args.datasetName)
+        fp2.close()
+        sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_delete.xml	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,17 @@
+<tool id="xena_delete" description="Delete a dataset from the local Galaxy-embedded Xena" name="Delete Xena Dataset" version="0.0.1">
+  <description>
+    Given the name of a dataset, delete it from the local Galaxy-embedded Xena.
+  </description>
+  <command interpreter="python">
+      xena_delete.py $dataset $outfile
+  </command>
+  <inputs>
+    <param name="dataset" type="text" label="Dataset Name"/>
+  </inputs>
+  <outputs>
+    <data name="outfile" format="txt"/>
+  </outputs>
+  <help>
+    This tool will delete a dataset from the local Galaxy-embedded Xena database, given the name of the dataset.  The names of the datasets can be seen through the Xena Data Hub tool, by selecting the local Galaxy-embedded Xena as the hub and viewing the datasets contained in that hub.  This tool works only for the local Galaxy-embedded Xena.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_import.py	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+
+"""
+  xena_import.py: import a dataset into Xena
+
+  Given a cmdline-specified genomic data file and a cmdline-specified Xena 
+  directory, import the genomic data file into Xena.  This requires assembling
+  the necessary json file, based on cmdline input.
+"""
+
+import argparse
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+import traceback
+import xena_utils as xena
+
+def updateColNormalization(jsonMetadata):
+    """Set colNormalization to TRUE if the data is of some expression-related
+    subtype and if colNormalization has not been set"""
+    if not jsonMetadata.has_key("colNormalization"):
+        if jsonMetadata.has_key("dataSubType"):
+            if re.search("expression", jsonMetadata['dataSubType'], 
+                         re.IGNORECASE):
+                jsonMetadata["colNormalization"] = "true"
+
+def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
+    """Take an existing JSON file.  Read the contents, and check for any
+    content that might be invalid in the local installation.  Delete that
+    content.  Write the rest to the indicated output pathname"""
+    problemFields = [":probeMap"]
+    fileContents = open(inputJsonPath).read()
+    jsonMetadata = json.loads(fileContents)
+    for thisProblem in problemFields:
+        if jsonMetadata.has_key(thisProblem):
+            del jsonMetadata[thisProblem]
+    updateColNormalization(jsonMetadata)
+    fp = open(outputJsonPath, "w")
+    fp.write("%s\n" % json.dumps(jsonMetadata, indent=2))
+    fp.close()
+    
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("genomicDataPath", type=str)
+    parser.add_argument("outfile", type=str)
+    parser.add_argument("--json", type=str, default=None)
+    parser.add_argument("--cohort", type=str)
+    parser.add_argument("--type", type=str)
+    parser.add_argument("--dataSubType", type=str, default=None)
+    parser.add_argument("--label", type=str, default=None)
+    args = parser.parse_args()
+
+    fp2 = open(args.outfile, "w")
+    fp2.write("Importing data to Xena\n")
+    xenaFileDir = xena.fileDir()
+    genomicDataFile = args.genomicDataPath.split("/")[-1]
+    jsonMetadataTargetPathname = "%s/%s.json" % (xenaFileDir, 
+                                                 genomicDataFile)
+
+
+    # The metadata either came as the name of a JSON file or a series of 
+    # command line arguments.
+    if args.json is not None:
+        # In this case, the metadata came in the form of a JSON file.
+        # Verify that the metadata is valid on the current system, which
+        # might mean altering it.  Import the stuff that will validate.
+        verifyAndImportExistingMetadata(args.json, jsonMetadataTargetPathname)
+    else:
+        # In this case, the metadata came in the form of a series of 
+        # command line arguments.  Assemble them into JSON format, 
+        # and write a JSON file into the Xena file directory.
+        metadata = { 'cohort': args.cohort, 'type': args.type }
+        if args.dataSubType is not None:
+            metadata['dataSubType'] = args.dataSubType
+        if args.label is not None:
+            metadata['label'] = args.label
+        jsonMetadata = json.dumps(metadata, indent=2) 
+        fp = open(jsonMetadataTargetPathname, "w")
+        fp.write("%s\n" % (jsonMetadata))
+        fp.close()
+    
+    # Finally, copy the genomic data into the Xena directory
+    shutil.copy(args.genomicDataPath, xenaFileDir)
+
+    # Set up the xena load comamnd and try to execute it.  If an exception
+    # is generated, output a traceback and exit with nonzero status.  If
+    # no exception was generated, indicate a successful import and exit
+    # with zero status.
+    xenaLoadCmd = "java -jar %s -l --force %s/%s -p %s" % (xena.jarPath(),
+                                                           xenaFileDir,
+                                                           genomicDataFile, 
+                                                           xena.port())
+    try:
+        subprocess.call(xenaLoadCmd, shell=True)
+    except:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
+        allLines = ''.join('!! ' + line for line in lines)
+        fp2.write("Unsuccessful: error %s\n" % allLines)
+        fp2.close()
+        sys.exit(-1)
+    else:
+        fp2.write( "Import successful\n")
+        fp2.close()
+        sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_import.xml	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,122 @@
+<tool id="xenaImport" description="Import into XENA from Galaxy" name="XENA Import" version="0.0.1">
+  <description>Import from Galaxy into the Xena VM</description>
+  <requirements>
+    <requirement type="package" version="1.0">installXena</requirement>
+  </requirements>
+  <command interpreter="python">
+    xena_import.py ${xenaInputData} $outfile 
+    #if $source.metadataSource == "json"
+        --json ${source.xenaMetadata}
+    #else
+        --cohort "${source.cohort}" --type ${source.format.metadataType}
+        #if $source.format.metadataType == "genomicMatrix" or $source.format.metadataType == "clinicalMatrix":
+            #if $source.format.isCustom.dataSubType == "custom"
+                #if $source.format.isCustom.customSubType
+                    --dataSubType "${source.format.isCustom.customSubType}"
+               #end if
+            #else
+                --dataSubType "${source.format.isCustom.dataSubType}"
+            #end if
+        #end if
+        #if $source.label
+            --label "${source.label}"
+        #else
+            --label  "${xenaInputData.name}"
+        #end if
+    #end if
+  </command>
+  <inputs>
+    <param format="data" type="data" name="xenaInputData" label="Data to import to Xena" optional="false"/>
+    <conditional name="source">
+      <param type="select" name="metadataSource" label="Metadata entry">
+	<option value="json">Specify json metadata file</option>
+	<option value="entry">Enter your own</option>
+      </param>
+      <when value="json">
+          <param format="data" type="data" name="xenaMetadata" label="Metadata file" optional="false"/>
+      </when>
+      <when value="entry">
+        <param format="str" type="text" name="cohort" label="Cohort" optional="false"/>
+        <conditional name="format">
+          <param type="select" name="metadataType" label="File Format">
+	    <option value="genomicMatrix">Rows (Identifiers) by Columns (Samples)</option>
+	    <option value="clinicalMatrix">Rows (Samples) by Columns (Identifiers)</option>
+	    <option value="mutationVector">Mutation By Position</option>
+          </param>
+          <when value="genomicMatrix">
+	    <conditional name="isCustom">
+	      <param type="select" name="dataSubType" label="Type of data">
+	        <option value="custom">Enter your own</option>
+	        <option value="phenotype">phenotype</option>
+	        <option value="copy number">copy number</option>
+	        <option value="DNA methylation">DNA methylation</option>
+	        <option value="exon expression">exon expression</option>
+	        <option value="gene expression">gene expression</option>
+	        <option value="gene expression RNAseq">gene expression RNAseq</option>
+	        <option value="gene expression Array">gene expression Array</option>
+	        <option value="somatic mutation (SNP and small INDELs)">somatic mutation (SNP and small INDELs)</option>
+	        <option value="somatic mutation (gene level)">somatic mutation (gene level)</option>
+	        <option value="protein expression RPPA">protein expression RPPA</option>
+	        <option value="PARADIGM pathway activity">PARADIGM pathway activity</option>
+	      </param>
+	      <when value="custom">
+	        <param type="text" name="customSubType" label="Type of data" optional="true"/>
+	      </when>
+	      <when value="phenotype"/>
+	      <when value="copy number"/>
+	      <when value="DNA methylation"/>
+	      <when value="exon expression"/>
+	      <when value="gene expression"/>
+	      <when value="gene expression RNAseq"/>
+	      <when value="gene expression Array"/>
+	      <when value="somatic mutation (SNP and small INDELs)"/>
+	      <when value="somatic mutation (gene level)"/>
+	      <when value="protein expression RPPA"/>
+	      <when value="PARADIGM pathway activity"/>
+	    </conditional>
+          </when>
+          <when value="clinicalMatrix">
+	    <conditional name="isCustom">
+	      <param type="select" name="dataSubType" label="Type of data">
+	        <option value="custom">Enter your own</option>
+	        <option value="phenotype">phenotype</option>
+	        <option value="copy number">copy number</option>
+	        <option value="DNA methylation">DNA methylation</option>
+	        <option value="exon expression">exon expression</option>
+	        <option value="gene expression">gene expression</option>
+	        <option value="gene expression RNAseq">gene expression RNAseq</option>
+	        <option value="gene expression Array">gene expression Array</option>
+	        <option value="somatic mutation (SNP and small INDELs)">somatic mutation (SNP and small INDELs)</option>
+	        <option value="somatic mutation (gene level)">somatic mutation (gene level)</option>
+	        <option value="protein expression RPPA">protein expression RPPA</option>
+	        <option value="PARADIGM pathway activity">PARADIGM pathway activity</option>
+	      </param>
+	      <when value="custom">
+	        <param type="text" name="customSubType" label="Type of data" optional="true"/>
+	      </when>
+	      <when value="phenotype"/>
+	      <when value="copy number"/>
+	      <when value="DNA methylation"/>
+	      <when value="exon expression"/>
+	      <when value="gene expression"/>
+	      <when value="gene expression RNAseq"/>
+	      <when value="gene expression Array"/>
+	      <when value="somatic mutation (SNP and small INDELs)"/>
+	      <when value="somatic mutation (gene level)"/>
+	      <when value="protein expression RPPA"/>
+	      <when value="PARADIGM pathway activity"/>
+	    </conditional>
+          </when>
+          <when value="mutationVector"/>
+        </conditional>
+        <param type="text" name="label" label="Display Name (Optional)" optional="true"/>
+      </when>
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="txt" name="outfile"/>
+  </outputs>
+  <help>
+    To Appear soon...
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_query.py	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,157 @@
+"""
+Utilities for xena queries.
+
+A basic query example.
+Queries are scheme expressions.
+
+>>> import xena_query as xena
+>>> xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", "(+ 1 2)")
+'3.0'
+
+>>> xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", "(let [x 2 y (+ x 3)] (* x y))")
+'10.0'
+
+Looking up sample ids for the TCGA LGG cohort.
+
+>>> r = xena.post("https://genome-cancer.ucsc.edu/proj/public/xena",
+                  xena.patient_to_sample_query("TCGA.LGG.sampleMap",
+                                               ["TCGA-CS-4938",
+                                                "TCGA-HT-7693",
+                                                "TCGA-CS-6665",
+                                                "TCGA-S9-A7J2",
+                                                "TCGA-FG-A6J3"]))
+'{"TCGA.LGG.sampleMap":["TCGA-CS-4938-01","TCGA-CS-6665-01","TCGA-FG-A6J3-01","TCGA-HT-7693-01","TCGA-S9-A7J2-01"]}'
+
+>>> r = xena.post("https://genome-cancer.ucsc.edu/proj/public/xena",
+                  xena.find_sample_by_field_query("TCGA.LGG.sampleMap",
+                                                    "_PATIENT",
+                                                    ["TCGA-CS-4938",
+                                                     "TCGA-HT-7693",
+                                                     "TCGA-CS-6665",
+                                                     "TCGA-S9-A7J2",
+                                                     "TCGA-FG-A6J3"]))
+'{"TCGA.LGG.sampleMap":["TCGA-CS-4938-01","TCGA-CS-6665-01","TCGA-FG-A6J3-01","TCGA-HT-7693-01","TCGA-S9-A7J2-01"]}'
+>>> import json
+>>> json.loads(r)
+{u'TCGA.LGG.sampleMap': [u'TCGA-CS-4938-01', u'TCGA-CS-6665-01', u'TCGA-FG-A6J3-01', u'TCGA-HT-7693-01', u'TCGA-S9-A7J2-01']}
+"""
+
+import urllib2
+import re
+
+def compose1(f, g):
+    """Compose two callables.
+
+    The returned callable forwards all of its positional and keyword
+    arguments to g and hands g's return value to f.
+    """
+    return lambda *args, **kwargs: f(g(*args, **kwargs))
+
+# Functional composition of any number of callables, e.g.
+# compose(f, g)(a, ...) == f(g(a, ...))
+def compose(*funcs):
+    """Chain funcs into one callable; the rightmost function is applied first."""
+    return reduce(compose1, funcs)
+
+def quote(s):
+    """Return s as a double-quoted string literal for use inside a xena query.
+
+    Backslashes and double quotes inside s are backslash-escaped, so a
+    value containing them cannot end the literal early and corrupt the
+    surrounding query.
+    """
+    # Escape backslashes first so the ones added for the quotes are not doubled.
+    escaped = s.replace('\\', '\\\\').replace('"', '\\"')
+    return '"' + escaped + '"'
+
+def array_fmt(l):
+    """Render the strings in l as a bracketed, comma-separated list of quoted literals."""
+    quoted_items = [quote(item) for item in l]
+    return '[%s]' % ', '.join(quoted_items)
+
+# Query template used by find_sample_by_field_query.  Its three %s
+# placeholders are filled, in order, with the quoted cohort name, the
+# quoted field name and the bracketed array of quoted field values.
+#
+# The strategy here is
+#   o Do table scan on code to find codes matching field values
+#   o Do IN query on unpack(field, x) to find rows matching codes
+#   o Project to unpack(sample, x) to get sampleID code
+#   o Join with code to get sampleID values
+#
+# Note the :limit on the table scan. This makes the table scan exit after we've
+# found enough values, rather than continuing to the end. We can do this because
+# enumerated values are unique. An alternative would be to index all the enumerated
+# values in the db.
+sample_query_str = """
+(let [cohort %s
+      field_id-dataset (car (query {:select [[:field.id :field_id] [:dataset.id :dataset]]
+                                    :from [:dataset]
+                                    :join [:field [:= :dataset_id :dataset.id]]
+                                    :where [:and [:= :cohort cohort]
+                                                 [:= :field.name %s]]}))
+      values %s
+      field_id (:field_id field_id-dataset)
+      dataset (:dataset field_id-dataset)
+      sample (:id (car (query {:select [:field.id]
+                               :from [:field]
+                               :where [:and [:= :dataset_id dataset]
+                                            [:= :field.name "sampleID"]]})))
+      N (- (:rows (car (query {:select [:rows]
+                               :from [:dataset]
+                               :where [:= :id dataset]}))) 1)]
+  {cohort (map :value (query {:select [:value]
+                              :from [{:select [:x #sql/call [:unpack field_id, :x]]
+                                      :from [#sql/call [:system_range 0 N]]
+                                      :where [:in #sql/call [:unpack field_id, :x] {:select [:ordering]
+                                                                                             :from [:code]
+                                                                                             :where [:and [:= :field_id field_id]
+                                                                                                          [:in :value values]]
+                                                                                             :limit (count values)}]}]
+                              :join [:code [:and [:= :field_id sample]
+                                                 [:= :ordering #sql/call [:unpack sample :x]]]]}))})
+"""
+
+# Query selecting the distinct cohort values of all datasets whose cohort
+# is not nil.  It takes no parameters; find_cohorts() returns it as-is.
+cohort_query_str = """
+(map :cohort (query {:select [:%distinct.cohort]
+                     :from [:dataset]
+                     :where [:not [:is nil :cohort]]}))
+"""
+
+# Query template selecting the :text column of every dataset in one cohort.
+# The single %s placeholder takes the quoted cohort name (see
+# find_datasets_in_cohort).  The closing parenthesis of the outer (map ...)
+# form is required; without it the expression is unbalanced.
+datasets_list_in_cohort_query = """
+(map :text (query {:select [:text]
+                   :from [:dataset]
+                   :where [:= :cohort %s ]}))
+"""
+
+# Query template selecting the names of datasets of a given type whose name
+# matches a SQL LIKE pattern.  The two %s placeholders take, in order, the
+# quoted type and the quoted pattern (see find_datasets_type_pattern).
+datasets_type_pattern_str = """
+(map :name (query {:select [:name]
+                   :from [:dataset]
+                   :where [:and [:= :type %s]
+                                [:like :name %s]]}))
+"""
+
+def find_sample_by_field_query(cohort, field, values):
+    """Build the xena query that looks up the sample ids in cohort whose
+    field equals one of values."""
+    substitutions = (quote(cohort), quote(field), array_fmt(values))
+    return sample_query_str % substitutions
+
+def patient_to_sample_query(cohort, patients):
+    """Build the xena query that looks up sample ids for the given patients,
+    matching them against the "_PATIENT" field of the cohort."""
+    return find_sample_by_field_query(cohort=cohort,
+                                      field="_PATIENT",
+                                      values=patients)
+
+headers = { 'Content-Type' : "text/plain" }
+
+def post(url, query):
+    """Send query as the body of a POST to <url>/data/ and return the raw
+    response body."""
+    request = urllib2.Request(url + '/data/', query, headers)
+    return urllib2.urlopen(request).read()
+
+def find_cohorts():
+    """Return a xena query which lists the cohorts on a host.
+
+    Only the query text is returned; pass it to post() together with the
+    host url to run it.  Example of a posted result:
+    ["chinSF2007_public","TCGA.BRCA.sampleMap","cohort3"]
+    """
+    return cohort_query_str
+
+def find_datasets_in_cohort(url, cohort):
+    """ Return a list of datasets in a specific cohort on server=url.
+    Each dataset is a dictionary of the data's metadata.
+    This should be refactored to be consistent with the other methods.
+
+    Unlike the other query helpers, this one posts the query itself and
+    decodes the reply: the response is a JSON array of strings, each of
+    which is JSON-decoded in turn.
+    """
+    # This module does not import json at top level, so bring it into scope
+    # here; otherwise the calls below fail with NameError.
+    import json
+    query = datasets_list_in_cohort_query % (quote(cohort))
+    return [json.loads(text) for text in json.loads(post(url, query))]
+
+def find_datasets_type_pattern(type, pattern):
+    """Build the xena query listing datasets of the given type whose name
+    matches pattern.  The pattern uses SQL LIKE syntax, where % is the
+    wildcard."""
+    quoted_type = quote(type)
+    quoted_pattern = quote(pattern)
+    return datasets_type_pattern_str % (quoted_type, quoted_pattern)
+
+
+def strip_first_url_dir(path):
+    """Drop everything before the first '/' in path.
+
+    The '/' itself is kept; a path without any '/' yields the empty string.
+    """
+    _first_dir, slash, remainder = path.partition('/')
+    return slash + remainder
+
+# proj/<proj>/xena/<proj>/<path>
+# download/<proj>/xena/<path>
+def name_to_url(base_url, name):
+    """Build a download url: '/proj/' in base_url becomes '/download/', and
+    name is appended with its leading directory component removed."""
+    download_base = base_url.replace('/proj/', '/download/')
+    return download_base + strip_first_url_dir(name)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_restore.py	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+
+"""
+  xena_restore.py: restore datasets into Xena
+
+  Reload the Xena data from a user-specified external backup directory.
+"""
+
+import argparse
+import os
+import re
+import shutil
+import subprocess
+import sys
+import traceback
+import xena_utils as xena
+
+def writeException(outFp, msg = None):
+    """Write the exception currently being handled to outFp.
+
+    Each chunk of the formatted traceback is prefixed with '!! '.  When msg
+    is given it is written on its own line ahead of the traceback;
+    otherwise the traceback follows the text "Unsuccessful: error ".
+    """
+    formatted = traceback.format_exception(*sys.exc_info())
+    report = ''.join(['!! ' + chunk for chunk in formatted])
+    if msg is not None:
+        outFp.write("%s\n%s" % (msg, report))
+        return
+    outFp.write("Unsuccessful: error %s\n" % report)
+    
+    
+
+def main():
+    """Restore every dataset found in a backup directory into Xena.
+
+    Command line arguments: backupDir (the directory holding the backed-up
+    files) and outfile (the file that receives the status report).
+
+    For each *.json metadata file in backupDir, the metadata file and the
+    data file it describes are copied into the Xena file directory and the
+    data file is loaded with the xena load command.  On the first failure
+    the error is written to outfile and the script calls sys.exit(-1);
+    otherwise "Restore complete" is written and the script exits with 0.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("backupDir", type=str)
+    parser.add_argument("outfile", type=str)
+    args = parser.parse_args()
+
+    outFp = open(args.outfile, "w")
+    xenaFileDir = xena.fileDir()
+
+    for thisFile in os.listdir(args.backupDir):
+        # The files in the xena directory should be pairs of
+        # data and metadata files, with the metadata having the same
+        # name as the data file plus the .json suffix.  When you find
+        # a json file in the backup directory, then copy the json and data
+        # file into the xena directory, starting with the json file, and
+        # execute the xena load command.
+        #
+        # endswith() is used instead of the regex ".json$", whose unescaped
+        # dot also matches names such as "foojson".
+        if not thisFile.endswith(".json"):
+            continue
+        jsonFile = thisFile
+        dataFile = jsonFile[:-len(".json")]
+
+        # Report a missing data file through the output file instead of an
+        # assert, which is skipped entirely when Python runs with -O.
+        if not os.path.exists(os.path.join(args.backupDir, dataFile)):
+            outFp.write("Error: cannot restore %s from %s: data file is missing\n"
+                        % (dataFile, args.backupDir))
+            outFp.close()
+            sys.exit(-1)
+
+        # Metadata first, then the data it describes.
+        for fileToCopy in (jsonFile, dataFile):
+            try:
+                shutil.copy(os.path.join(args.backupDir, fileToCopy),
+                            xenaFileDir)
+            except Exception:
+                writeException(outFp,
+                               msg="Error: cannot restore %s from %s" \
+                                   % (fileToCopy, args.backupDir))
+                outFp.close()
+                sys.exit(-1)
+
+        # Now set up the xena load command and try to execute it.
+        # NOTE(review): the command is still run through the shell because
+        # the jar path may begin with "~", which only the shell expands;
+        # paths containing spaces or shell metacharacters are not quoted.
+        xenaLoadCmd = "java -jar %s -l --force %s/%s -p %s" % (xena.jarPath(),
+                                                               xenaFileDir,
+                                                               dataFile,
+                                                               xena.port())
+        try:
+            # check_call raises CalledProcessError on a non-zero exit status.
+            # subprocess.call only returned that status, which was ignored,
+            # so a failed load was still reported as a successful restore.
+            subprocess.check_call(xenaLoadCmd, shell=True)
+        except Exception:
+            writeException(outFp,
+                           msg="Could not reload %s into Xena" % dataFile)
+            outFp.close()
+            sys.exit(-1)
+    # At this point, the restore should've been successful
+    outFp.write("Restore complete\n")
+    outFp.close()
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_utils.py	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+""" 
+xenaUtils: a set of python utilities for the Galaxy / Xena interface
+"""
+
+import os
+import socket
+import subprocess
+
+def jarPath():
+    """Return the full pathname of the xena jar file.
+
+    The directory is taken from the XENA_JAR_PATH environment variable and
+    defaults to the user's home directory.
+    """
+    # expanduser() turns the "~" default (or a leading "~" in the variable)
+    # into a real directory, so the result is usable without a shell.
+    jarDir = os.path.expanduser(os.getenv("XENA_JAR_PATH", "~"))
+    return os.path.join(jarDir, "xena.jar")
+
+
+def baseDir():
+    """Return the Xena base directory: $XENA_BASE_DIR if set, otherwise /tmp."""
+    return os.environ.get("XENA_BASE_DIR", "/tmp")
+
+def fileDir():
+    """Return the "files" subdirectory of the Xena base directory."""
+    return "%s/files" % baseDir()
+
+def isRunning(xenaPort):
+    """Determine if Xena is running on the specified port.
+
+    Uses wget to ask the server on localhost to evaluate (+ 1 2) and
+    returns True only when the reply is exactly "3.0".  Any failure to run
+    the query yields False.
+    """
+    query = "wget -q -O- http://localhost:%s/data/'(+ 1 2)'" % xenaPort
+    try:
+        result = subprocess.check_output(query, shell=True)
+    except Exception:
+        # check_output raises when the command exits non-zero.  Catching
+        # Exception rather than using a bare except lets KeyboardInterrupt
+        # and SystemExit propagate.
+        return False
+    return result == "3.0"
+    
+
+def findUnusedPort():
+    """Return the number of a port that is currently free on this machine.
+
+    A TCP socket is bound to port 0, the port number reported for the
+    bound socket is recorded, and the socket is closed again.
+    """
+    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    probe.bind(('', 0))
+    _host, chosenPort = probe.getsockname()
+    probe.close()
+    return chosenPort
+
+def isPortAvailable(port):
+    """Test if a given port is available.
+
+    Returns True when a TCP socket can be bound to the port on all local
+    interfaces, and False when the bind fails for any reason.
+    """
+    ss = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        ss.bind(('', port))
+    except Exception:
+        return False
+    else:
+        return True
+    finally:
+        # Release the probe socket on both paths; without this it stays
+        # open whenever the bind fails.
+        ss.close()
+
+
+def portFilename():
+    """ Return the name of the file with the port of the running Xena,
+    if any.
+
+    The file is xena.port inside $XENA_BASE_DIR, or inside the user's home
+    directory when that variable is not set.
+    """
+    # expanduser() is needed because open() and os.path.exists() do not
+    # expand "~"; left as-is, the default names a directory literally
+    # called "~" under the current working directory.
+    # NOTE(review): baseDir() defaults to /tmp while this defaults to the
+    # home directory -- confirm which default is intended.
+    xenaBaseDir = os.path.expanduser(os.getenv("XENA_BASE_DIR", "~"))
+    xenaPortFilename = xenaBaseDir + "/xena.port"
+    return xenaPortFilename
+
+
+
+
+def port(testIfAvailable=False, findNewPort=False):
+    """Return the port recorded for Xena, or None if there is none.
+
+    The port is read from the port file when that file exists.
+
+    testIfAvailable: when true, a recorded port on which Xena does not
+        answer and which cannot be bound either (some other process holds
+        it) is discarded.
+    findNewPort: when true and no usable port is known, pick one (7220 if
+        it is free, otherwise any unused port), write it to the port file
+        and return it.
+    """
+    preferredXenaPort = 7220
+    xenaPort = None
+    xenaPortFname = portFilename()
+    if os.path.exists(xenaPortFname):
+        # "with" closes the handle as soon as the line has been read.
+        with open(xenaPortFname) as fp:
+            line = fp.readline()
+        try:
+            xenaPort = int(line.rstrip())
+        except ValueError:
+            # An empty or corrupt port file is treated like a missing one.
+            xenaPort = None
+        if (xenaPort is not None and testIfAvailable
+                and not isRunning(xenaPort)
+                and not isPortAvailable(xenaPort)):
+            # Xena is not running on the port, and the port is occupied
+            # by some other process: forget it.
+            xenaPort = None
+    if findNewPort and xenaPort is None:
+        if isPortAvailable(preferredXenaPort):
+            xenaPort = preferredXenaPort
+        else:
+            xenaPort = findUnusedPort()
+        with open(xenaPortFname, "w") as fp:
+            fp.write("%d\n" % xenaPort)
+    return xenaPort
+
+def cleanUpPort():
+    """Delete the port file once Xena has stopped running."""
+    stalePortFile = portFilename()
+    os.remove(stalePortFile)
+
+
+        
Binary file xena_utils.pyc has changed