# HG changeset patch
# User melissacline
# Date 1421210243 18000
# Node ID 8bb037f88ed24017f45c9b516baa1dfd2b8a2f0b
Uploaded
diff -r 000000000000 -r 8bb037f88ed2 runXena.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/runXena.py Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+# To kick off the script, run the following from the python directory:
+# PYTHONPATH=`pwd` python runXena.py start <outfile>
+
+#standard python libs
+import logging
+import os
+import signal
+import subprocess
+import sys
+import time
+import traceback
+import xena_utils as xena
+
+#third party libs
+from daemon import runner
+from lockfile import LockTimeout
+
+
+class App():
+    """Daemon application object handed to daemon.runner.DaemonRunner."""
+
+    def __init__(self):
+        xenaBaseDir = xena.baseDir()
+        if not os.path.exists(xenaBaseDir):
+            os.mkdir(xenaBaseDir)
+        # Attributes that DaemonRunner reads from the application object.
+        self.pidfile_path = xenaBaseDir + "/xena-daemon.pid"
+        self.stdin_path = '/dev/null'
+        self.stdout_path = '/dev/null'
+        self.stderr_path = '/dev/null'
+        self.pidfile_timeout = 5
+
+    def run(self):
+        """Run the Xena VM in the foreground, launching it again whenever it exits."""
+        xenaCmdline = "java -jar %s -r %s/files -d %s/db -t %s/tmp --logfile %s/xena.log -p %s -H 0.0.0.0 --no-auto"
+        while True:
+            # The command is rebuilt on every pass, so xena.port() is re-read.
+            xenaBaseDir = xena.baseDir()
+            xenaCmd = xenaCmdline % (xena.jarPath(), xenaBaseDir,
+                                     xenaBaseDir, xenaBaseDir,
+                                     xenaBaseDir, xena.port())
+            logger.info("Starting Xena VM")
+            logger.debug("Invoking Xena VM with command %s" % (xenaCmd))
+            exitStatus = subprocess.call(xenaCmd, shell=True)  # blocks until the VM exits
+            logger.info("Xena VM exited with status %s" % exitStatus)
+
+
+
+def processListeningOnPort(portID):
+    """Return the PID (int) of the first process lsof lists as listening on TCP port portID."""
+    cmd = "lsof -t -i :%s -sTCP:LISTEN" % portID
+    return int(subprocess.check_output(cmd, shell=True).split()[0])  # one PID per line; take the first
+
+
+fp = open(sys.argv[2], "w") # sys.argv[2]: file that receives the report written below
+
+app = App()
+logger = logging.getLogger("DaemonLog")
+logger.setLevel(logging.DEBUG)
+formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+handler = logging.FileHandler(xena.baseDir() + "/xena-daemon.log")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+
+hostname = subprocess.check_output("hostname -f", shell=True).rstrip()
+
+#
+# Check if there has been a Xena running on this system, and
+# what its port number is or was.
+#
+xenaPort = xena.port()
+if xenaPort == None:
+ # In this case, no Xena has been running on this system.
+ xenaIsRunning = False
+else:
+ xenaIsRunning = xena.isRunning(xenaPort)
+# (debug) print "xena running", xenaIsRunning, "port", xenaPort
+
+
+if sys.argv[1] == "status":
+ #
+ # When checking status, if Xena is running, then report that it's running
+ # with hostname and port. If it's not running, then report as such.
+ #
+ if xenaIsRunning:
+ fp.write("Xena VM currently running on %s:%s\n" % (hostname, xenaPort))
+ fp.write("You can add %s:%s to Xena Data Hub\n" % (hostname, xenaPort))
+ else:
+ fp.write("Xena VM is not currently running on %s\n" % (hostname))
+
+elif sys.argv[1] == "start":
+ #
+ # When a start command came in, allocate a new port and prepare to start
+ # xena if it's not already running. If it is already running, then
+ # report as such. In either case, close the output file before this
+ # process goes into daemon mode.
+ #
+ if not xenaIsRunning:
+ xenaPort = xena.port(testIfAvailable=True, findNewPort=True)
+ fp.write("Starting Xena VM on %s:%s\n" % (hostname, xenaPort))
+ fp.write("You can add %s:%s to Xena Data Hub\n" % (hostname, xenaPort))
+ else:
+ fp.write("Xena VM already running on %s:%s\n" % (hostname, xenaPort))
+ fp.write("You can add %s:%s to Xena Data Hub\n" % (hostname, xenaPort))
+ fp.close()
+
+elif sys.argv[1] == "stop":
+ #
+ # When stopping Xena, if it's currently running, report that Xena
+ # is being terminated; the kill itself happens at the end of this script.
+ # If it's not running, report as such.
+ # NOTE(review): nothing in this script removes the port file - confirm that is intended.
+ if xenaIsRunning:
+ fp.write("Terminating Xena VM on %s:%s\n" % (hostname, xenaPort))
+ else:
+ fp.write("Xena VM is not currently running on %s\n" % (hostname))
+
+else:
+ fp.write(("Error: Unexpected command %s" % sys.argv[1]))
+
+
+#
+# Here is where the starting and stopping of the Xena daemon takes place.
+#
+if sys.argv[1] == "start" or (sys.argv[1] == "stop" and xenaIsRunning):
+ daemon_runner = runner.DaemonRunner(app)
+ # This ensures that the logger file handle does not get closed during daemonization
+ daemon_runner.daemon_context.files_preserve=[handler.stream]
+ try:
+ daemon_runner.do_action()
+ except LockTimeout:
+ # Xena is already running. No need to do anything special, but this
+ # should be separated from the other exceptions.
+ pass
+ except: # NOTE(review): on "start", fp was already closed above, so the write below would fail - confirm
+ exc_type, exc_value, exc_traceback = sys.exc_info()
+ lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
+ allLines = ''.join('!! ' + line for line in lines)
+ fp.write("Unsuccessful: error %s\n" % allLines)
+ if sys.argv[1] == "stop":
+ #
+ # If the Xena stop command has been issued, then kill the Xena
+ # process.
+ #
+ xenaPid = processListeningOnPort(xenaPort)
+ logger.debug("Attempting to kill process with PID %d" % xenaPid)
+ os.kill(xenaPid, signal.SIGTERM)
+if sys.argv[1] != "start":
+ fp.close()
diff -r 000000000000 -r 8bb037f88ed2 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,24 @@
+
+
+
+
+
+
+ docutils
+
+
+ python-daemon
+
+
+ $REPOSITORY_INSTALL_DIR
+
+
+ $REPOSITORY_INSTALL_DIR
+
+
+ $INSTALL_DIR/xena
+
+
+
+
+
diff -r 000000000000 -r 8bb037f88ed2 ucsc_xena_datapages.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsc_xena_datapages.xml Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,24 @@
+
+
+
+ Browse the UCSC Xena Data Pages.
+ ucsc_xena_download.py $genomic $__app__.config.output_size_limit
+
+ Explore the data in Xena $GALAXY_URL
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 8bb037f88ed2 ucsc_xena_download.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsc_xena_download.py Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+import socket, urllib, sys, os
+from galaxy import eggs #eggs needs to be imported so that galaxy.util can find docutils egg...
+from galaxy.util.json import from_json_string, to_json_string
+from galaxy.util import get_charset_from_http_headers
+import galaxy.model # need to import model before sniff to resolve a circular import dependency
+from galaxy.datatypes import sniff
+import tarfile
+import re
+
+# (key into files, regex matched against the "type" of a metadata file)
+filemap = [
+    ('genomic', r'genomic(Segment|Matrix)$'),
+    ('clinical', r'clinicalMatrix$'),
+    ]
+files = {  # paths taken from the command line
+    'genomic': sys.argv[1],
+    'clinical': sys.argv[2]
+    }
+# Converted to int: load_file compares it with an int byte count and formats it with %d.
+max_file_size = int(sys.argv[3])
+
+def file_type(file):  # returns the 'type' entry of the JSON document stored at path `file`
+    with open(file) as metaFile:
+        return from_json_string(metaFile.read())['type']
+
+def stop_err( msg ):  # write msg to stderr, then end the process exactly as a bare sys.exit() would
+    sys.stderr.write( msg )
+    raise SystemExit()
+
+def load_input_parameters( filename, erase_file = True ):
+    """Read datasource parameters from filename; return (json_params, datasource_params).
+    The file is tried as JSON first, then as tab-separated key/value lines."""
+    datasource_params = {}
+    try:
+        json_params = from_json_string( open( filename, 'r' ).read() )
+        datasource_params = json_params.get( 'param_dict' )
+    except:
+        # Not usable as JSON: fall back to one "key<TAB>value" pair per line.
+        json_params = None
+        for raw in open( filename, 'r' ):
+            cols = raw.strip().split( '\t' )
+            if len( cols ) > 1:
+                datasource_params[ cols[0] ] = cols[1]
+    if erase_file:
+        open( filename, 'w' ).close() #open file for writing, then close, removes params from file
+    return json_params, datasource_params
+
+def load_file(files):
+    """Fetch the URL named in the parameter file files['genomic'], streaming the response into that same path."""
+    filename = files['genomic']
+    job_params, params = load_input_parameters( filename, False )
+    URL = params.get( 'URL', None ) #using exactly URL indicates that only one dataset is being downloaded
+    URL_method = params.get( 'URL_method', None )
+    socket.setdefaulttimeout( 600 )
+    if URL_method and URL_method not in ( 'get', 'post' ):
+        stop_err( 'Unsupported URL_method: %s' % URL_method )  # used to fall through to a NameError on page
+    try:
+        page = urllib.urlopen( URL, urllib.urlencode( params ) if URL_method == 'post' else None )
+    except Exception as e:
+        stop_err( 'The remote data source application may be off line, please try again later. Error: %s' % str( e ) )
+    if int( max_file_size or 0 ):  # may still be the raw argv string; a zero limit disables the check
+        file_size = int( page.info().get( 'Content-Length', 0 ) )
+        if file_size > int( max_file_size ):
+            stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, int( max_file_size ) ) )
+    try:
+        cur_filename, is_multi_byte = sniff.stream_to_open_named_file( page, os.open( filename, os.O_WRONLY | os.O_CREAT ), filename, source_encoding=get_charset_from_http_headers( page.headers ) )
+    except Exception as e:
+        stop_err( 'Unable to fetch %s:\n%s' % ( URL, e ) )
+
+load_file(files)
+# NOTE(review): extractall() below trusts the member paths of a downloaded archive - confirm the source is trusted.
+tar = tarfile.open(files['genomic'])
+names = tar.getnames()
+metafiles = [n for n in names if n.endswith('.json')]
+tar.extractall()
+withtype = [(file_type(file), file[0:-len(".json")]) for file in metafiles] # (metadata type, name without ".json")
+try:
+ renames = [((n for (t, n) in withtype if re.search(pat, t)).next(), name) for (name, pat) in filemap] # first match per filemap entry
+except StopIteration:
+ stop_err( 'Missing required file type in tarball' )
+for (frm, to) in renames:
+ os.rename(frm, files[to])
diff -r 000000000000 -r 8bb037f88ed2 ucsc_xena_hub.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsc_xena_hub.xml Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,24 @@
+
+
+
+ Select your Xena Data Hub
+ ucsc_xena_download.py $genomic $__app__.config.output_size_limit
+
+ Select your Xena Data Hub $GALAXY_URL
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 8bb037f88ed2 ucsc_xenabrowser.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsc_xenabrowser.xml Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,25 @@
+
+
+
+ Run the UCSC Xena Browser as a Galaxy tool.
+ ucsc_xena_download.py $genomic $clinical $__app__.config.output_size_limit
+
+ go to UCSC Xena Browser $GALAXY_URL
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 8bb037f88ed2 xena.jar
Binary file xena.jar has changed
diff -r 000000000000 -r 8bb037f88ed2 xenaAdmin.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaAdmin.xml Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,52 @@
+
+ Administer the Galaxy-embedded Xena: start, check status, stop, backup, restore
+
+ installXena
+
+
+ #if $adminAction.actionType == "backup":
+ xena_backup.py ${adminAction.backupDir} $outfile
+ #else:
+ #if $adminAction.actionType == "restore":
+ xena_restore.py ${adminAction.restoreDir} $outfile
+ #else:
+ runXena.py ${adminAction.actionType} $outfile
+ #end if
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Xena Administration**
+
+Administer the Galaxy-embedded Xena by starting it, stopping it or checking its status. If you start Xena, or if you check the status while Xena is running, the output will indicate a URL for the Xena server that you can enter in the Xena Data Hub tool. After you add a new data hub with this URL, and select it, you will be able to see the data from this Xena in the Xena browser.
+
+With this tool, you can also back up the data from your Galaxy-embedded Xena to an external directory on the same computer, and can restore data from a previous backup into the Xena database. If you wish to restore data, Xena must be running first.
+
+
+
+
+
+
diff -r 000000000000 -r 8bb037f88ed2 xenaGetDataset.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaGetDataset.py Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+import argparse
+import re
+import urllib2
+
+
+def main():
+    """Fetch a Xena dataset and its .json metadata into the two files named on the command line."""
+    parser = argparse.ArgumentParser()
+    for positional in ("dataHub", "datasetId", "metadatafile", "datafile"):
+        parser.add_argument(positional, type=str)
+    args = parser.parse_args()
+
+    # The download URL is the hub URL with /proj/ replaced by /download/,
+    # followed by the dataset id minus its first path component.
+    downloadBase = args.dataHub.replace("/proj/", "/download/")
+    datasetUrl = downloadBase + "/" + "/".join(args.datasetId.split("/")[1:])
+    # Echo the resolved dataset URL on stdout.
+    print datasetUrl
+
+    # Metadata first, then the data itself.
+    for (url, target) in ((datasetUrl + ".json", args.metadatafile),
+                          (datasetUrl, args.datafile)):
+        response = urllib2.urlopen(url)
+        with open(target, "w") as out:
+            out.write(response.read())
+        response.close()
+
+
+if __name__ == "__main__":
+ main()
+
diff -r 000000000000 -r 8bb037f88ed2 xenaGetDataset.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaGetDataset.xml Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,24 @@
+
+
+ Retrieve a dataset from Xena
+
+
+ installXena
+
+
+ xenaGetDataset.py $dataHub $dataset $metadataFile $dataFile
+
+
+
+
+
+
+
+
+
+
+
+
+ Given the URL of a Xena dataset, download the data into a Galaxy dataset. Enter the URL of the dataset in the Dataset URL field. Xena dataset URLs can be obtained through the Explore Data in Xena tool. The accompanying metadata will be downloaded automatically along with the data, and will be stored in a second Galaxy dataset.
+
+
diff -r 000000000000 -r 8bb037f88ed2 xena_backup.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_backup.py Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+"""
+ xena_backup.py: back up the data files of the local Xena
+
+ Back up the Xena data to a user-specified external directory.
+"""
+
+import argparse
+import os
+import shutil
+import subprocess
+import sys
+import traceback
+import xena_utils as xena
+
+def writeException(outFp, msg = None):
+    """Write the exception currently being handled to outFp, each traceback
+    chunk prefixed with '!! '; msg, when given, replaces the default header."""
+    chunks = traceback.format_exception(*sys.exc_info())
+    report = ''.join(['!! ' + chunk for chunk in chunks])
+    text = ("Unsuccessful: error %s\n" % report if msg is None
+            else "%s\n%s" % (msg, report))
+    outFp.write(text)
+
+
+
+def main():
+    """Copy every file in the Xena file directory into the directory named on
+    the command line, and record the outcome in the given output file."""
+    parser = argparse.ArgumentParser()
+    for argName in ("pathname", "outfile"):
+        parser.add_argument(argName, type=str)
+    args = parser.parse_args()
+
+    outFp = open(args.outfile, "w")
+    xenaFileDir = xena.fileDir()
+    def fail(message):
+        # Record the active exception under message, then exit with status -1.
+        writeException(outFp, msg=message)
+        outFp.close()
+        sys.exit(-1)
+    if not os.path.exists(args.pathname):
+        try:
+            os.mkdir(args.pathname)
+        except:
+            fail("Error: cannot create %s" % args.pathname)
+    for thisFile in os.listdir(xenaFileDir):
+        try:
+            shutil.copy(xenaFileDir + "/" + thisFile, args.pathname)
+        except:
+            fail("Error: cannot back up files from %s to %s"
+                 % (xena.fileDir(), args.pathname))
+    outFp.write("Backup complete\n")
+    outFp.close()
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+ main()
diff -r 000000000000 -r 8bb037f88ed2 xena_delete.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_delete.py Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+"""
+ xena_delete.py: delete a dataset from Xena
+
+ Given the name of a Xena dataset, delete it from the local Xena database.
+"""
+
+import argparse
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+import traceback
+import xena_utils as xena
+
+def main():
+    """Delete the named dataset from the local Xena and report the outcome.
+
+    Writes a one-line result (or a traceback) to the output file and exits
+    with status 0 on success, -1 on failure.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("datasetName", type=str)
+    parser.add_argument("outfile", type=str)
+    args = parser.parse_args()
+
+    fp2 = open(args.outfile, "w")
+    # An argument list (no shell) keeps the user-supplied dataset name from
+    # being shell-interpreted; expanduser stands in for the shell's ~ expansion.
+    xenaDeleteCmd = ["java", "-jar", os.path.expanduser(xena.jarPath()),
+                     "--delete", args.datasetName, "-p", str(xena.port())]
+    try:
+        # check_call raises CalledProcessError on a nonzero exit status;
+        # subprocess.call would have reported a failed delete as a success.
+        subprocess.check_call(xenaDeleteCmd)
+    except Exception:
+        lines = traceback.format_exception(*sys.exc_info())
+        fp2.write("Unsuccessful: error %s\n"
+                  % ''.join('!! ' + line for line in lines))
+        fp2.close()
+        sys.exit(-1)
+    fp2.write( "Dataset %s deleted\n" % args.datasetName)
+    fp2.close()
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+ main()
diff -r 000000000000 -r 8bb037f88ed2 xena_delete.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_delete.xml Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,17 @@
+
+
+ Given the name of a dataset, delete it from the local Galaxy-embedded Xena.
+
+
+ xena_delete.py $dataset $outfile
+
+
+
+
+
+
+
+
+ This tool will delete a dataset from the local Galaxy-embedded Xena database, given the name of the dataset. The names of the datasets can be seen through the Xena Data Hub tool, by selecting the local Galaxy-embedded Xena as the hub and viewing the datasets contained in that hub. This tool works only for the local Galaxy-embedded Xena.
+
+
diff -r 000000000000 -r 8bb037f88ed2 xena_import.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_import.py Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+
+"""
+ xena_import.py: import a dataset into Xena
+
+ Given a cmdline-specified genomic data file and a cmdline-specified Xena
+ directory, import the genomic data file into Xena. This requires assembling
+ the necessary json file, based on cmdline input.
+"""
+
+import argparse
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+import traceback
+import xena_utils as xena
+
+def updateColNormalization(jsonMetadata):
+    """Set colNormalization to "true" (in place) when the metadata has an
+    expression-related dataSubType and no colNormalization entry yet."""
+    if "colNormalization" in jsonMetadata:
+        return
+    subType = jsonMetadata.get("dataSubType")
+    if subType is not None and re.search("expression", subType, re.IGNORECASE):
+        jsonMetadata["colNormalization"] = "true"
+
+def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
+    """Copy the JSON metadata at inputJsonPath to outputJsonPath, minus any
+    fields that might be invalid in the local installation, and after
+    updateColNormalization has been applied to it."""
+    problemFields = [":probeMap"]
+    with open(inputJsonPath) as inFp:
+        jsonMetadata = json.load(inFp)
+    for thisProblem in problemFields:
+        # pop with a default replaces the Python-2-only has_key/del pair.
+        jsonMetadata.pop(thisProblem, None)
+    updateColNormalization(jsonMetadata)
+    # The output is opened only after the input has been parsed.
+    with open(outputJsonPath, "w") as outFp:
+        outFp.write("%s\n" % json.dumps(jsonMetadata, indent=2))
+
+
+def main():
+    """Import a genomic data file, with its metadata, into the local Xena.
+
+    Metadata comes from --json, or else from --cohort/--type/--dataSubType/
+    --label. The outcome is written to outfile; exit status is 0 or -1.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("genomicDataPath", type=str)
+    parser.add_argument("outfile", type=str)
+    parser.add_argument("--json", type=str, default=None)
+    parser.add_argument("--cohort", type=str)
+    parser.add_argument("--type", type=str)
+    parser.add_argument("--dataSubType", type=str, default=None)
+    parser.add_argument("--label", type=str, default=None)
+    args = parser.parse_args()
+
+    fp2 = open(args.outfile, "w")
+    fp2.write("Importing data to Xena\n")
+    xenaFileDir = xena.fileDir()
+    genomicDataFile = args.genomicDataPath.split("/")[-1]
+    jsonMetadataTargetPathname = "%s/%s.json" % (xenaFileDir,
+                                                 genomicDataFile)
+
+    # The metadata either came as the name of a JSON file or a series of
+    # command line arguments.
+    if args.json is not None:
+        # In this case, the metadata came in the form of a JSON file.
+        # Verify that the metadata is valid on the current system, which
+        # might mean altering it. Import the stuff that will validate.
+        verifyAndImportExistingMetadata(args.json, jsonMetadataTargetPathname)
+    else:
+        # In this case, the metadata came in the form of a series of
+        # command line arguments. Assemble them into JSON format,
+        # and write a JSON file into the Xena file directory.
+        metadata = { 'cohort': args.cohort, 'type': args.type }
+        if args.dataSubType is not None:
+            metadata['dataSubType'] = args.dataSubType
+        if args.label is not None:
+            metadata['label'] = args.label
+        with open(jsonMetadataTargetPathname, "w") as fp:
+            fp.write("%s\n" % json.dumps(metadata, indent=2))
+
+    # Finally, copy the genomic data into the Xena directory
+    shutil.copy(args.genomicDataPath, xenaFileDir)
+
+    # Run the xena load command. subprocess.call does not raise on a nonzero
+    # exit status, so check it explicitly rather than always report success.
+    xenaLoadCmd = "java -jar %s -l --force %s/%s -p %s" % (xena.jarPath(),
+                                                          xenaFileDir,
+                                                          genomicDataFile,
+                                                          xena.port())
+    try:
+        status = subprocess.call(xenaLoadCmd, shell=True)
+        if status != 0:
+            raise RuntimeError("xena load exited with status %s" % status)
+    except Exception:
+        lines = traceback.format_exception(*sys.exc_info())
+        fp2.write("Unsuccessful: error %s\n"
+                  % ''.join('!! ' + line for line in lines))
+        fp2.close()
+        sys.exit(-1)
+    fp2.write( "Import successful\n")
+    fp2.close()
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+ main()
diff -r 000000000000 -r 8bb037f88ed2 xena_import.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_import.xml Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,122 @@
+
+ Import from Galaxy into the Xena VM
+
+ installXena
+
+
+ xena_import.py ${xenaInputData} $outfile
+ #if $source.metadataSource == "json"
+ --json ${source.xenaMetadata}
+ #else
+ --cohort "${source.cohort}" --type ${source.format.metadataType}
+ #if $source.format.metadataType == "genomicMatrix" or $source.format.metadataType == "clinicalMatrix":
+ #if $source.format.isCustom.dataSubType == "custom"
+ #if $source.format.isCustom.customSubType
+ --dataSubType "${source.format.isCustom.customSubType}"
+ #end if
+ #else
+ --dataSubType "${source.format.isCustom.dataSubType}"
+ #end if
+ #end if
+ #if $source.label
+ --label "${source.label}"
+ #else
+ --label "${xenaInputData.name}"
+ #end if
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ To Appear soon...
+
+
diff -r 000000000000 -r 8bb037f88ed2 xena_query.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_query.py Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,157 @@
+"""
+Utilities for xena queries.
+
+A basic query example.
+Queries are scheme expressions.
+
+>>> import xena_query as xena
+>>> xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", "(+ 1 2)")
+'3.0'
+
+>>> xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", "(let [x 2 y (+ x 3)] (* x y))")
+'10.0'
+
+Looking up sample ids for the TCGA LGG cohort.
+
+>>> r = xena.post("https://genome-cancer.ucsc.edu/proj/public/xena",
+ xena.patient_to_sample_query("TCGA.LGG.sampleMap",
+ ["TCGA-CS-4938",
+ "TCGA-HT-7693",
+ "TCGA-CS-6665",
+ "TCGA-S9-A7J2",
+ "TCGA-FG-A6J3"]))
+'{"TCGA.LGG.sampleMap":["TCGA-CS-4938-01","TCGA-CS-6665-01","TCGA-FG-A6J3-01","TCGA-HT-7693-01","TCGA-S9-A7J2-01"]}'
+
+>>> r = xena.post("https://genome-cancer.ucsc.edu/proj/public/xena",
+ xena.find_sample_by_field_query("TCGA.LGG.sampleMap",
+ "_PATIENT",
+ ["TCGA-CS-4938",
+ "TCGA-HT-7693",
+ "TCGA-CS-6665",
+ "TCGA-S9-A7J2",
+ "TCGA-FG-A6J3"]))
+'{"TCGA.LGG.sampleMap":["TCGA-CS-4938-01","TCGA-CS-6665-01","TCGA-FG-A6J3-01","TCGA-HT-7693-01","TCGA-S9-A7J2-01"]}'
+>>> import json
+>>> json.loads(r)
+{u'TCGA.LGG.sampleMap': [u'TCGA-CS-4938-01', u'TCGA-CS-6665-01', u'TCGA-FG-A6J3-01', u'TCGA-HT-7693-01', u'TCGA-S9-A7J2-01']}
+"""
+
+import urllib2
+import re
+
+def compose1(f, g):
+    """Return a callable that passes its arguments to g and then feeds
+    g's result to f."""
+    return lambda *args, **kwargs: f(g(*args, **kwargs))
+
+# Functional composition: compose(f, g)(a, ...) == f(g(a, ...))
+def compose(*funcs):
+    return reduce(compose1, funcs)
+
+def quote(s):  # wrap s in double quotes; embedded quotes are not escaped
+    return ''.join(('"', s, '"'))
+
+def array_fmt(l):  # render the strings in l as a bracketed, comma-separated list of quoted items
+    return '[%s]' % ', '.join(map(quote, l))
+
+# The strategy of this query (its three %s slots are cohort, field name, values) is
+# o Do table scan on code to find codes matching field values
+# o Do IN query on unpack(field, x) to find rows matching codes
+# o Project to unpack(sample, x) to get sampleID code
+# o Join with code to get sampleID values
+#
+# Note the :limit on the table scan. This makes the table scan exit after we've
+# found enough values, rather than continuing to the end. We can do this because
+# enumerated values are unique. An alternative would be to index all the enumerated
+# values in the db.
+sample_query_str = """
+(let [cohort %s
+ field_id-dataset (car (query {:select [[:field.id :field_id] [:dataset.id :dataset]]
+ :from [:dataset]
+ :join [:field [:= :dataset_id :dataset.id]]
+ :where [:and [:= :cohort cohort]
+ [:= :field.name %s]]}))
+ values %s
+ field_id (:field_id field_id-dataset)
+ dataset (:dataset field_id-dataset)
+ sample (:id (car (query {:select [:field.id]
+ :from [:field]
+ :where [:and [:= :dataset_id dataset]
+ [:= :field.name "sampleID"]]})))
+ N (- (:rows (car (query {:select [:rows]
+ :from [:dataset]
+ :where [:= :id dataset]}))) 1)]
+ {cohort (map :value (query {:select [:value]
+ :from [{:select [:x #sql/call [:unpack field_id, :x]]
+ :from [#sql/call [:system_range 0 N]]
+ :where [:in #sql/call [:unpack field_id, :x] {:select [:ordering]
+ :from [:code]
+ :where [:and [:= :field_id field_id]
+ [:in :value values]]
+ :limit (count values)}]}]
+ :join [:code [:and [:= :field_id sample]
+ [:= :ordering #sql/call [:unpack sample :x]]]]}))})
+"""
+
+cohort_query_str = """
+(map :cohort (query {:select [:%distinct.cohort]
+ :from [:dataset]
+ :where [:not [:is nil :cohort]]}))
+""" # selects the distinct non-nil cohort values of the dataset table; takes no parameters
+
+datasets_list_in_cohort_query = """
+(map :text (query {:select [:text]
+                   :from [:dataset]
+                   :where [:= :cohort %s ]}))
+"""
+
+datasets_type_pattern_str = """
+(map :name (query {:select [:name]
+ :from [:dataset]
+ :where [:and [:= :type %s]
+ [:like :name %s]]}))
+""" # two %s slots, in order: dataset type, then the :like pattern applied to the dataset name
+
+def find_sample_by_field_query(cohort, field, values):
+    """Build the xena query that looks up the sample ids whose `field` is one of `values`."""
+    return sample_query_str % (tuple(map(quote, (cohort, field))) + (array_fmt(values),))
+
+def patient_to_sample_query(cohort, patients):
+    """Build the xena query that looks up sample ids by patient, i.e. by the "_PATIENT" field."""
+    return find_sample_by_field_query(cohort, values=patients, field="_PATIENT")
+
+headers = { 'Content-Type' : "text/plain" }
+
+def post(url, query):
+    """POST a xena data query to the given url; return the response body as read."""
+    # The query text is sent as the request body to <url>/data/ together
+    # with the module-level headers.
+    request = urllib2.Request(url + '/data/', query, headers)
+    return urllib2.urlopen(request).read()
+
+def find_cohorts():
+    """Return the xena query text (not the result) that lists the distinct cohorts of a hub.
+    Example of what the query yields: ["chinSF2007_public","TCGA.BRCA.sampleMap","cohort3"]"""
+    return cohort_query_str
+
+def find_datasets_in_cohort(url, cohort):
+    """Return the datasets of `cohort` on server `url`, one metadata dictionary per dataset.
+    This should be refactored to be consistent with the other methods."""
+    import json  # not imported at the top of this module; the bare name raised NameError
+    return map(json.loads,
+               json.loads(post(url, datasets_list_in_cohort_query % (quote(cohort)))))
+
+def find_datasets_type_pattern(type, pattern):
+    """Build the xena query listing the names of datasets of the given type
+    whose name matches `pattern`, an sql pattern in which % is the wildcard."""
+    quotedArgs = (quote(type), quote(pattern))
+    return datasets_type_pattern_str % quotedArgs
+
+
+def strip_first_url_dir(path):  # drop everything that precedes the first '/'
+    return ''.join(path.partition('/')[1:])
+
+# proj//xena//
+# download//xena/
+def name_to_url(base_url, name):  # hub url with /proj/ swapped for /download/, plus name minus its first component
+    return '/download/'.join(base_url.split('/proj/')) + strip_first_url_dir(name)
diff -r 000000000000 -r 8bb037f88ed2 xena_restore.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_restore.py Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+
+"""
+ xena_restore.py: restore Xena data from a backup
+
+ Copy the files from a user-specified backup directory into Xena and reload them.
+"""
+
+import argparse
+import os
+import re
+import shutil
+import subprocess
+import sys
+import traceback
+import xena_utils as xena
+
+def writeException(outFp, msg = None):
+    """Write the exception currently being handled to outFp, each traceback
+    chunk prefixed with '!! '; msg, when given, replaces the default header."""
+    chunks = traceback.format_exception(*sys.exc_info())
+    report = ''.join(['!! ' + chunk for chunk in chunks])
+    text = ("Unsuccessful: error %s\n" % report if msg is None
+            else "%s\n%s" % (msg, report))
+    outFp.write(text)
+
+
+
+def main():
+    """Restore Xena data from a backup directory and reload it into Xena.
+
+    For every <name>.json in backupDir, the metadata file and its data file
+    <name> are copied into the Xena file directory and <name> is loaded.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("backupDir", type=str)
+    parser.add_argument("outfile", type=str)
+    args = parser.parse_args()
+
+    outFp = open(args.outfile, "w")
+    xenaFileDir = xena.fileDir()
+
+    def fail(message):
+        # Report the active exception under message, then exit nonzero.
+        writeException(outFp, msg=message)
+        outFp.close()
+        sys.exit(-1)
+
+    for thisFile in os.listdir(args.backupDir):
+        # endswith() tests the literal suffix; the former regex ".json$" left
+        # the dot unescaped and so also matched names such as "xjson".
+        if not thisFile.endswith(".json"):
+            continue
+        jsonFile = thisFile
+        dataFile = jsonFile[:-len(".json")]
+        # Copy the json file first, then the data file.
+        for name in (jsonFile, dataFile):
+            try:
+                # Raised instead of asserted: assert is stripped under -O.
+                if not os.path.exists(args.backupDir + "/" + dataFile):
+                    raise IOError("missing data file %s" % dataFile)
+                shutil.copy(args.backupDir + "/" + name, xenaFileDir)
+            except Exception:
+                fail("Error: cannot restore %s from %s"
+                     % (name, args.backupDir))
+        # Now set up the xena load command and try to execute it.
+        xenaLoadCmd = "java -jar %s -l --force %s/%s -p %s" % (xena.jarPath(),
+                                                              xenaFileDir,
+                                                              dataFile,
+                                                              xena.port())
+        try:
+            # subprocess.call does not raise on a nonzero exit status.
+            if subprocess.call(xenaLoadCmd, shell=True) != 0:
+                raise RuntimeError("xena load exited with a nonzero status")
+        except Exception:
+            fail("Could not reload %s into Xena" % dataFile)
+    # At this point, the restore should've been successful
+    outFp.write("Restore complete\n")
+    outFp.close()
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+ main()
diff -r 000000000000 -r 8bb037f88ed2 xena_utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_utils.py Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+"""
+xenaUtils: a set of python utilities for the Galaxy / Xena interface
+"""
+
+import os
+import socket
+import subprocess
+
+def jarPath():
+    """Return the pathname of xena.jar inside $XENA_JAR_PATH ("~" when unset)."""
+    jarDir = os.environ.get("XENA_JAR_PATH", "~")
+    return os.path.join(jarDir, "xena.jar")
+
+
+def baseDir():  # Xena base directory: $XENA_BASE_DIR, or "/tmp" when unset
+    return os.environ.get("XENA_BASE_DIR", "/tmp")
+
+def fileDir():  # directory of the Xena data files: baseDir() + "/files"
+    return "%s/files" % baseDir()
+
+def isRunning(xenaPort):
+    """Return True when a Xena on localhost:xenaPort answers the probe
+    query (+ 1 2) with exactly "3.0"; any failure of the probe gives False."""
+    probe = "wget -q -O- http://localhost:%s/data/'(+ 1 2)'" % xenaPort
+    try:
+        answer = subprocess.check_output(probe, shell=True)
+    except:
+        return False
+    return answer == "3.0"
+
+
+def findUnusedPort():
+    """Bind a throwaway TCP socket to port 0, let the system choose a free
+    port, close the socket and return the chosen port number."""
+    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    probe.bind(('', 0))
+    # getsockname() yields (host, port) for an AF_INET socket.
+    _, portNumber = probe.getsockname()
+    probe.close()
+    return portNumber
+
+def isPortAvailable(port):
+    """Return True if a TCP socket can be bound to the given port right now."""
+    ss = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        ss.bind(('', port))
+    except Exception:
+        return False
+    finally:
+        ss.close()  # previously left open when bind() failed
+    return True
+
+
+def portFilename():
+    """Return the name of the file with the port of the running Xena,
+    if any. The file lives in baseDir(), so it follows the same
+    XENA_BASE_DIR default as the other Xena files; the previous "~"
+    default was never expanded and named a literal "~" directory.
+    """
+    return(baseDir() + "/xena.port")
+
+
+
+
+def port(testIfAvailable=False, findNewPort=False):
+    """Return the port recorded in the port file, or None if there is none.
+    With testIfAvailable, a recorded port held by a non-Xena process is dropped;
+    with findNewPort, a missing port is chosen (7220 if free) and recorded."""
+    preferredXenaPort = 7220
+    xenaPort = None
+    xenaPortFname = portFilename()
+    if os.path.exists(xenaPortFname):
+        with open(xenaPortFname) as fp:  # the read handle used to be left open
+            line = fp.readline().rstrip()
+        # An empty port file counts as "no port" instead of raising ValueError.
+        xenaPort = int(line) if line else None
+    if xenaPort is not None and testIfAvailable and not isRunning(xenaPort):
+        # Xena is not running on the port. Make sure that
+        # the port is not occupied by some other process
+        if not isPortAvailable(xenaPort):
+            xenaPort = None
+    if findNewPort and xenaPort is None:
+        if isPortAvailable(preferredXenaPort):
+            xenaPort = preferredXenaPort
+        else:
+            xenaPort = findUnusedPort()
+        with open(xenaPortFname, "w") as fp:
+            fp.write("%d\n" % xenaPort)
+    return(xenaPort)
+
+def cleanUpPort():
+    """Remove the port file once Xena has stopped running."""
+    os.remove(portFilename())
+
+
+
diff -r 000000000000 -r 8bb037f88ed2 xena_utils.pyc
Binary file xena_utils.pyc has changed