diff xena_import.py @ 0:8bb037f88ed2

Uploaded
author melissacline
date Tue, 13 Jan 2015 23:37:23 -0500
parents
children 6eeb3ca663fb
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xena_import.py	Tue Jan 13 23:37:23 2015 -0500
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+
+"""
+  xena_import.py: import a dataset into Xena
+
+  Given a cmdline-specified genomic data file and a cmdline-specified Xena 
+  directory, import the genomic data file into Xena.  This requires assembling
+  the necessary json file, based on cmdline input.
+"""
+
+import argparse
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+import traceback
+import xena_utils as xena
+
+def updateColNormalization(jsonMetadata):
+    """Default colNormalization to "true" for expression-related datasets.
+
+    Modifies jsonMetadata in place.  Nothing changes if the
+    "colNormalization" key is already present, if there is no "dataSubType"
+    key, or if the dataSubType value does not contain "expression"
+    (matched case-insensitively).
+
+    Args:
+        jsonMetadata: dict of dataset metadata; updated in place.
+    """
+    # Use the "in" operator rather than dict.has_key(): has_key() exists
+    # only in Python 2, while "in" behaves identically on Python 2 and 3.
+    if "colNormalization" in jsonMetadata:
+        return
+    if "dataSubType" not in jsonMetadata:
+        return
+    if re.search("expression", jsonMetadata["dataSubType"], re.IGNORECASE):
+        # NOTE(review): the value stored is the string "true", not a JSON
+        # boolean; kept as in the original -- confirm which form Xena expects.
+        jsonMetadata["colNormalization"] = "true"
+
+def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
+    """Copy a JSON metadata file, removing fields that may be invalid locally.
+
+    Reads the JSON object stored at inputJsonPath, deletes every key listed
+    in problemFields (content that might be invalid in the local
+    installation), applies updateColNormalization() to the result, and
+    writes it to outputJsonPath as indented JSON followed by a newline.
+
+    Args:
+        inputJsonPath: pathname of the existing JSON metadata file.
+        outputJsonPath: pathname the cleaned metadata is written to.
+    """
+    problemFields = [":probeMap"]
+    # Context managers close both files even if parsing or writing raises;
+    # the input handle was previously never closed at all.
+    with open(inputJsonPath) as inputFp:
+        jsonMetadata = json.loads(inputFp.read())
+    for thisProblem in problemFields:
+        # "in" replaces dict.has_key(), which exists only in Python 2.
+        if thisProblem in jsonMetadata:
+            del jsonMetadata[thisProblem]
+    updateColNormalization(jsonMetadata)
+    with open(outputJsonPath, "w") as outputFp:
+        outputFp.write("%s\n" % json.dumps(jsonMetadata, indent=2))
+    
+
+def main():
+    """Import a genomic data file and its metadata into Xena.
+
+    Command line arguments:
+      genomicDataPath  path of the genomic data file to import
+      outfile          path of a status log written by this script
+      --json           path of an existing JSON metadata file (optional)
+      --cohort, --type, --dataSubType, --label
+                       metadata values used when --json is not given
+
+    The metadata is written to "<genomic data file name>.json" in the Xena
+    file directory, the genomic data file is copied alongside it, and the
+    Xena loader is run on the copy.  The script exits with status 0 when
+    the loader succeeds, and with nonzero status when the loader cannot be
+    started or itself exits with nonzero status; either outcome is recorded
+    in outfile.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("genomicDataPath", type=str)
+    parser.add_argument("outfile", type=str)
+    parser.add_argument("--json", type=str, default=None)
+    parser.add_argument("--cohort", type=str)
+    parser.add_argument("--type", type=str)
+    parser.add_argument("--dataSubType", type=str, default=None)
+    parser.add_argument("--label", type=str, default=None)
+    args = parser.parse_args()
+
+    # The "with" block closes the status log on every exit path, including
+    # the SystemExit raised by sys.exit() and any unexpected exception.
+    with open(args.outfile, "w") as fp2:
+        fp2.write("Importing data to Xena\n")
+        xenaFileDir = xena.fileDir()
+        genomicDataFile = os.path.basename(args.genomicDataPath)
+        genomicDataTargetPathname = "%s/%s" % (xenaFileDir, genomicDataFile)
+        jsonMetadataTargetPathname = "%s.json" % genomicDataTargetPathname
+
+        # The metadata either came as the name of a JSON file or a series of
+        # command line arguments.
+        if args.json is not None:
+            # In this case, the metadata came in the form of a JSON file.
+            # Verify that the metadata is valid on the current system, which
+            # might mean altering it.  Import the stuff that will validate.
+            verifyAndImportExistingMetadata(args.json,
+                                            jsonMetadataTargetPathname)
+        else:
+            # In this case, the metadata came in the form of a series of
+            # command line arguments.  Assemble them into JSON format,
+            # and write a JSON file into the Xena file directory.
+            metadata = {'cohort': args.cohort, 'type': args.type}
+            if args.dataSubType is not None:
+                metadata['dataSubType'] = args.dataSubType
+            if args.label is not None:
+                metadata['label'] = args.label
+            with open(jsonMetadataTargetPathname, "w") as fp:
+                fp.write("%s\n" % json.dumps(metadata, indent=2))
+
+        # Finally, copy the genomic data into the Xena directory
+        shutil.copy(args.genomicDataPath, xenaFileDir)
+
+        # Set up the xena load command and try to execute it.  The command
+        # is passed as an argument list (no shell) so that pathnames
+        # containing spaces or shell metacharacters reach java unmodified.
+        xenaLoadCmd = ["java", "-jar", str(xena.jarPath()), "-l", "--force",
+                       genomicDataTargetPathname, "-p", str(xena.port())]
+        try:
+            returnCode = subprocess.call(xenaLoadCmd)
+        except Exception:
+            # The loader could not be started at all (for example, java is
+            # not installed): log the traceback and exit with nonzero status.
+            lines = traceback.format_exception(*sys.exc_info())
+            allLines = ''.join('!! ' + line for line in lines)
+            fp2.write("Unsuccessful: error %s\n" % allLines)
+            sys.exit(-1)
+
+        # subprocess.call() does not raise when the child fails; it returns
+        # the exit status, which must be checked before reporting success.
+        if returnCode != 0:
+            fp2.write("Unsuccessful: error !! xena load command exited "
+                      "with status %d\n" % returnCode)
+            sys.exit(-1)
+
+        fp2.write("Import successful\n")
+    sys.exit(0)
+
+
+# Run the import only when this file is executed as a script, not when it
+# is imported as a module.
+if __name__ == "__main__":
+    main()