Mercurial > repos > melissacline > ucsc_xena_platform
view xena_import.py @ 47:b19e848f9886
update xena to v16
author | jingchunzhu |
---|---|
date | Fri, 07 Aug 2015 17:01:10 -0700 |
parents | 1ef1886dae04 |
children | 8da6920a39ac |
line wrap: on
line source
#!/usr/bin/env python

"""
  xena_import.py: import a dataset into Xena

  Given a cmdline-specified genomic data file and a cmdline-specified Xena
  directory, import the genomic data file into Xena.  This requires assembling
  the necessary json file, based on cmdline input.
"""

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import traceback
import xena_utils as xena

# Spellings of a command-line boolean that mean "off".  Anything else is
# treated as "on", which is what the previous type=bool parsing did for
# every non-empty string.
_FALSE_STRINGS = ("", "0", "false", "f", "no", "n", "off", "none")


def _parseBoolean(text):
    """Convert a command-line string into a bool.

    argparse's type=bool calls bool() on the raw string, so "false" and "0"
    would come out True.  Here the false-like spellings listed in
    _FALSE_STRINGS (case-insensitive, surrounding whitespace ignored) map to
    False and every other value maps to True.
    """
    return str(text).strip().lower() not in _FALSE_STRINGS


def _writeJson(path, metadata):
    """Write metadata to path as indented JSON followed by a newline."""
    with open(path, "w") as fp:
        fp.write("%s\n" % json.dumps(metadata, indent=2))


def _buildLoadCommand(xenaFileDir, dataFile):
    """Return the shell command string that loads dataFile into Xena."""
    return "java -jar %s -l --force %s/%s -p %s" % (xena.jarPath(),
                                                     xenaFileDir,
                                                     dataFile,
                                                     xena.port())


def updateColNormalization(jsonMetadata):
    """Set colNormalization to "true" if the data is of some
    expression-related subtype and if colNormalization has not been set.

    The dictionary is modified in place; nothing is returned.  A missing,
    empty or null dataSubType leaves the metadata untouched.
    """
    if "colNormalization" in jsonMetadata:
        return
    dataSubType = jsonMetadata.get("dataSubType")
    if dataSubType and re.search("expression", dataSubType, re.IGNORECASE):
        jsonMetadata["colNormalization"] = "true"


def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
    """Take an existing JSON file.  Read the contents, and check for any
    content that might be invalid in the local installation.  Delete that
    content.  Write the rest to the indicated output pathname.

    inputJsonPath  -- path of the JSON metadata file to read
    outputJsonPath -- path the cleaned-up metadata is written to
    """
    problemFields = [":probeMap"]
    with open(inputJsonPath) as fp:
        jsonMetadata = json.load(fp)
    for thisProblem in problemFields:
        jsonMetadata.pop(thisProblem, None)
    updateColNormalization(jsonMetadata)
    _writeJson(outputJsonPath, jsonMetadata)


def main():
    """Parse the command line, stage the data and metadata files in the
    Xena file directory, and ask Xena to load them.

    Progress and the outcome of each load are written to the report file
    named by the "outfile" argument.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("genomicDataPath", type=str)
    parser.add_argument("outfile", type=str)
    parser.add_argument("--json", type=str, default=None)
    parser.add_argument("--cohort", type=str)
    parser.add_argument("--type", type=str)
    parser.add_argument("--dataSubType", type=str, default=None)
    parser.add_argument("--label", type=str, default=None)
    parser.add_argument("--colNormalization", type=_parseBoolean,
                        default=False)
    parser.add_argument("--probeMap", type=str, default=None)
    args = parser.parse_args()

    # The context manager guarantees the report is flushed and closed even
    # if one of the staging steps below raises.
    with open(args.outfile, "w") as fp2:
        fp2.write("Importing data to Xena\n")

        xenaFileDir = xena.fileDir()

        genomicDataFile = os.path.basename(args.genomicDataPath)
        jsonMetadataTargetPathname = "%s/%s.json" % (xenaFileDir,
                                                     genomicDataFile)

        probeMapDataFile = None
        if args.probeMap is not None:
            probeMapDataFile = os.path.basename(args.probeMap)

        # The metadata either came as the name of a JSON file or a series of
        # command line arguments.
        if args.json is not None:
            # In this case, the metadata came in the form of a JSON file.
            # Verify that the metadata is valid on the current system, which
            # might mean altering it.  Import the stuff that will validate.
            verifyAndImportExistingMetadata(args.json,
                                            jsonMetadataTargetPathname)
        else:
            # In this case, the metadata came in the form of a series of
            # command line arguments.  Assemble them into JSON format,
            # and write a JSON file into the Xena file directory.
            metadata = {'cohort': args.cohort, 'type': args.type}
            if args.dataSubType is not None:
                metadata['dataSubType'] = args.dataSubType
            if args.label is not None:
                metadata['label'] = args.label
            if args.colNormalization:
                metadata['colNormalization'] = "true"
            else:
                updateColNormalization(metadata)
            if probeMapDataFile is not None:
                metadata[':probeMap'] = probeMapDataFile
            _writeJson(jsonMetadataTargetPathname, metadata)

        # The probeMap file is copied and loaded below whichever way the
        # dataset metadata was supplied, so it always gets its own metadata.
        if probeMapDataFile is not None:
            jsonProbeMapMetadataTargetPathname = "%s/%s.json" % (
                xenaFileDir, probeMapDataFile)
            _writeJson(jsonProbeMapMetadataTargetPathname,
                       {"type": "probeMap"})

        # Finally, copy the genomic data into the Xena directory
        shutil.copy(args.genomicDataPath, xenaFileDir)
        if args.probeMap is not None:
            shutil.copy(args.probeMap, xenaFileDir)

        # Set up the xena load command and try to execute it.  The outcome
        # of each load (success, or failure with the reason) is recorded in
        # the report file; the exit status of this script is not affected.
        if args.label is not None:
            name = args.label
        else:
            name = genomicDataFile
        loadInToXena(_buildLoadCommand(xenaFileDir, genomicDataFile),
                     name, fp2)

        if probeMapDataFile is not None:
            loadInToXena(_buildLoadCommand(xenaFileDir, probeMapDataFile),
                         "id to gene mapping file for %s" % (name), fp2)


def loadInToXena(xenaLoadCmd, name, fp2):
    """Run the Xena load command and record the outcome in the report.

    xenaLoadCmd -- command string, executed through the shell
    name        -- human-readable dataset name used in the report messages
    fp2         -- open, writable file object for the report

    Returns True if the command ran and exited with status zero, False if
    it could not be started or exited with a nonzero status.
    """
    print(xenaLoadCmd)
    # Flush so the echoed command appears before any output of the child
    # process, which shares this process's stdout.
    sys.stdout.flush()
    # NOTE(review): the command goes through the shell, so file names that
    # contain spaces or shell metacharacters will be misinterpreted.
    try:
        returnCode = subprocess.call(xenaLoadCmd, shell=True)
    except Exception:
        lines = traceback.format_exception(*sys.exc_info())
        allLines = ''.join('!! ' + line for line in lines)
        fp2.write("Unsuccessful: Import %s\n" % (name))
        fp2.write("Unsuccessful: error %s\n" % allLines)
        return False

    # subprocess.call does not raise when the command fails; it reports the
    # failure through the exit status, which therefore has to be checked.
    if returnCode != 0:
        fp2.write("Unsuccessful: Import %s\n" % (name))
        fp2.write("Unsuccessful: error load command exited with status %s\n"
                  % (returnCode))
        return False

    fp2.write("Importing %s is successful\n" % (name))
    return True


if __name__ == "__main__":
    main()