Mercurial > repos > melissacline > ucsc_xena_platform
view xena_import.py @ 28:e1a4d4552ec0
xena version 15
author | jingchunzhu@gmail.com |
---|---|
date | Mon, 13 Jul 2015 14:17:54 -0700 |
parents | 6eeb3ca663fb |
children | d8dc482ef970 |
line wrap: on
line source
#!/usr/bin/env python
"""
xena_import.py: import a dataset into Xena

Given a cmdline-specified genomic data file, copy it into the Xena file
directory (as reported by xena_utils.fileDir()), write the accompanying
<datafile>.json metadata file next to it, and invoke the Xena loader on it.
The metadata comes either from an existing JSON file (--json) or is
assembled from the individual cmdline options.
"""

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import traceback

import xena_utils as xena

# Spellings of a cmdline boolean value that mean "off".  Anything else that
# is non-empty is treated as "on", which is what the previous type=bool
# parsing did for every non-empty string.
_FALSE_STRINGS = frozenset(["", "0", "false", "f", "no", "n", "off"])


def _parseBoolean(value):
    """Convert a cmdline string to a bool.

    argparse's type=bool calls bool() on the raw string, so "False" and "0"
    both come out True.  This helper maps the usual "off" spellings
    (case-insensitive, surrounding whitespace ignored) to False and any
    other value to True.
    """
    return str(value).strip().lower() not in _FALSE_STRINGS


def updateColNormalization(jsonMetadata):
    """Set colNormalization to "true" if the data is of some
    expression-related subtype and if colNormalization has not been set.

    jsonMetadata is a dict and is modified in place; nothing is returned.
    The subtype test is a case-insensitive search for "expression" in the
    value stored under "dataSubType".
    """
    if "colNormalization" in jsonMetadata:
        return
    if "dataSubType" not in jsonMetadata:
        return
    if re.search("expression", jsonMetadata["dataSubType"], re.IGNORECASE):
        jsonMetadata["colNormalization"] = "true"


def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
    """Take an existing JSON file.  Read the contents, and check for any
    content that might be invalid in the local installation.  Delete that
    content.  Write the rest to the indicated output pathname.

    The fields considered problematic are listed in problemFields below.
    colNormalization is also filled in via updateColNormalization() before
    the metadata is written out.
    """
    problemFields = [":probeMap"]
    with open(inputJsonPath) as fp:
        jsonMetadata = json.loads(fp.read())
    for thisProblem in problemFields:
        if thisProblem in jsonMetadata:
            del jsonMetadata[thisProblem]
    updateColNormalization(jsonMetadata)
    with open(outputJsonPath, "w") as fp:
        fp.write("%s\n" % json.dumps(jsonMetadata, indent=2))


def main():
    """Parse the cmdline, stage the data and metadata, and run the Xena load.

    A short progress/result report is written to the file named by the
    "outfile" argument.  The process exits with status 0 when the load
    command ran and returned 0, and with a nonzero status when the command
    could not be started or returned a nonzero exit code.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("genomicDataPath", type=str)
    parser.add_argument("outfile", type=str)
    parser.add_argument("--json", type=str, default=None)
    parser.add_argument("--cohort", type=str)
    parser.add_argument("--type", type=str)
    parser.add_argument("--dataSubType", type=str, default=None)
    parser.add_argument("--label", type=str, default=None)
    # type=bool would turn "False" into True; parse the text explicitly.
    parser.add_argument("--colNormalization", type=_parseBoolean,
                        default=False)
    args = parser.parse_args()

    exitStatus = 0
    # The with-block guarantees the report is flushed and closed even if
    # staging the files raises before the load command is reached.
    with open(args.outfile, "w") as fp2:
        fp2.write("Importing data to Xena\n")
        xenaFileDir = xena.fileDir()
        genomicDataFile = os.path.basename(args.genomicDataPath)
        jsonMetadataTargetPathname = os.path.join(
            xenaFileDir, "%s.json" % genomicDataFile)

        # The metadata either came as the name of a JSON file or a series of
        # command line arguments.
        if args.json is not None:
            # In this case, the metadata came in the form of a JSON file.
            # Verify that the metadata is valid on the current system, which
            # might mean altering it.  Import the stuff that will validate.
            verifyAndImportExistingMetadata(args.json,
                                            jsonMetadataTargetPathname)
        else:
            # In this case, the metadata came in the form of a series of
            # command line arguments.  Assemble them into JSON format,
            # and write a JSON file into the Xena file directory.
            metadata = {'cohort': args.cohort, 'type': args.type}
            if args.dataSubType is not None:
                metadata['dataSubType'] = args.dataSubType
            if args.label is not None:
                metadata['label'] = args.label
            if args.colNormalization:
                metadata['colNormalization'] = "true"
            else:
                updateColNormalization(metadata)
            jsonMetadata = json.dumps(metadata, indent=2)
            with open(jsonMetadataTargetPathname, "w") as fp:
                fp.write("%s\n" % (jsonMetadata))

        # Finally, copy the genomic data into the Xena directory
        shutil.copy(args.genomicDataPath, xenaFileDir)

        # Set up the xena load command and try to execute it.  The command
        # is passed as an argument list (no shell), so a data file name
        # containing spaces or shell metacharacters is handed to java
        # verbatim.  expanduser() keeps a leading "~" in the jar path
        # working, which the shell used to expand -- NOTE(review): confirm
        # whether xena.jarPath() can actually return such a path.
        xenaLoadCmd = [
            "java", "-jar", os.path.expanduser(str(xena.jarPath())),
            "-l", "--force", os.path.join(xenaFileDir, genomicDataFile),
            "-p", str(xena.port()),
        ]
        try:
            returnCode = subprocess.call(xenaLoadCmd)
        except Exception:
            # The command could not be started at all (e.g. java missing).
            exc_type, exc_value, exc_traceback = sys.exc_info()
            lines = traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)
            allLines = ''.join('!! ' + line for line in lines)
            fp2.write("Unsuccessful: error %s\n" % allLines)
            exitStatus = -1
        else:
            # subprocess.call() does not raise on a failing command; it only
            # returns the exit code, so that has to be checked explicitly.
            if returnCode != 0:
                fp2.write("Unsuccessful: error xena load command exited "
                          "with status %d\n" % returnCode)
                exitStatus = -1
            else:
                fp2.write("Import successful\n")
    sys.exit(exitStatus)


if __name__ == "__main__":
    main()