Mercurial > repos > melissacline > ucsc_xena_platform
diff xena_import.py @ 0:8bb037f88ed2
Uploaded
author | melissacline |
---|---|
date | Tue, 13 Jan 2015 23:37:23 -0500 |
parents | |
children | 6eeb3ca663fb |
line wrap: on
line diff
#!/usr/bin/env python

"""
  xena_import.py: import a dataset into Xena

  Given a cmdline-specified genomic data file, copy it into the Xena file
  directory (the directory reported by xena_utils.fileDir()) and load it
  into Xena.  This requires assembling the necessary json metadata file,
  either from an existing json file or from cmdline input.  A short status
  report is written to the cmdline-specified output file.
"""

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import traceback

import xena_utils as xena


def updateColNormalization(jsonMetadata):
    """Set colNormalization to "true" if the data is of some
    expression-related subtype and if colNormalization has not been set.

    The metadata dictionary is modified in place.  Nothing is changed when
    colNormalization is already present, or when dataSubType is missing,
    empty, or does not contain "expression" (case-insensitive)."""
    if "colNormalization" in jsonMetadata:
        return
    dataSubType = jsonMetadata.get("dataSubType")
    # A null or empty subtype cannot match, and skipping it avoids handing
    # None to re.search (which would raise TypeError).
    if dataSubType and re.search("expression", dataSubType, re.IGNORECASE):
        # NOTE(review): the value written is the string "true", not a JSON
        # boolean -- confirm this is what the Xena loader expects.
        jsonMetadata["colNormalization"] = "true"


def _writeJsonMetadata(jsonMetadata, outputJsonPath):
    """Write the metadata to outputJsonPath as indented JSON followed by a
    newline, closing the file even if the write fails."""
    with open(outputJsonPath, "w") as fp:
        fp.write("%s\n" % json.dumps(jsonMetadata, indent=2))


def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
    """Take an existing JSON file.  Read the contents, and check for any
    content that might be invalid in the local installation.  Delete that
    content.  Apply the colNormalization default, then write the rest to
    the indicated output pathname."""
    problemFields = [":probeMap"]
    with open(inputJsonPath) as fp:
        jsonMetadata = json.load(fp)
    for thisProblem in problemFields:
        # pop with a default removes the field if present and is a no-op
        # otherwise.
        jsonMetadata.pop(thisProblem, None)
    updateColNormalization(jsonMetadata)
    _writeJsonMetadata(jsonMetadata, outputJsonPath)


def main():
    """Parse the command line, stage the data file and its json metadata in
    the Xena file directory, run the Xena load command, and record the
    outcome in the output file.

    Exits with zero status after a successful load, and with nonzero status
    if the load command could not be run or itself exited with nonzero
    status."""
    parser = argparse.ArgumentParser()
    parser.add_argument("genomicDataPath", type=str)
    parser.add_argument("outfile", type=str)
    parser.add_argument("--json", type=str, default=None)
    parser.add_argument("--cohort", type=str)
    parser.add_argument("--type", type=str)
    parser.add_argument("--dataSubType", type=str, default=None)
    parser.add_argument("--label", type=str, default=None)
    args = parser.parse_args()

    # The context manager guarantees the report file is flushed and closed
    # on every exit path, including sys.exit() and unexpected exceptions.
    with open(args.outfile, "w") as fp2:
        fp2.write("Importing data to Xena\n")
        xenaFileDir = xena.fileDir()
        genomicDataFile = os.path.basename(args.genomicDataPath)
        jsonMetadataTargetPathname = "%s/%s.json" % (xenaFileDir,
                                                     genomicDataFile)

        # The metadata either came as the name of a JSON file or a series
        # of command line arguments.
        if args.json is not None:
            # In this case, the metadata came in the form of a JSON file.
            # Verify that the metadata is valid on the current system, which
            # might mean altering it.  Import the stuff that will validate.
            verifyAndImportExistingMetadata(args.json,
                                            jsonMetadataTargetPathname)
        else:
            # In this case, the metadata came in the form of a series of
            # command line arguments.  Assemble them into JSON format,
            # and write a JSON file into the Xena file directory.
            # NOTE(review): unlike the JSON-file path, this path does not
            # apply updateColNormalization -- confirm that is intended.
            metadata = {'cohort': args.cohort, 'type': args.type}
            if args.dataSubType is not None:
                metadata['dataSubType'] = args.dataSubType
            if args.label is not None:
                metadata['label'] = args.label
            _writeJsonMetadata(metadata, jsonMetadataTargetPathname)

        # Finally, copy the genomic data into the Xena directory
        shutil.copy(args.genomicDataPath, xenaFileDir)

        # Set up the xena load command and try to execute it.  If an
        # exception is generated, output a traceback and exit with nonzero
        # status.  If the command ran but reported failure through its exit
        # status, report that and exit with nonzero status.  Otherwise
        # indicate a successful import and exit with zero status.
        #
        # NOTE(review): the command is passed to the shell as one string, so
        # a path containing spaces or shell metacharacters would be
        # mis-parsed.  Kept as-is because the values returned by xena_utils
        # may rely on shell expansion -- confirm before switching to an
        # argument list with shell=False.
        xenaLoadCmd = "java -jar %s -l --force %s/%s -p %s" % (xena.jarPath(),
                                                               xenaFileDir,
                                                               genomicDataFile,
                                                               xena.port())
        try:
            returnCode = subprocess.call(xenaLoadCmd, shell=True)
        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            lines = traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)
            allLines = ''.join('!! ' + line for line in lines)
            fp2.write("Unsuccessful: error %s\n" % allLines)
            sys.exit(-1)

        # subprocess.call does not raise when the child fails; it only
        # returns the exit status, so that must be checked explicitly.
        if returnCode != 0:
            fp2.write("Unsuccessful: xena load command exited with "
                      "status %d\n" % returnCode)
            sys.exit(-1)

        fp2.write("Import successful\n")
        sys.exit(0)


if __name__ == "__main__":
    main()