view xena_import.py @ 55:421b18a0b659 default tip

update v17 step 2, add xena.jar
author jingchunzhu
date Tue, 22 Sep 2015 10:07:51 -0700
parents 8da6920a39ac
children
line wrap: on
line source

#!/usr/bin/env python

"""
  xena_import.py: import a dataset into Xena

  Given a cmdline-specified genomic data file and a cmdline-specified Xena 
  directory, import the genomic data file into Xena.  This requires assembling
  the necessary json file, based on cmdline input.
"""

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import traceback
import xena_utils as xena

def updateColNormalization(jsonMetadata):
    """Default colNormalization to "true" for expression-related datasets.

    The metadata dictionary is modified in place.  If it already has a
    "colNormalization" entry (whatever its value), it is left untouched.
    Otherwise, if "dataSubType" is present and contains the word
    "expression" (case-insensitive), "colNormalization" is set to the
    string "true".

    jsonMetadata: dict of dataset metadata, as parsed from or destined
                  for a JSON file.
    Returns None.
    """
    # An explicit setting always wins over the inferred default.
    if "colNormalization" in jsonMetadata:
        return
    subType = jsonMetadata.get("dataSubType")
    # A missing or null dataSubType gives nothing to match against; skip it
    # rather than letting re.search raise on None.
    if subType is not None and re.search("expression", subType,
                                         re.IGNORECASE):
        jsonMetadata["colNormalization"] = "true"

def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
    """Copy an existing JSON metadata file, dropping locally-invalid fields.

    Reads the JSON object stored at inputJsonPath, removes every key listed
    in problemFields (currently only ":probeMap"), applies
    updateColNormalization to the result, and writes it, indented and
    newline-terminated, to outputJsonPath.

    inputJsonPath:  pathname of the JSON metadata file to read.
    outputJsonPath: pathname to write the cleaned metadata to; the file is
                    created or overwritten.
    Returns None.  Errors from opening the files or parsing the JSON
    propagate to the caller.
    """
    # Keys that may refer to content not valid in the local installation.
    problemFields = [":probeMap"]
    # Context managers ensure both handles are closed even if parsing or
    # writing fails.
    with open(inputJsonPath) as fp:
        jsonMetadata = json.load(fp)
    for thisProblem in problemFields:
        jsonMetadata.pop(thisProblem, None)
    updateColNormalization(jsonMetadata)
    with open(outputJsonPath, "w") as fp:
        fp.write("%s\n" % json.dumps(jsonMetadata, indent=2))
    

def _parseBoolArg(value):
    """Convert a command-line string to a boolean.

    argparse's type=bool treats every non-empty string, including "false",
    as True.  Here the empty string and the usual negative spellings are
    False; any other value is True.
    """
    return value.strip().lower() not in ("", "0", "false", "f", "no", "n",
                                         "off")


def _writeJsonFile(pathname, metadata):
    """Write metadata to pathname as indented JSON followed by a newline."""
    with open(pathname, "w") as fp:
        fp.write("%s\n" % json.dumps(metadata, indent=2))


def _buildXenaLoadCmd(xenaFileDir, dataFile):
    """Return the shell command that loads xenaFileDir/dataFile into Xena."""
    return "java -jar %s -l --force %s/%s -p %s" % (xena.jarPath(),
                                                    xenaFileDir,
                                                    dataFile,
                                                    xena.port())


def main():
    """Import a genomic data file (and optional probe map) into Xena.

    Parses the command line, writes a <datafile>.json metadata file into
    the directory returned by xena.fileDir(), copies the data file (and the
    probe map, if given) into that directory, and runs the Xena load command
    for each.  Progress and results are written to the "outfile" argument.

    The metadata comes either from an existing JSON file (--json), which is
    cleaned by verifyAndImportExistingMetadata, or from the individual
    command-line options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("genomicDataPath", type=str)
    parser.add_argument("outfile", type=str)
    parser.add_argument("--json", type=str, default=None)
    parser.add_argument("--cohort", type=str)
    parser.add_argument("--type", type=str)
    parser.add_argument("--dataSubType", type=str, default=None)
    parser.add_argument("--label", type=str, default=None)
    # type=bool would turn "--colNormalization false" into True; parse the
    # string explicitly instead.
    parser.add_argument("--colNormalization", type=_parseBoolArg,
                        default=False)
    parser.add_argument("--probeMap", type=str, default=None)
    parser.add_argument("--assembly", type=str, default=None)
    args = parser.parse_args()

    # The with-block guarantees the report file is flushed and closed even
    # if one of the import steps raises.
    with open(args.outfile, "w") as fp2:
        fp2.write("Importing data to Xena\n")
        xenaFileDir = xena.fileDir()
        genomicDataFile = args.genomicDataPath.split("/")[-1]
        jsonMetadataTargetPathname = "%s/%s.json" % (xenaFileDir,
                                                     genomicDataFile)

        probeMapDataFile = None
        if args.probeMap is not None:
            probeMapDataFile = args.probeMap.split("/")[-1]

        # The metadata either came as the name of a JSON file or a series of
        # command line arguments.
        if args.json is not None:
            # In this case, the metadata came in the form of a JSON file.
            # Verify that the metadata is valid on the current system, which
            # might mean altering it.  Import the stuff that will validate.
            # NOTE(review): in this branch no probeMap metadata JSON is
            # written, although a --probeMap file is still copied and loaded
            # below -- confirm this is intended.
            verifyAndImportExistingMetadata(args.json,
                                            jsonMetadataTargetPathname)
        else:
            # In this case, the metadata came in the form of a series of
            # command line arguments.  Assemble them into JSON format,
            # and write a JSON file into the Xena file directory.
            metadata = {'cohort': args.cohort, 'type': args.type}
            if args.dataSubType is not None:
                metadata['dataSubType'] = args.dataSubType
            if args.label is not None:
                metadata['label'] = args.label
            if args.colNormalization:
                metadata['colNormalization'] = "true"
            else:
                # Not requested explicitly: infer it from the data subtype.
                updateColNormalization(metadata)
            if probeMapDataFile is not None:
                metadata[':probeMap'] = probeMapDataFile
            if args.assembly:
                metadata['assembly'] = args.assembly
            _writeJsonFile(jsonMetadataTargetPathname, metadata)

            if probeMapDataFile is not None:
                # The probe map needs its own metadata file next to it.
                jsonProbeMapMetadataTargetPathname = "%s/%s.json" % (
                    xenaFileDir, probeMapDataFile)
                _writeJsonFile(jsonProbeMapMetadataTargetPathname,
                               {"type": "probeMap"})

        # Finally, copy the genomic data into the Xena directory
        shutil.copy(args.genomicDataPath, xenaFileDir)
        if args.probeMap is not None:
            shutil.copy(args.probeMap, xenaFileDir)

        # Set up the xena load command and hand it to loadInToXena, which
        # runs it and records the outcome in the report file.
        if args.label is not None:
            name = args.label
        else:
            name = genomicDataFile
        loadInToXena(_buildXenaLoadCmd(xenaFileDir, genomicDataFile),
                     name, fp2)

        if probeMapDataFile is not None:
            loadInToXena(_buildXenaLoadCmd(xenaFileDir, probeMapDataFile),
                         "id to gene mapping file for %s" % (name), fp2)

def loadInToXena (xenaLoadCmd, name, fp2):
    """Run a Xena load command and record the outcome in the report file.

    xenaLoadCmd: complete command line, executed through the shell.
    name:        human-readable name of the dataset, used in the messages.
    fp2:         open, writable file-like object receiving the report.

    Writes "Importing <name> is successful" when the command exits with
    status zero.  If the command cannot be started, or exits with a
    non-zero status, writes two "Unsuccessful: ..." lines instead.
    Returns None.
    """
    # Echo the command to stdout.  The parenthesised form prints the same
    # text under the Python 2 print statement and the print() function.
    print(xenaLoadCmd)
    try:
        # subprocess.call does not raise when the command fails; it returns
        # the exit status, which must be checked explicitly.
        returnCode = subprocess.call(xenaLoadCmd, shell=True)
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        allLines = ''.join('!! ' + line for line in lines)
        fp2.write("Unsuccessful: Import %s\n" % (name))
        fp2.write("Unsuccessful: error %s\n" % allLines)
    else:
        if returnCode != 0:
            fp2.write("Unsuccessful: Import %s\n" % (name))
            fp2.write("Unsuccessful: error !! command exited with status %s\n"
                      % returnCode)
        else:
            fp2.write( "Importing %s is successful\n" % (name))

if __name__ == "__main__":
    main()