view xena_import.py @ 27:5c7df318e87b

Uploaded
author melissacline
date Tue, 02 Jun 2015 19:26:17 -0400
parents 6eeb3ca663fb
children d8dc482ef970
line wrap: on
line source

#!/usr/bin/env python

"""
  xena_import.py: import a dataset into Xena

  Given a cmdline-specified genomic data file and a cmdline-specified Xena 
  directory, import the genomic data file into Xena.  This requires assembling
  the necessary json file, based on cmdline input.
"""

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import traceback
import xena_utils as xena

def updateColNormalization(jsonMetadata):
    """Default colNormalization to "true" for expression-related data.

    If jsonMetadata has no "colNormalization" entry and its "dataSubType"
    value contains the word "expression" (case-insensitive), set
    jsonMetadata["colNormalization"] to the string "true".  An existing
    "colNormalization" value is never overwritten.

    The dictionary is modified in place; nothing is returned.
    """
    # The "in" operator replaces dict.has_key(), which no longer exists in
    # Python 3; "in" behaves identically on Python 2 and 3.
    if "colNormalization" in jsonMetadata:
        return
    dataSubType = jsonMetadata.get("dataSubType")
    # A missing or null dataSubType cannot be expression data; skipping it
    # here also avoids a TypeError from re.search(pattern, None).
    if dataSubType is None:
        return
    if re.search("expression", dataSubType, re.IGNORECASE):
        jsonMetadata["colNormalization"] = "true"

def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
    """Copy a JSON metadata file, dropping fields that may not be valid here.

    Read the JSON object stored at inputJsonPath, remove every key listed
    in problemFields (currently only ":probeMap"), apply
    updateColNormalization() to the result, and write it to outputJsonPath
    as JSON indented by two spaces and followed by a newline.
    """
    problemFields = [":probeMap"]
    # Context managers make sure both files are closed even if parsing or
    # writing raises.
    with open(inputJsonPath) as inputFp:
        jsonMetadata = json.load(inputFp)
    for thisProblem in problemFields:
        # pop() with a default removes the key if present and is a no-op
        # otherwise; it replaces the Python-2-only dict.has_key() check.
        jsonMetadata.pop(thisProblem, None)
    updateColNormalization(jsonMetadata)
    with open(outputJsonPath, "w") as outputFp:
        outputFp.write("%s\n" % json.dumps(jsonMetadata, indent=2))
    

def _parseBoolArg(value):
    """Interpret a command-line string as a boolean.

    argparse's type=bool treats every non-empty string as True, so
    "--colNormalization False" used to enable normalization.  Here the
    empty string and the usual "false" spellings map to False; any other
    value maps to True, as it did before.
    """
    return value.strip().lower() not in ("", "false", "f", "no", "n", "0",
                                         "off")


def main():
    """Import a genomic data file and its metadata into Xena.

    Command line arguments:
      genomicDataPath     path of the genomic data file to import
      outfile             path of a log file describing the outcome
      --json              optional path of an existing JSON metadata file
      --cohort, --type, --dataSubType, --label, --colNormalization
                          metadata values, used only when --json is absent

    The JSON metadata file and a copy of the genomic data are placed in the
    Xena file directory, then the Xena loader is run on the copied data.
    The process exits with status 0 after a successful load and with a
    nonzero status if the loader cannot be started or reports failure.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("genomicDataPath", type=str)
    parser.add_argument("outfile", type=str)
    parser.add_argument("--json", type=str, default=None)
    parser.add_argument("--cohort", type=str)
    parser.add_argument("--type", type=str)
    parser.add_argument("--dataSubType", type=str, default=None)
    parser.add_argument("--label", type=str, default=None)
    parser.add_argument("--colNormalization", type=_parseBoolArg,
                        default=False)
    args = parser.parse_args()

    # The with-block closes the log file on every exit path, including
    # sys.exit() and uncaught exceptions.
    with open(args.outfile, "w") as fp2:
        fp2.write("Importing data to Xena\n")
        xenaFileDir = xena.fileDir()
        genomicDataFile = os.path.basename(args.genomicDataPath)
        jsonMetadataTargetPathname = "%s/%s.json" % (xenaFileDir,
                                                     genomicDataFile)

        # The metadata either came as the name of a JSON file or a series of
        # command line arguments.
        if args.json is not None:
            # In this case, the metadata came in the form of a JSON file.
            # Verify that the metadata is valid on the current system, which
            # might mean altering it.  Import the stuff that will validate.
            verifyAndImportExistingMetadata(args.json,
                                            jsonMetadataTargetPathname)
        else:
            # In this case, the metadata came in the form of a series of
            # command line arguments.  Assemble them into JSON format,
            # and write a JSON file into the Xena file directory.
            metadata = {'cohort': args.cohort, 'type': args.type}
            if args.dataSubType is not None:
                metadata['dataSubType'] = args.dataSubType
            if args.label is not None:
                metadata['label'] = args.label
            if args.colNormalization:
                metadata['colNormalization'] = "true"
            else:
                updateColNormalization(metadata)
            jsonMetadata = json.dumps(metadata, indent=2)
            with open(jsonMetadataTargetPathname, "w") as fp:
                fp.write("%s\n" % (jsonMetadata))

        # Finally, copy the genomic data into the Xena directory
        shutil.copy(args.genomicDataPath, xenaFileDir)

        # Set up the xena load command and try to execute it.  If the
        # command cannot be started, output a traceback and exit with
        # nonzero status.  If it runs but returns a nonzero exit status,
        # report that and exit with nonzero status.  Otherwise indicate a
        # successful import and exit with zero status.
        #
        # NOTE(review): the command is a single string run through the
        # shell, so a data file name containing spaces or shell
        # metacharacters will break or alter it.  It is left that way
        # because xena.jarPath() may rely on shell expansion -- confirm
        # against xena_utils before switching to an argument list.
        xenaLoadCmd = "java -jar %s -l --force %s/%s -p %s" % (xena.jarPath(),
                                                               xenaFileDir,
                                                               genomicDataFile,
                                                               xena.port())
        try:
            # subprocess.call() does not raise when the command fails; it
            # returns the exit status, which has to be checked explicitly.
            returnCode = subprocess.call(xenaLoadCmd, shell=True)
        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            lines = traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)
            allLines = ''.join('!! ' + line for line in lines)
            fp2.write("Unsuccessful: error %s\n" % allLines)
            sys.exit(-1)

        if returnCode != 0:
            fp2.write("Unsuccessful: xena load command exited with status "
                      "%d\n" % returnCode)
            sys.exit(-1)

        fp2.write("Import successful\n")
    sys.exit(0)


# Run the import only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()