Mercurial > repos > melissacline > ucsc_xena_platform
annotate xena_import.py @ 31:bb84d3bc5308
make Xena VM already running on ... message more useful for adding the data hub
| author | jingchunzhu <jingchunzhu@gmail.com> |
|---|---|
| date | Tue, 21 Jul 2015 21:09:37 -0700 |
| parents | 6eeb3ca663fb |
| children | d8dc482ef970 |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 | |
| 3 """ | |
| 4 xena_import.py: import a dataset into Xena | |
| 5 | |
| 6 Given a cmdline-specified genomic data file and a cmdline-specified Xena | |
| 7 directory, import the genomic data file into Xena. This requires assembling | |
| 8 the necessary json file, based on cmdline input. | |
| 9 """ | |
| 10 | |
| 11 import argparse | |
| 12 import json | |
| 13 import os | |
| 14 import re | |
| 15 import shutil | |
| 16 import subprocess | |
| 17 import sys | |
| 18 import traceback | |
| 19 import xena_utils as xena | |
| 20 | |
def updateColNormalization(jsonMetadata):
    """Default colNormalization to "true" for expression-related datasets.

    If jsonMetadata has no "colNormalization" key and its "dataSubType"
    value is a string containing "expression" (case-insensitive), set
    jsonMetadata["colNormalization"] to the string "true".

    The dictionary is modified in place and nothing is returned.  A missing
    or non-string dataSubType (for example a JSON null) leaves the metadata
    untouched instead of raising TypeError from re.search().
    """
    # An explicit setting always wins, whatever its value.
    if "colNormalization" in jsonMetadata:
        return

    # dict.has_key() was removed in Python 3; "in" and get() work on both
    # Python 2 and Python 3.
    dataSubType = jsonMetadata.get("dataSubType")

    # type(u"") is unicode on Python 2 (what json.loads returns) and str on
    # Python 3, so this accepts every text type the callers can produce.
    if not isinstance(dataSubType, (str, type(u""))):
        return

    if re.search("expression", dataSubType, re.IGNORECASE):
        jsonMetadata["colNormalization"] = "true"
| 29 | |
def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
    """Copy an existing JSON metadata file, dropping problematic fields.

    Reads the JSON object stored at inputJsonPath, removes every key listed
    in problemFields (content that might be invalid in the local
    installation), applies updateColNormalization() to the result, and
    writes it to outputJsonPath as indented JSON followed by a newline.

    inputJsonPath  -- pathname of the JSON metadata file to read
    outputJsonPath -- pathname the cleaned metadata is written to
                      (overwritten if it already exists)

    Errors from open() and from JSON parsing propagate to the caller.
    """
    problemFields = [":probeMap"]

    # "with" closes the handle even when parsing fails; the original left
    # the input file open until garbage collection.
    with open(inputJsonPath) as inputFp:
        jsonMetadata = json.load(inputFp)

    for thisProblem in problemFields:
        # pop() with a default replaces the Python-2-only has_key()/del pair.
        jsonMetadata.pop(thisProblem, None)

    updateColNormalization(jsonMetadata)

    with open(outputJsonPath, "w") as outputFp:
        outputFp.write("%s\n" % json.dumps(jsonMetadata, indent=2))
| 44 | |
| 45 | |
def _parseBoolArg(text):
    """Convert a command-line string into a boolean for argparse.

    argparse's type=bool simply calls bool() on the raw string, so every
    non-empty value -- including "false" -- would switch the option on.
    Here the usual "off" spellings are False and anything else is True.
    """
    return text.strip().lower() not in ("", "0", "false", "no", "off")


def main():
    """Import a genomic data file into Xena and report the outcome.

    Command line:
        genomicDataPath     data file to import
        outfile             report file that receives the status messages
        --json              existing JSON metadata file to reuse, or
        --cohort, --type, --dataSubType, --label, --colNormalization
                            individual metadata values used when --json is
                            not given

    The metadata is written next to the data as "<data file name>.json" in
    the directory returned by xena.fileDir(), the data file is copied into
    that directory, and the Xena jar is run to load it.

    The process exits with status 0 after a successful load.  It exits with
    -1 when the loader cannot be started or returns a nonzero exit status;
    in both cases the reason is written to outfile.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("genomicDataPath", type=str)
    parser.add_argument("outfile", type=str)
    parser.add_argument("--json", type=str, default=None)
    parser.add_argument("--cohort", type=str)
    parser.add_argument("--type", type=str)
    parser.add_argument("--dataSubType", type=str, default=None)
    parser.add_argument("--label", type=str, default=None)
    # type=bool would treat "--colNormalization false" as True.
    parser.add_argument("--colNormalization", type=_parseBoolArg,
                        default=False)
    args = parser.parse_args()

    # The with-block guarantees the report file is flushed and closed even
    # if one of the steps below raises.
    with open(args.outfile, "w") as fp2:
        fp2.write("Importing data to Xena\n")
        xenaFileDir = xena.fileDir()
        genomicDataFile = os.path.basename(args.genomicDataPath)
        genomicDataTargetPathname = os.path.join(xenaFileDir,
                                                 genomicDataFile)
        jsonMetadataTargetPathname = genomicDataTargetPathname + ".json"

        # The metadata either came as the name of a JSON file or a series of
        # command line arguments.
        if args.json is not None:
            # In this case, the metadata came in the form of a JSON file.
            # Verify that the metadata is valid on the current system, which
            # might mean altering it.  Import the stuff that will validate.
            verifyAndImportExistingMetadata(args.json,
                                            jsonMetadataTargetPathname)
        else:
            # In this case, the metadata came in the form of a series of
            # command line arguments.  Assemble them into JSON format,
            # and write a JSON file into the Xena file directory.
            metadata = {'cohort': args.cohort, 'type': args.type}
            if args.dataSubType is not None:
                metadata['dataSubType'] = args.dataSubType
            if args.label is not None:
                metadata['label'] = args.label
            if args.colNormalization:
                metadata['colNormalization'] = "true"
            else:
                updateColNormalization(metadata)
            with open(jsonMetadataTargetPathname, "w") as fp:
                fp.write("%s\n" % json.dumps(metadata, indent=2))

        # Finally, copy the genomic data into the Xena directory
        shutil.copy(args.genomicDataPath, xenaFileDir)

        # Set up the xena load command and try to execute it.  The command
        # is passed as an argument list (no shell), so pathnames containing
        # spaces or shell metacharacters reach java unchanged.
        # NOTE(review): assumes xena.jarPath() returns a single pathname and
        # not a string with extra command-line options -- confirm.
        xenaLoadCmd = ["java", "-jar", str(xena.jarPath()),
                       "-l", "--force", genomicDataTargetPathname,
                       "-p", str(xena.port())]
        try:
            returnCode = subprocess.call(xenaLoadCmd)
        except Exception:
            # The loader could not be started at all (e.g. java is missing).
            lines = traceback.format_exception(*sys.exc_info())
            allLines = ''.join('!! ' + line for line in lines)
            fp2.write("Unsuccessful: error %s\n" % allLines)
            exitStatus = -1
        else:
            # subprocess.call() does not raise when the child fails, so the
            # exit status has to be inspected explicitly.
            if returnCode != 0:
                fp2.write("Unsuccessful: error Xena loader exited with "
                          "status %d\n" % returnCode)
                exitStatus = -1
            else:
                fp2.write("Import successful\n")
                exitStatus = 0

    sys.exit(exitStatus)
| 115 | |
| 116 | |
# Run the import only when this file is executed as a script.
if __name__ == "__main__":
    main()
