Mercurial > repos > melissacline > ucsc_xena_platform
annotate xena_import.py @ 2:6eeb3ca663fb
Fixed dataset delete bugs (the full pathname was needed) plus changed xena_import to give a checkbox for turning on column normalization
author | melissacline |
---|---|
date | Wed, 14 Jan 2015 15:21:09 -0800 |
parents | 8bb037f88ed2 |
children | d8dc482ef970 |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 | |
3 """ | |
4 xena_import.py: import a dataset into Xena | |
5 | |
6 Given a cmdline-specified genomic data file and a cmdline-specified Xena | |
7 directory, import the genomic data file into Xena. This requires assembling | |
8 the necessary json file, based on cmdline input. | |
9 """ | |
10 | |
11 import argparse | |
12 import json | |
13 import os | |
14 import re | |
15 import shutil | |
16 import subprocess | |
17 import sys | |
18 import traceback | |
19 import xena_utils as xena | |
20 | |
def updateColNormalization(jsonMetadata):
    """Turn on column normalization for expression-related datasets.

    If jsonMetadata has no "colNormalization" entry, and its "dataSubType"
    entry contains the word "expression" (matched case-insensitively), set
    jsonMetadata["colNormalization"] to the string "true".  The dictionary
    is modified in place and nothing is returned.
    """
    if "colNormalization" in jsonMetadata:
        # An explicit setting, whatever its value, is left untouched.
        return
    dataSubType = jsonMetadata.get("dataSubType")
    if dataSubType is None:
        # Missing key, or a JSON null: nothing to match against.
        return
    if re.search("expression", dataSubType, re.IGNORECASE):
        jsonMetadata["colNormalization"] = "true"
29 | |
def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
    """Copy a JSON metadata file, dropping content that may be invalid locally.

    Read the JSON object stored at inputJsonPath, delete every key listed
    in problemFields (currently only ":probeMap"), pass the result through
    updateColNormalization(), and write it to outputJsonPath as indented
    JSON followed by a newline.
    """
    problemFields = [":probeMap"]
    # Context managers guarantee both files are closed, even on error.
    with open(inputJsonPath) as inputFp:
        jsonMetadata = json.load(inputFp)
    for thisProblem in problemFields:
        # pop() with a default removes the key if present, else does nothing.
        jsonMetadata.pop(thisProblem, None)
    updateColNormalization(jsonMetadata)
    with open(outputJsonPath, "w") as outputFp:
        outputFp.write("%s\n" % json.dumps(jsonMetadata, indent=2))
44 | |
45 | |
def _parseBoolArg(value):
    """Convert a command-line string into a boolean.

    argparse's type=bool calls bool() on the raw string, so every non-empty
    value, including "False", comes out True.  Here the usual spellings of
    "false" (and the empty string) yield False; any other value yields True.
    """
    falseValues = ("", "0", "false", "f", "no", "n", "off", "none")
    return str(value).strip().lower() not in falseValues


def main():
    """Import a genomic data file and its JSON metadata into Xena.

    Command-line arguments:
      genomicDataPath     path of the genomic data file to import
      outfile             path of a log file describing the import outcome
      --json              optional path of an existing JSON metadata file
      --cohort, --type, --dataSubType, --label, --colNormalization
                          metadata values used when --json is not given

    The metadata is written to <xena file dir>/<data file name>.json, the
    data file is copied into the Xena file directory, and the Xena jar is
    invoked to load it.  The process exits with status 0 if the load
    command succeeds, and with sys.exit(-1) after writing a traceback to
    outfile if it fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("genomicDataPath", type=str)
    parser.add_argument("outfile", type=str)
    parser.add_argument("--json", type=str, default=None)
    parser.add_argument("--cohort", type=str)
    parser.add_argument("--type", type=str)
    parser.add_argument("--dataSubType", type=str, default=None)
    parser.add_argument("--label", type=str, default=None)
    parser.add_argument("--colNormalization", type=_parseBoolArg,
                        default=False)
    args = parser.parse_args()

    # The with-block closes the log file on every exit path.
    with open(args.outfile, "w") as fp2:
        fp2.write("Importing data to Xena\n")
        xenaFileDir = xena.fileDir()
        genomicDataFile = os.path.basename(args.genomicDataPath)
        genomicDataTarget = os.path.join(xenaFileDir, genomicDataFile)
        jsonMetadataTargetPathname = genomicDataTarget + ".json"

        # The metadata either came as the name of a JSON file or a series of
        # command line arguments.
        if args.json is not None:
            # In this case, the metadata came in the form of a JSON file.
            # Verify that the metadata is valid on the current system, which
            # might mean altering it.  Import the stuff that will validate.
            verifyAndImportExistingMetadata(args.json,
                                            jsonMetadataTargetPathname)
        else:
            # In this case, the metadata came in the form of a series of
            # command line arguments.  Assemble them into JSON format,
            # and write a JSON file into the Xena file directory.
            metadata = {'cohort': args.cohort, 'type': args.type}
            if args.dataSubType is not None:
                metadata['dataSubType'] = args.dataSubType
            if args.label is not None:
                metadata['label'] = args.label
            if args.colNormalization:
                metadata['colNormalization'] = "true"
            else:
                updateColNormalization(metadata)
            with open(jsonMetadataTargetPathname, "w") as fp:
                fp.write("%s\n" % json.dumps(metadata, indent=2))

        # Finally, copy the genomic data into the Xena directory
        shutil.copy(args.genomicDataPath, xenaFileDir)

        # Set up the xena load command and try to execute it.  The command
        # is an argument list (no shell), so paths containing spaces or
        # shell metacharacters are passed through intact.  check_call()
        # raises if the command cannot be started or exits with a nonzero
        # status; in that case output a traceback and exit with nonzero
        # status.  Otherwise indicate a successful import and exit with
        # zero status.
        xenaLoadCmd = ["java", "-jar", str(xena.jarPath()),
                       "-l", "--force", genomicDataTarget,
                       "-p", str(xena.port())]
        try:
            subprocess.check_call(xenaLoadCmd)
        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            lines = traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)
            allLines = ''.join('!! ' + line for line in lines)
            fp2.write("Unsuccessful: error %s\n" % allLines)
            exitStatus = -1
        else:
            fp2.write("Import successful\n")
            exitStatus = 0
    sys.exit(exitStatus)
115 | |
116 | |
# Run the import only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()