Mercurial > repos > melissacline > ucsc_xena_platform
annotate xena_import.py @ 54:3e64e6ed5925
update to version 17 step 1, remove xena.jar
author | jingchunzhu |
---|---|
date | Tue, 22 Sep 2015 10:06:35 -0700 |
parents | 8da6920a39ac |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 | |
3 """ | |
4 xena_import.py: import a dataset into Xena | |
5 | |
6 Given a cmdline-specified genomic data file and a cmdline-specified Xena | |
7 directory, import the genomic data file into Xena. This requires assembling | |
8 the necessary json file, based on cmdline input. | |
9 """ | |
10 | |
11 import argparse | |
12 import json | |
13 import os | |
14 import re | |
15 import shutil | |
16 import subprocess | |
17 import sys | |
18 import traceback | |
19 import xena_utils as xena | |
20 | |
def updateColNormalization(jsonMetadata):
    """Default colNormalization to "true" for expression-like datasets.

    The metadata dictionary is modified in place.  Nothing is changed when
    "colNormalization" is already present (whatever its value), or when
    "dataSubType" is absent or does not contain the word "expression"
    (matched case-insensitively).

    Args:
        jsonMetadata: dictionary of dataset metadata; updated in place.
    """
    # Respect an explicit setting, even an explicit "false".
    if "colNormalization" in jsonMetadata:
        return
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if "dataSubType" not in jsonMetadata:
        return
    if re.search("expression", jsonMetadata["dataSubType"], re.IGNORECASE):
        # The value is the string "true", not a boolean, as in the rest of
        # this script.
        jsonMetadata["colNormalization"] = "true"
29 | |
def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
    """Copy an existing JSON metadata file, dropping fields that may be
    invalid in the local installation.

    The input file is parsed, every key listed in problemFields is removed
    if present, updateColNormalization() is applied, and the result is
    written to outputJsonPath as indented JSON followed by a newline.

    Args:
        inputJsonPath: pathname of the JSON metadata file to read.
        outputJsonPath: pathname the cleaned metadata is written to.
    """
    # Keys removed before the metadata is written out.
    problemFields = [":probeMap"]

    # Context managers close both files even if parsing or writing fails.
    with open(inputJsonPath) as inputFile:
        jsonMetadata = json.load(inputFile)

    for thisProblem in problemFields:
        # pop() with a default is a no-op when the key is absent.
        jsonMetadata.pop(thisProblem, None)
    updateColNormalization(jsonMetadata)

    with open(outputJsonPath, "w") as outputFile:
        outputFile.write("%s\n" % json.dumps(jsonMetadata, indent=2))
44 | |
45 | |
def _parseBooleanFlag(value):
    """Convert a command-line string into a boolean.

    argparse's type=bool calls bool() on the raw string, so every non-empty
    value (including "false") becomes True.  Here the explicit "false"
    spellings are treated as False; any other value remains True, as before.
    """
    return value.strip().lower() not in ("", "0", "false", "no", "off", "none")


def _writeJsonFile(metadata, pathname):
    """Write metadata to pathname as indented JSON plus a trailing newline."""
    with open(pathname, "w") as fp:
        fp.write("%s\n" % json.dumps(metadata, indent=2))


def _xenaLoadCommand(xenaFileDir, dataFile):
    """Build the shell command that loads xenaFileDir/dataFile into Xena."""
    # NOTE(review): the values are interpolated unquoted and the command is
    # later run through a shell, so paths containing spaces or shell
    # metacharacters would break it -- confirm inputs are always safe.
    return "java -jar %s -l --force %s/%s -p %s" % (xena.jarPath(),
                                                    xenaFileDir,
                                                    dataFile,
                                                    xena.port())


def main():
    """Import a genomic data file (and optional probeMap) into Xena.

    Command-line arguments:
        genomicDataPath     pathname of the genomic data file to import
        outfile             pathname of the text report written by this run
        --json              existing JSON metadata file; when given, the
                            --cohort/--type/--dataSubType/--label/
                            --colNormalization/--assembly options are not
                            used to build the metadata
        --cohort, --type, --dataSubType, --label, --assembly
                            metadata fields used when --json is not given
        --colNormalization  true/false flag forcing column normalization
        --probeMap          pathname of a probeMap file to import as well

    The metadata JSON is written next to the data inside the Xena file
    directory, the data files are copied there, and the Xena loader is run
    for each of them.  The outcome of each load is written to outfile.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("genomicDataPath", type=str)
    parser.add_argument("outfile", type=str)
    parser.add_argument("--json", type=str, default=None)
    parser.add_argument("--cohort", type=str)
    parser.add_argument("--type", type=str)
    parser.add_argument("--dataSubType", type=str, default=None)
    parser.add_argument("--label", type=str, default=None)
    # type=bool would turn "false" into True; use an explicit converter.
    parser.add_argument("--colNormalization", type=_parseBooleanFlag,
                        default=False)
    parser.add_argument("--probeMap", type=str, default=None)
    parser.add_argument("--assembly", type=str, default=None)
    args = parser.parse_args()

    # The report file is closed even if any step below raises.
    with open(args.outfile, "w") as fp2:
        fp2.write("Importing data to Xena\n")
        xenaFileDir = xena.fileDir()
        genomicDataFile = args.genomicDataPath.split("/")[-1]
        jsonMetadataTargetPathname = "%s/%s.json" % (xenaFileDir,
                                                     genomicDataFile)

        probeMapDataFile = None
        jsonProbeMapMetadataTargetPathname = None
        if args.probeMap is not None:
            probeMapDataFile = args.probeMap.split("/")[-1]
            jsonProbeMapMetadataTargetPathname = "%s/%s.json" % (
                xenaFileDir, probeMapDataFile)

        # The metadata either came as the name of a JSON file or a series of
        # command line arguments.
        if args.json is not None:
            # In this case, the metadata came in the form of a JSON file.
            # Verify that the metadata is valid on the current system, which
            # might mean altering it.  Import the stuff that will validate.
            verifyAndImportExistingMetadata(args.json,
                                            jsonMetadataTargetPathname)
        else:
            # In this case, the metadata came in the form of a series of
            # command line arguments.  Assemble them into JSON format,
            # and write a JSON file into the Xena file directory.
            metadata = {'cohort': args.cohort, 'type': args.type}
            if args.dataSubType is not None:
                metadata['dataSubType'] = args.dataSubType
            if args.label is not None:
                metadata['label'] = args.label
            if args.colNormalization:
                metadata['colNormalization'] = "true"
            else:
                # Not requested explicitly: fall back to the dataSubType
                # based default.
                updateColNormalization(metadata)
            if args.probeMap is not None:
                metadata[':probeMap'] = probeMapDataFile
            if args.assembly:
                metadata['assembly'] = args.assembly
            _writeJsonFile(metadata, jsonMetadataTargetPathname)

        if args.probeMap is not None:
            # The probeMap file gets its own minimal metadata file.
            _writeJsonFile({"type": "probeMap"},
                           jsonProbeMapMetadataTargetPathname)

        # Finally, copy the genomic data into the Xena directory
        shutil.copy(args.genomicDataPath, xenaFileDir)
        if args.probeMap is not None:
            shutil.copy(args.probeMap, xenaFileDir)

        # Set up the xena load command and execute it.  loadInToXena writes
        # the outcome of the load to the report file.
        if args.label is not None:
            name = args.label
        else:
            name = genomicDataFile
        loadInToXena(_xenaLoadCommand(xenaFileDir, genomicDataFile),
                     name, fp2)

        if args.probeMap is not None:
            loadInToXena(_xenaLoadCommand(xenaFileDir, probeMapDataFile),
                         "id to gene mapping file for %s" % (name), fp2)
35
d8dc482ef970
add functionality to import probeMap
jingchunzhu <jingchunzhu@gmail.com>
parents:
2
diff
changeset
|
136 |
def loadInToXena(xenaLoadCmd, name, fp2):
    """Run the Xena load command and record the outcome in the report.

    The command is echoed to stdout and executed through the shell.  The
    import is reported as unsuccessful when launching the command raises an
    exception or when the command exits with a non-zero status; otherwise a
    success line is written.

    Args:
        xenaLoadCmd: complete shell command line to execute.
        name: human-readable dataset name used in the report messages.
        fp2: writable file-like object receiving the report lines.

    Returns:
        True when the command ran and exited with status 0, else False.
    """
    # Call form of print works on both Python 2 and Python 3.
    print(xenaLoadCmd)
    try:
        returnCode = subprocess.call(xenaLoadCmd, shell=True)
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        allLines = ''.join('!! ' + line for line in lines)
        fp2.write("Unsuccessful: Import %s\n" % (name))
        fp2.write("Unsuccessful: error %s\n" % allLines)
        return False

    # subprocess.call does not raise when the command itself fails; the
    # failure is only visible through the exit status.
    if returnCode != 0:
        fp2.write("Unsuccessful: Import %s\n" % (name))
        fp2.write("Unsuccessful: error command exited with status %s\n"
                  % returnCode)
        return False

    fp2.write("Importing %s is successful\n" % (name))
    return True
0 | 149 |
# Run the import only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()