Mercurial > repos > melissacline > ucsc_xena_platform
annotate xena_import.py @ 40:fd24e220f240
more edit on description
author | jingchunzhu <jingchunzhu@gmail.com> |
---|---|
date | Mon, 27 Jul 2015 00:59:02 -0700 |
parents | 1ef1886dae04 |
children | 8da6920a39ac |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 | |
3 """ | |
4 xena_import.py: import a dataset into Xena | |
5 | |
6 Given a cmdline-specified genomic data file and a cmdline-specified Xena | |
7 directory, import the genomic data file into Xena. This requires assembling | |
8 the necessary json file, based on cmdline input. | |
9 """ | |
10 | |
11 import argparse | |
12 import json | |
13 import os | |
14 import re | |
15 import shutil | |
16 import subprocess | |
17 import sys | |
18 import traceback | |
19 import xena_utils as xena | |
20 | |
def updateColNormalization(jsonMetadata):
    """Turn on column normalization for expression-like datasets.

    Sets jsonMetadata["colNormalization"] to the string "true" when the
    key is not already present and the "dataSubType" value contains the
    word "expression" (case-insensitive). The dict is modified in place;
    nothing is returned.

    An existing "colNormalization" entry is never overwritten, and a
    missing, empty or null "dataSubType" leaves the metadata untouched.
    """
    if "colNormalization" in jsonMetadata:
        return
    dataSubType = jsonMetadata.get("dataSubType")
    # A JSON null (None) or empty subtype cannot match, and passing None to
    # re.search would raise TypeError, so skip it.
    if not dataSubType:
        return
    if re.search("expression", dataSubType, re.IGNORECASE):
        jsonMetadata["colNormalization"] = "true"
29 | |
def verifyAndImportExistingMetadata(inputJsonPath, outputJsonPath):
    """Copy a JSON metadata file, dropping fields not valid locally.

    Reads the JSON object stored at inputJsonPath, deletes every key
    listed in problemFields (currently only ":probeMap") if present,
    applies updateColNormalization to the result, and writes it to
    outputJsonPath as JSON indented by two spaces followed by a newline.
    """
    problemFields = [":probeMap"]
    # Context managers close both files even if parsing or writing fails.
    with open(inputJsonPath) as inputFile:
        jsonMetadata = json.load(inputFile)
    for thisProblem in problemFields:
        jsonMetadata.pop(thisProblem, None)
    updateColNormalization(jsonMetadata)
    with open(outputJsonPath, "w") as outputFile:
        outputFile.write("%s\n" % json.dumps(jsonMetadata, indent=2))
44 | |
45 | |
def _parseBoolArg(value):
    """Convert a command-line string into a bool.

    argparse's type=bool calls bool() on the raw string, so every
    non-empty value -- including "false" or "0" -- is parsed as True.
    This converter keeps other non-empty values truthy but maps the
    empty string and the usual "off" spellings to False.
    """
    return str(value).strip().lower() not in ("", "0", "false", "no", "off",
                                              "none")


def main():
    """Import a genomic data file, and optionally a probeMap, into Xena.

    Parses the command line, writes a "<data file>.json" metadata file
    into the Xena file directory (copied from --json, or assembled from
    the individual options), copies the data file(s) into that directory
    and runs the Xena loader on each of them. Progress and result
    messages are written to the file named by the "outfile" argument.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("genomicDataPath", type=str)
    parser.add_argument("outfile", type=str)
    parser.add_argument("--json", type=str, default=None)
    parser.add_argument("--cohort", type=str)
    parser.add_argument("--type", type=str)
    parser.add_argument("--dataSubType", type=str, default=None)
    parser.add_argument("--label", type=str, default=None)
    # type=bool would turn "--colNormalization false" into True.
    parser.add_argument("--colNormalization", type=_parseBoolArg,
                        default=False)
    parser.add_argument("--probeMap", type=str, default=None)
    args = parser.parse_args()

    # The "with" block closes the report file even when a later step
    # (writing metadata, copying files) raises.
    with open(args.outfile, "w") as fp2:
        fp2.write("Importing data to Xena\n")
        xenaFileDir = xena.fileDir()
        # shutil.copy names the copy after the source's basename, so use
        # the same rule for the metadata file name.
        genomicDataFile = os.path.basename(args.genomicDataPath)
        jsonMetadataTargetPathname = "%s/%s.json" % (xenaFileDir,
                                                     genomicDataFile)

        probeMapDataFile = None
        jsonProbeMapMetadataTargetPathname = None
        if args.probeMap is not None:
            probeMapDataFile = os.path.basename(args.probeMap)
            jsonProbeMapMetadataTargetPathname = "%s/%s.json" % (
                xenaFileDir, probeMapDataFile)

        # The metadata either came as the name of a JSON file or a series
        # of command line arguments.
        if args.json is not None:
            # In this case, the metadata came in the form of a JSON file.
            # Verify that the metadata is valid on the current system,
            # which might mean altering it. Import the stuff that will
            # validate.
            verifyAndImportExistingMetadata(args.json,
                                            jsonMetadataTargetPathname)
        else:
            # In this case, the metadata came in the form of a series of
            # command line arguments. Assemble them into JSON format,
            # and write a JSON file into the Xena file directory.
            metadata = {'cohort': args.cohort, 'type': args.type}
            if args.dataSubType is not None:
                metadata['dataSubType'] = args.dataSubType
            if args.label is not None:
                metadata['label'] = args.label
            if args.colNormalization:
                metadata['colNormalization'] = "true"
            else:
                # Not requested explicitly: fall back to the
                # subtype-based default.
                updateColNormalization(metadata)
            if args.probeMap is not None:
                metadata[':probeMap'] = probeMapDataFile

            jsonMetadata = json.dumps(metadata, indent=2)
            with open(jsonMetadataTargetPathname, "w") as fp:
                fp.write("%s\n" % (jsonMetadata))

        if args.probeMap is not None:
            # The probeMap file gets its own minimal metadata file.
            probeMapMetadata = {"type": "probeMap"}
            jsonProbeMapMetadata = json.dumps(probeMapMetadata, indent=2)
            with open(jsonProbeMapMetadataTargetPathname, "w") as fp:
                fp.write("%s\n" % (jsonProbeMapMetadata))

        # Finally, copy the genomic data into the Xena directory
        shutil.copy(args.genomicDataPath, xenaFileDir)
        if args.probeMap is not None:
            shutil.copy(args.probeMap, xenaFileDir)

        # Set up the xena load command and try to execute it. The outcome
        # of each load is reported to the output file by loadInToXena.
        # NOTE(review): the command is a shell string built from file
        # names; names containing spaces or shell metacharacters would
        # break it -- confirm callers only pass safe paths.
        loadCmdTemplate = "java -jar %s -l --force %s/%s -p %s"
        xenaLoadCmd = loadCmdTemplate % (xena.jarPath(), xenaFileDir,
                                         genomicDataFile, xena.port())
        if args.label is not None:
            name = args.label
        else:
            name = genomicDataFile
        loadInToXena(xenaLoadCmd, name, fp2)

        if args.probeMap is not None:
            xenaLoadCmd = loadCmdTemplate % (xena.jarPath(), xenaFileDir,
                                             probeMapDataFile, xena.port())
            loadInToXena(xenaLoadCmd,
                         "id to gene mapping file for %s" % (name), fp2)
35
d8dc482ef970
add functionality to import probeMap
jingchunzhu <jingchunzhu@gmail.com>
parents:
2
diff
changeset
|
134 |
def loadInToXena(xenaLoadCmd, name, fp2):
    """Run a Xena load command and record the outcome in a report file.

    Args:
        xenaLoadCmd: command line string, executed through the shell.
        name: dataset name used in the report messages.
        fp2: writable file-like object that receives the messages.

    Returns:
        True if the command ran and exited with status 0, False if it
        could not be started or exited with a non-zero status.
    """
    print(xenaLoadCmd)
    try:
        exitStatus = subprocess.call(xenaLoadCmd, shell=True)
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value,
                                           exc_traceback)
        allLines = ''.join('!! ' + line for line in lines)
        fp2.write("Unsuccessful: Import %s\n" % (name))
        fp2.write("Unsuccessful: error %s\n" % allLines)
        return False
    # subprocess.call does not raise when the command fails; it only
    # returns the exit status, so that must be checked explicitly.
    if exitStatus != 0:
        fp2.write("Unsuccessful: Import %s\n" % (name))
        fp2.write("Unsuccessful: error exit status %d\n" % exitStatus)
        return False
    fp2.write("Importing %s is successful\n" % (name))
    return True
0 | 147 |
# Run the import only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()