annotate xena_import.py @ 10:cb0bb2406736

Clarified the direction of data transfer a bit
author melissacline
date Wed, 10 Sep 2014 15:23:33 -0700
parents a2a7096897a8
children 7f03b062f330
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
1 #!/usr/bin/env python
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
2
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
3 """
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
4 xena_import.py: import a dataset into Xena
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
5
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
6 Given a cmdline-specified genomic data file and a cmdline-specified Xena
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
7 directory, import the genomic data file into Xena. This requires assembling
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
8 the necessary json file, based on cmdline input.
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
9 """
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
10
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
11 import argparse
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
12 import json
9
a2a7096897a8 FIXED the xena import process
melissacline
parents: 6
diff changeset
13 import os
2
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
14 import shutil
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
15
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
16 def main():
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
17 parser = argparse.ArgumentParser()
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
18 parser.add_argument("genomicDataPathname", type=str)
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
19 parser.add_argument("cohort", type=str)
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
20 parser.add_argument("type", type=str)
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
21 args = parser.parse_args()
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
22
3
cae2b765ca5d Changing how the Xena base dir is communicated
melissacline
parents: 2
diff changeset
23 xenaBaseDir = os.getenv("XENA_BASE_DIR", "~")
9
a2a7096897a8 FIXED the xena import process
melissacline
parents: 6
diff changeset
24 xenaFileDir = xenaBaseDir + "/files"
3
cae2b765ca5d Changing how the Xena base dir is communicated
melissacline
parents: 2
diff changeset
25
2
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
26 # Assemble the metadata in JSON format
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
27 metadata = { 'cohort': args.cohort, 'type': args.type }
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
28 jsonMetadata = json.dumps(metadata, indent=2)
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
29
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
30 # Write the metadata to a file in the Xena directory. Use the filename
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
31 # of the genomic data file, with an added .json extension.
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
32 genomicDataFilename = args.genomicDataPathname.split("/")[-1]
9
a2a7096897a8 FIXED the xena import process
melissacline
parents: 6
diff changeset
33 jsonMetadataPathname = "%s/%s.json" % (xenaFileDir,
2
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
34 genomicDataFilename)
9
a2a7096897a8 FIXED the xena import process
melissacline
parents: 6
diff changeset
35 #fp = open("/inside/home/cline/tmp/xena_import.out", "w")
a2a7096897a8 FIXED the xena import process
melissacline
parents: 6
diff changeset
36 #fp.write("xena file dir %s\n" % (xenaFileDir))
a2a7096897a8 FIXED the xena import process
melissacline
parents: 6
diff changeset
37 #fp.write("copying metadata to %s and data to %s" % (jsonMetadataPathname,
a2a7096897a8 FIXED the xena import process
melissacline
parents: 6
diff changeset
38 # xenaFileDir))
a2a7096897a8 FIXED the xena import process
melissacline
parents: 6
diff changeset
39 #fp.close()
6
8d87f0ecc08d Removed some debugging messages, which I hope we are now done with
melissacline
parents: 3
diff changeset
40
2
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
41 fp = open(jsonMetadataPathname, "w")
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
42 fp.write("%s\n" % (jsonMetadata))
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
43 fp.close()
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
44
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
45 # Finally, copy the genomic data into the Xena directory
9
a2a7096897a8 FIXED the xena import process
melissacline
parents: 6
diff changeset
46 shutil.copy(args.genomicDataPathname, xenaFileDir)
2
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
47
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
48 if __name__ == "__main__":
b3cd322f7749 Uploaded
melissacline
parents:
diff changeset
49 main()