annotate xenaGetDataset.py @ 18:154a9b440b63

Updated the xena jar to version 14, updated the URL of the Xena pages
author melissacline
date Tue, 02 Jun 2015 14:55:55 -0700
parents 8bb037f88ed2
children 02b0824c7d60
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
1 #!/usr/bin/env python
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
2
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
3 import argparse
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
4 import re
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
5 import urllib2
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
6
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
7
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
def _download(url, path):
    """Stream the resource at ``url`` into the local file ``path``.

    The URL is opened before the output file, so a failed request does not
    create or truncate ``path``.  The response handle is closed even when
    opening or writing the output file raises.
    """
    import contextlib
    import shutil

    # Resolved locally so this helper works with either urllib flavour.
    # NOTE: the module-level ``import urllib2`` would still need the same
    # fallback for the whole script to load on Python 3.
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3

    # closing() rather than a bare ``with``: the Python 2 response object
    # does not implement the context-manager protocol.
    with contextlib.closing(urlopen(url)) as response:
        # Binary mode: the payload is raw bytes and must be written
        # unmodified (no newline translation, no implicit encoding).
        with open(path, "wb") as out:
            # Copy in chunks instead of loading the whole dataset in memory.
            shutil.copyfileobj(response, out)


def main():
    """Download a dataset and its JSON metadata to local files.

    Command-line arguments (all positional strings):
        dataHub       base URL of the hub; every "/proj/" in it is replaced
                      by "/download/" to form the download host.
        datasetId     slash-separated identifier; its first component is
                      dropped (presumably the hub name -- confirm against
                      the caller) and the rest becomes the URL path.
        metadatafile  output path for the contents of "<dataset URL>.json".
        datafile      output path for the contents of the dataset URL.

    The computed dataset URL is printed to stdout before any download.
    Errors raised by the URL opener or by file I/O propagate to the caller.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dataHub", type=str)
    parser.add_argument("datasetId", type=str)
    parser.add_argument("metadatafile", type=str)
    parser.add_argument("datafile", type=str)
    args = parser.parse_args()

    # Literal substitutions, so plain str methods are equivalent to the
    # regex calls and avoid treating the text as a pattern.
    datasetUrlHost = args.dataHub.replace("/proj/", "/download/")
    datasetPath = "/".join(args.datasetId.split("/")[1:])
    datasetUrl = datasetUrlHost + "/" + datasetPath
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print(datasetUrl)

    # Metadata first, then the data itself (same order as before).
    _download(datasetUrl + ".json", args.metadatafile)
    _download(datasetUrl, args.datafile)
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
29
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
30
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
# Script entry point: run the download only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
8bb037f88ed2 Uploaded
melissacline
parents:
diff changeset
33