comparison xenaGetDataset/xenaGetDataset.py @ 0:aa2ed7c6c09b

Uploaded
author melissacline
date Mon, 15 Sep 2014 19:13:12 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:aa2ed7c6c09b
1 #!/usr/bin/env python
2
3 import argparse
4 import json
5 import urllib2
6 import re
7 from functools import partial
8
9 # import xena query utilities
10 import xena_query as xena
11
# Base URL of the public Xena data hub hosted at UCSC.  main() binds it into
# both the query helper (xena.post) and the download-URL helper
# (xena.name_to_url).
xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"
14
def main():
    """Download a single Xena dataset to a local file.

    Positional command-line arguments (all strings):
        datasetType -- dataset type handed to the Xena type/pattern query
        cohortName  -- cohort part of the "<cohort>/<dataset>" name
        datasetName -- dataset part of the "<cohort>/<dataset>" name
        outfile     -- path of the file the dataset is written to

    The query must resolve to exactly one dataset.  If it matches none, or
    more than one, the script exits with an error message (non-zero status)
    and nothing is written.
    """
    # Function-scope imports keep this block self-contained; neither module
    # is imported at the top of the file.
    import shutil
    import sys

    parser = argparse.ArgumentParser()
    for positional in ("datasetType", "cohortName", "datasetName", "outfile"):
        parser.add_argument(positional, type=str)
    args = parser.parse_args()

    # Short-hand wrappers for xena methods.
    # NOTE(review): xena.compose presumably chains right-to-left, i.e.
    # build query -> POST to xena_url -> json.loads; confirm in xena_query.
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(post,
                                              xena.find_datasets_type_pattern)
    name_to_url = partial(xena.name_to_url, xena_url)

    # Look the dataset up by type and by a "<cohort>/<dataset>" name pattern.
    # NOTE(review): the leading "%" is presumably a LIKE-style wildcard so
    # that any prefix in front of the cohort name still matches; confirm.
    fullDatasetName = "%" + "%s/%s" % (args.cohortName, args.datasetName)
    matchingDatasets = find_datasets_type_pattern(args.datasetType,
                                                  fullDatasetName)

    if not matchingDatasets:
        # Previously this case fell through to matchingDatasets[0] and died
        # with a bare IndexError traceback.
        sys.exit("Error: no dataset of type %s matches %s/%s"
                 % (args.datasetType, args.cohortName, args.datasetName))

    if len(matchingDatasets) > 1:
        # "%s" % (d,) keeps the formatting safe whatever type the server
        # returns for each match.
        matches = " ".join("%s" % (dataset,) for dataset in matchingDatasets)
        sys.exit("Error: matches to multiple datasets: " + matches)

    response = urllib2.urlopen(name_to_url(matchingDatasets[0]))
    try:
        # Binary mode stores the payload byte-for-byte; copyfileobj streams
        # it in chunks instead of holding the whole dataset in memory.
        with open(args.outfile, "wb") as out:
            shutil.copyfileobj(response, out)
    finally:
        # urllib2 responses are not context managers, so close explicitly.
        response.close()
43
44
# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
47