0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import argparse
|
|
4 import json
|
|
5 import urllib2
|
|
6 import re
|
|
7 from functools import partial
|
|
8
|
|
9 # import xena query utilities
|
|
10 import xena_query as xena
|
|
11
|
|
# Base URL of the public Xena datasets server at UCSC. main() binds it into
# both the dataset query and the download-URL helper.
xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"
|
|
14
|
|
def main():
    """Download a single dataset from the Xena server into a local file.

    Positional command-line arguments (all strings):
        datasetType  dataset type handed to the Xena dataset query
        cohortName   cohort component of the dataset path
        datasetName  dataset component of the dataset path
        outfile      path of the local file that receives the download

    The query pattern is "%<cohortName>/<datasetName>". The script exits
    with an error message (non-zero status) when the pattern matches no
    dataset or more than one dataset; otherwise the single match is
    downloaded and written to ``outfile``.
    """
    # Function-scope imports: only this function needs these names, and
    # keeping them here leaves the module-level import block untouched.
    import contextlib
    import shutil
    import sys

    parser = argparse.ArgumentParser()
    for arg_name in ("datasetType", "cohortName", "datasetName", "outfile"):
        parser.add_argument(arg_name, type=str)
    args = parser.parse_args()

    # Short-hand wrappers for xena methods, bound to the configured server.
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(post,
                                              xena.find_datasets_type_pattern)
    name_to_url = partial(xena.name_to_url, xena_url)

    # Find the dataset by matching on the type and "<cohort>/<dataset>".
    # NOTE(review): the leading "%" is presumably a wildcard for whatever
    # prefix the server stores in front of the cohort -- confirm against
    # xena_query.find_datasets_type_pattern.
    fullDatasetName = "%%%s/%s" % (args.cohortName, args.datasetName)
    matchingDatasets = find_datasets_type_pattern(args.datasetType,
                                                  fullDatasetName)

    # An empty result used to fall through to matchingDatasets[0] and die
    # with a bare IndexError; report it as a proper error instead.
    if not matchingDatasets:
        sys.exit("Error: no dataset of type %s matches %s"
                 % (args.datasetType, fullDatasetName))

    if len(matchingDatasets) > 1:
        # "%s" % d (rather than str(d)) keeps unicode names intact on
        # Python 2, exactly like the original message construction.
        parts = ["Error: matches to multiple datasets:"]
        parts.extend("%s" % dataset for dataset in matchingDatasets)
        sys.exit(" ".join(parts))

    # closing() guarantees the HTTP handle is released even if the write
    # fails; copyfileobj streams in chunks so a large dataset is never held
    # in memory all at once. Binary mode writes the bytes exactly as served.
    download_url = name_to_url(matchingDatasets[0])
    with contextlib.closing(urllib2.urlopen(download_url)) as response:
        with open(args.outfile, "wb") as out:
            shutil.copyfileobj(response, out)
|
|
43
|
|
44
|
|
# Script entry point: run the download only when this file is executed
# directly, so that importing the module does not trigger it.
if __name__ == "__main__":
    main()
|
|
47
|