annotate xenaGetDataset/xenaGetDataset.py @ 0:aa2ed7c6c09b

Uploaded
author melissacline
date Mon, 15 Sep 2014 19:13:12 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
1 #!/usr/bin/env python
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
2
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
3 import argparse
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
4 import json
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
5 import urllib2
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
6 import re
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
7 from functools import partial
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
8
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
9 # import xena query utilities
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
10 import xena_query as xena
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
11
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
12 # xena url for public datasets, at ucsc
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
13 xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
14
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
def main():
    """Download one dataset from the Xena hub at ``xena_url`` to a file.

    Command-line arguments (all positional strings):
        datasetType: dataset type handed to the hub query.
        cohortName:  cohort part of the dataset name.
        datasetName: dataset part of the dataset name.
        outfile:     path the downloaded dataset is written to.

    The hub is queried for datasets of ``datasetType`` whose name matches
    the pattern ``%<cohortName>/<datasetName>``.  Exactly one match is
    required: the script exits with an error message (non-zero status)
    when the query matches no dataset or more than one.
    """
    # Local imports: only this function needs them.
    import contextlib
    import shutil

    parser = argparse.ArgumentParser()
    parser.add_argument("datasetType", type=str)
    parser.add_argument("cohortName", type=str)
    parser.add_argument("datasetName", type=str)
    parser.add_argument("outfile", type=str)
    args = parser.parse_args()

    # Short-hand wrappers for xena methods, all bound to xena_url.
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(
        post, xena.find_datasets_type_pattern)
    name_to_url = partial(xena.name_to_url, xena_url)

    # Match on the dataset type and on "<cohort>/<dataset>".  The leading
    # "%" is presumably a wildcard for whatever precedes the cohort in the
    # hub's stored dataset names -- TODO confirm against xena_query.
    fullDatasetName = "%" + "%s/%s" % (args.cohortName, args.datasetName)
    matchingDatasets = find_datasets_type_pattern(args.datasetType,
                                                  fullDatasetName)

    # Without this guard an empty result would surface as an IndexError
    # from matchingDatasets[0] below.
    if not matchingDatasets:
        raise SystemExit("Error: no %s dataset matches %s/%s"
                         % (args.datasetType, args.cohortName,
                            args.datasetName))

    if len(matchingDatasets) > 1:
        errorMsg = "Error: matches to multiple datasets:"
        for dataset in matchingDatasets:
            errorMsg = "%s %s" % (errorMsg, dataset)
        raise SystemExit(errorMsg)

    # Open the URL first so a failed request does not create or truncate
    # the output file.  closing() guarantees the HTTP response is released,
    # and copyfileobj streams it in chunks instead of holding the whole
    # dataset in memory.  "wb" writes the payload exactly as received.
    response = urllib2.urlopen(name_to_url(matchingDatasets[0]))
    with contextlib.closing(response):
        with open(args.outfile, "wb") as out:
            shutil.copyfileobj(response, out)
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
43
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
44
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
aa2ed7c6c09b Uploaded
melissacline
parents:
diff changeset
47