Mercurial > repos > melissacline > xena_get_dataset
comparison xenaGetDataset/xenaGetDataset.py @ 0:aa2ed7c6c09b
Uploaded
author | melissacline |
---|---|
date | Mon, 15 Sep 2014 19:13:12 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:aa2ed7c6c09b |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import json | |
5 import urllib2 | |
6 import re | |
7 from functools import partial | |
8 | |
9 # import xena query utilities | |
10 import xena_query as xena | |
11 | |
12 # xena url for public datasets, at ucsc | |
13 xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena" | |
14 | |
def main():
    """Download a single dataset from the Xena hub at ``xena_url`` to a file.

    Command-line arguments (all positional strings):
        datasetType: dataset type handed to the Xena dataset query.
        cohortName:  cohort part of the dataset path.
        datasetName: dataset name within the cohort.
        outfile:     path of the local file that receives the dataset.

    Exits via ``sys.exit`` with an error message (exit status 1) when the
    query matches no dataset or matches more than one dataset.
    """
    # Imported locally so this function is self-contained and does not
    # depend on changes to the module-level import list.
    import shutil
    import sys
    from contextlib import closing

    parser = argparse.ArgumentParser()
    parser.add_argument("datasetType", type=str)
    parser.add_argument("cohortName", type=str)
    parser.add_argument("datasetName", type=str)
    parser.add_argument("outfile", type=str)
    args = parser.parse_args()

    # Short-hand wrappers for xena methods, all bound to xena_url.
    # NOTE(review): presumably xena.compose chains the callables so that the
    # response of xena.post is decoded by json.loads -- confirm in xena_query.
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(post,
                                              xena.find_datasets_type_pattern)
    name_to_url = partial(xena.name_to_url, xena_url)

    # Find the datasets of the requested type whose name matches
    # "<cohortName>/<datasetName>".
    # NOTE(review): the leading "%" looks like a wildcard so that any prefix
    # before the cohort name is accepted -- confirm against xena_query.
    fullDatasetName = "%" + "%s/%s" % (args.cohortName, args.datasetName)
    matchingDatasets = find_datasets_type_pattern(args.datasetType,
                                                  fullDatasetName)

    # No match: report it instead of failing with an IndexError below.
    if not matchingDatasets:
        sys.exit("Error: no dataset of type %s matches %s/%s"
                 % (args.datasetType, args.cohortName, args.datasetName))

    # Ambiguous match: list every candidate so the caller can narrow it down.
    if len(matchingDatasets) > 1:
        errorMsg = " ".join(["Error: matches to multiple datasets:"]
                            + ["%s" % dataset for dataset in matchingDatasets])
        sys.exit(errorMsg)

    # Exactly one match: stream it to disk in chunks rather than holding the
    # whole dataset in memory. closing() guarantees the HTTP response is
    # released, and "wb" writes the downloaded bytes verbatim.
    with closing(urllib2.urlopen(name_to_url(matchingDatasets[0]))) as response:
        with open(args.outfile, "wb") as out:
            shutil.copyfileobj(response, out)
43 | |
44 | |
45 if __name__ == "__main__": | |
46 main() | |
47 |