| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 import argparse | 
|  | 4 import json | 
|  | 5 import urllib2 | 
|  | 6 import re | 
|  | 7 from functools import partial | 
|  | 8 | 
|  | 9 # import xena query utilities | 
|  | 10 import xena_query as xena | 
|  | 11 | 
# Base URL of the Xena hub serving the public datasets at UCSC; the query
# and download helpers in main() are all bound to this address.
xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"
|  | 14 | 
def main():
    """Download one Xena dataset, selected by type and name, into a file.

    Positional command-line arguments (all strings):
        datasetType -- dataset type handed to the type/pattern query
        cohortName  -- first component of the dataset name
        datasetName -- second component of the dataset name
        outfile     -- path of the local file to write

    The hub at ``xena_url`` is queried for datasets of ``datasetType``
    whose name matches "%<cohortName>/<datasetName>".  The download only
    happens when exactly one dataset matches; otherwise the script exits
    with an error message (raised as SystemExit, so the message goes to
    stderr and the exit status is 1).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("datasetType", type=str)
    parser.add_argument("cohortName", type=str)
    parser.add_argument("datasetName", type=str)
    parser.add_argument("outfile", type=str)
    args = parser.parse_args()

    # Short-hand wrappers for xena methods, all bound to the public hub.
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(
        post, xena.find_datasets_type_pattern)
    name_to_url = partial(xena.name_to_url, xena_url)

    # Find the dataset by matching on the type and the dataset name.
    # NOTE(review): the leading "%" is presumably a LIKE-style wildcard so
    # that any name prefix is accepted -- confirm against xena_query.
    pattern = "%" + "%s/%s" % (args.cohortName, args.datasetName)
    matches = find_datasets_type_pattern(args.datasetType, pattern)

    # Without this guard an empty result would crash below with an
    # IndexError instead of telling the user what went wrong.
    if not matches:
        raise SystemExit("Error: no dataset of type %s matches %s"
                         % (args.datasetType, pattern))

    if len(matches) > 1:
        # "%s" % d keeps the original per-item formatting while letting
        # join build the message in one pass.
        raise SystemExit("Error: matches to multiple datasets: "
                         + " ".join("%s" % d for d in matches))

    response = urllib2.urlopen(name_to_url(matches[0]))
    try:
        # Binary mode keeps the payload byte-for-byte on every platform;
        # reading in fixed-size chunks avoids holding the whole dataset
        # in memory.
        with open(args.outfile, "wb") as out:
            for chunk in iter(partial(response.read, 64 * 1024), b""):
                out.write(chunk)
    finally:
        # urllib2 responses are not context managers in Python 2, so the
        # connection is closed explicitly.
        response.close()
|  | 43 | 
|  | 44 | 
# Run the downloader only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|  | 47 |