Mercurial > repos > melissacline > xena_get_dataset
diff xenaGetDataset/xenaGetDataset.py @ 0:aa2ed7c6c09b
Uploaded
author | melissacline |
---|---|
date | Mon, 15 Sep 2014 19:13:12 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""Download one dataset from the public UCSC Xena hub into a local file.

Usage:
    xenaGetDataset.py datasetType cohortName datasetName outfile

The dataset is looked up on the hub by its type and by a name pattern built
from the cohort and dataset names.  Exactly one dataset must match; the
script exits with an error message otherwise.
"""

import argparse
import contextlib
import json
import re
import shutil
import sys
import urllib2
from functools import partial

# import xena query utilities
import xena_query as xena

# xena url for public datasets, at ucsc
xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"


def main():
    """Resolve the requested dataset on the Xena hub and save it to disk.

    Command-line arguments (all positional strings):
        datasetType: dataset type passed to the hub query.
        cohortName:  cohort part of the dataset name.
        datasetName: dataset part of the dataset name.
        outfile:     path of the local file to write.

    Exits via ``sys.exit`` with an error message when the query matches no
    dataset or more than one dataset.
    """
    parser = argparse.ArgumentParser(
        description="Download a single dataset from the public UCSC Xena hub."
    )
    parser.add_argument("datasetType", type=str)
    parser.add_argument("cohortName", type=str)
    parser.add_argument("datasetName", type=str)
    parser.add_argument("outfile", type=str)
    args = parser.parse_args()

    # Short-hand wrappers for xena methods, all bound to the public hub URL.
    # NOTE(review): xena.compose presumably chains the callables so the query
    # is posted to the hub and the reply is JSON-decoded -- confirm against
    # xena_query.
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(
        post, xena.find_datasets_type_pattern
    )
    name_to_url = partial(xena.name_to_url, xena_url)

    # Match on the dataset type and on any dataset name ending in
    # "<cohortName>/<datasetName>".
    # NOTE(review): the leading "%" looks like a SQL LIKE-style wildcard
    # interpreted by the hub -- confirm against xena_query.
    name_pattern = "%%%s/%s" % (args.cohortName, args.datasetName)
    matching_datasets = find_datasets_type_pattern(
        args.datasetType, name_pattern
    )

    # Without this guard an empty result would crash with an IndexError
    # when the first match is dereferenced below.
    if not matching_datasets:
        sys.exit(
            "Error: no dataset of type %s matches %s"
            % (args.datasetType, name_pattern)
        )

    if len(matching_datasets) > 1:
        sys.exit(
            "Error: matches to multiple datasets: "
            + " ".join("%s" % dataset for dataset in matching_datasets)
        )

    # Stream the download straight to disk in binary mode so large datasets
    # are not held in memory and bytes are written unmodified; closing()
    # makes sure the HTTP response is released even if the write fails.
    dataset_url = name_to_url(matching_datasets[0])
    with contextlib.closing(urllib2.urlopen(dataset_url)) as response:
        with open(args.outfile, "wb") as out:
            shutil.copyfileobj(response, out)


if __name__ == "__main__":
    main()