Mercurial > repos > melissacline > xena_get_dataset
view xenaGetDataset/xenaGetDataset.py @ 0:aa2ed7c6c09b
Uploaded
author | melissacline |
---|---|
date | Mon, 15 Sep 2014 19:13:12 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Download a single dataset from the public UCSC Xena hub.

Usage:
    xenaGetDataset.py datasetType cohortName datasetName outfile

The hub is queried for datasets of the given type whose name matches
"%<cohortName>/<datasetName>".  Exactly one match is required; its
contents are then fetched and written to ``outfile``.
"""

import argparse
import json
import re
import shutil
import sys
import urllib2
from contextlib import closing
from functools import partial

# import xena query utilities
import xena_query as xena

# xena url for public datasets, at ucsc
xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"


def main():
    """Resolve the requested dataset on the Xena hub and save it to a file.

    Reads four positional command-line arguments (datasetType, cohortName,
    datasetName, outfile).

    Raises:
        SystemExit: with an error message if the query matches no dataset
            or matches more than one dataset.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("datasetType", type=str)
    parser.add_argument("cohortName", type=str)
    parser.add_argument("datasetName", type=str)
    parser.add_argument("outfile", type=str)
    args = parser.parse_args()

    # short-hand wrappers for xena methods
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(post,
                                              xena.find_datasets_type_pattern)
    name_to_url = partial(xena.name_to_url, xena_url)

    # Look the dataset up by type and by "<cohort>/<dataset>" name.
    # NOTE(review): the leading "%" is presumably a wildcard so that any
    # prefix before the cohort name is accepted -- confirm against
    # xena_query.find_datasets_type_pattern.
    fullDatasetName = "%s/%s" % (args.cohortName, args.datasetName)
    fullDatasetName = "%" + fullDatasetName
    matchingDatasets = find_datasets_type_pattern(args.datasetType,
                                                  fullDatasetName)

    # Guard clauses: exactly one match is required before downloading.
    if not matchingDatasets:
        # Previously this case crashed with IndexError on matchingDatasets[0].
        sys.exit("Error: no dataset of type %s matches %s/%s"
                 % (args.datasetType, args.cohortName, args.datasetName))
    if len(matchingDatasets) > 1:
        # "%s" % d (rather than str(d)) keeps unicode names intact on
        # Python 2, exactly as the original per-item formatting did.
        errorMsg = " ".join(["Error: matches to multiple datasets:"]
                            + ["%s" % dataset for dataset in matchingDatasets])
        sys.exit(errorMsg)

    # Open the URL first so a failed request does not leave an empty outfile.
    # closing() is needed because Python 2's urlopen result is not a context
    # manager; copyfileobj streams in chunks instead of loading the whole
    # dataset into memory.  "wb" writes the downloaded bytes verbatim.
    with closing(urllib2.urlopen(name_to_url(matchingDatasets[0]))) as response:
        with open(args.outfile, "wb") as out:
            shutil.copyfileobj(response, out)


if __name__ == "__main__":
    main()