Mercurial > repos > melissacline > xena_find_datasets
diff xenaFindDatasets/xenaFindDatasets.py @ 0:a4253c71f31d
Uploaded
author | melissacline |
---|---|
date | Tue, 09 Sep 2014 21:53:10 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""Write a tab-separated table of Xena datasets for a dataset type and cohort.

Usage: xenaFindDatasets.py datasetType cohortName outfile

The cohort name is wrapped in "%" characters before being sent to the Xena
server at ``xena_url``; each dataset returned is written to ``outfile`` as a
"Cohort<TAB>Dataset" row under a header line.
"""

import argparse
import json
from functools import partial
import re
import xena_query as xena

# xena url for public datasets, at ucsc
xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"


def _as_wildcard_pattern(cohort):
    """Return *cohort* with a "%" at each end, adding only the ones missing.

    NOTE(review): "%" is presumably the wildcard understood by
    ``xena.find_datasets_type_pattern`` -- confirm against xena_query.
    """
    if not re.search(r"^%", cohort):
        cohort = "%" + cohort
    if not re.search(r"%$", cohort):
        cohort = cohort + "%"
    return cohort


def main():
    """Parse the command line, query Xena, and write the result table.

    Positional command-line arguments:
        datasetType: passed unchanged to the Xena dataset query.
        cohortName: cohort name; wrapped in "%" before querying.
        outfile: path of the tab-separated file to create or overwrite.

    Raises:
        IndexError: if a returned dataset identifier contains no "/", since
            both of its last two "/"-separated components are required.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("datasetType", type=str)
    parser.add_argument("cohortName", type=str)
    parser.add_argument("outfile", type=str)
    args = parser.parse_args()

    cohort_pattern = _as_wildcard_pattern(args.cohortName)

    # short-hand wrappers for xena methods: build the query, post it to
    # xena_url, and decode the JSON response
    # (assumes xena.compose chains right-to-left -- TODO confirm)
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(post,
                                              xena.find_datasets_type_pattern)
    datasets = find_datasets_type_pattern(args.datasetType, cohort_pattern)

    # The context manager closes the file even if a row fails to format,
    # so a partial result is flushed and the handle is not leaked.
    with open(args.outfile, "w") as fp:
        fp.write("Cohort\tDataset\n")
        for dataset in datasets:
            # The last two path components are reported as cohort and
            # dataset name respectively.
            tokens = dataset.split("/")
            fp.write("%s\t%s\n" % (tokens[-2], tokens[-1]))


if __name__ == "__main__":
    main()