Mercurial > repos > melissacline > xena_find_datasets
view xenaFindDatasets/xenaFindDatasets.py @ 1:a1979987ecf2 default tip
Added a sanitizer object to the input field for the cohort name, so that users can include wildcards in their input
author | melissacline |
---|---|
date | Wed, 10 Sep 2014 15:19:30 -0700 |
parents | a4253c71f31d |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Write a table of Xena datasets of one type whose cohort matches a pattern.

Usage: xenaFindDatasets.py datasetType cohortName outfile

The script queries the public UCSC Xena hub through the project-local
``xena_query`` module and writes a two-column, tab-separated file
(``Cohort``, ``Dataset``) to ``outfile``.
"""

import argparse
import json
from functools import partial
import re

import xena_query as xena

# xena url for public datasets, at ucsc
xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"


def main():
    """Parse the command line, query Xena, and write the matching datasets.

    Positional command-line arguments:
        datasetType: passed unchanged as the first argument of the
            ``find_datasets_type_pattern`` query.
        cohortName: cohort search text; a ``%`` is added to either end
            when not already present.
        outfile: path of the tab-separated report to (over)write.

    Each entry returned by the query is split on ``/``; the second-to-last
    component is written as the cohort and the last as the dataset name.
    An entry containing no ``/`` raises ``IndexError``, as before.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("datasetType", type=str)
    parser.add_argument("cohortName", type=str)
    parser.add_argument("outfile", type=str)
    args = parser.parse_args()

    # Pad the user's text with "%" on both sides unless it is already there.
    # NOTE(review): "%" is presumably the wildcard understood by the Xena
    # pattern query (substring match) -- confirm against xena_query.
    cohort_pattern = args.cohortName
    if not re.search("^%", cohort_pattern):
        cohort_pattern = "%" + cohort_pattern
    if not re.search("%$", cohort_pattern):
        cohort_pattern = cohort_pattern + "%"

    # short-hand wrappers for xena methods: the result of xena.post is fed
    # through json.loads, and the query builder is composed with that.
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(
        post, xena.find_datasets_type_pattern)

    # Run the query before touching the output file, so a failed query
    # does not leave behind a truncated report.
    datasets = find_datasets_type_pattern(args.datasetType, cohort_pattern)

    # The context manager closes the file even if a row fails to format.
    with open(args.outfile, "w") as fp:
        fp.write("Cohort\tDataset\n")
        for dataset_path in datasets:
            tokens = dataset_path.split("/")
            dataset_cohort = tokens[-2]
            dataset_name = tokens[-1]
            fp.write("%s\t%s\n" % (dataset_cohort, dataset_name))


if __name__ == "__main__":
    main()