view xenaFindDatasets/xenaFindDatasets.py @ 1:a1979987ecf2 default tip

Added a sanitizer object to the input field for the cohort name, so that users can include wildcards in their input
author melissacline
date Wed, 10 Sep 2014 15:19:30 -0700
parents a4253c71f31d
children
line wrap: on
line source

#!/usr/bin/env python

import argparse
import json
from functools import partial
import re
import xena_query as xena

# Base URL of the Xena hub for public datasets, hosted at UCSC.
# main() sends every query in this script to this URL.
xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"


def main():
    """Look up Xena datasets of a given type and write them to a TSV file.

    Command-line arguments (all positional strings):
        datasetType -- dataset type passed through to the Xena query.
        cohortName  -- cohort name pattern; it is wrapped in "%" wildcard
                       characters on both sides unless already present.
        outfile     -- path of the tab-separated output file.

    The output file starts with a "Cohort<TAB>Dataset" header line,
    followed by one line per dataset returned by the query.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("datasetType", type=str)
    parser.add_argument("cohortName", type=str)
    parser.add_argument("outfile", type=str)
    args = parser.parse_args()

    # Make the user's cohort name match as a substring by surrounding it
    # with "%" wildcards, without doubling any the user already supplied.
    cohort_pattern = args.cohortName
    if not cohort_pattern.startswith("%"):
        cohort_pattern = "%" + cohort_pattern
    if not cohort_pattern.endswith("%"):
        cohort_pattern = cohort_pattern + "%"

    # Short-hand wrappers for xena methods: build the query, post it to
    # xena_url, and decode the JSON response.
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(post,
                                              xena.find_datasets_type_pattern)
    datasets = find_datasets_type_pattern(args.datasetType, cohort_pattern)

    # The context manager guarantees the file is closed even if a write or
    # the path split below raises.
    with open(args.outfile, "w") as fp:
        fp.write("Cohort\tDataset\n")
        for dataset_path in datasets:
            # NOTE(review): assumes each entry is a "/"-separated string
            # with at least two components, ending in <cohort>/<dataset>;
            # an entry without a "/" raises IndexError -- confirm against
            # the xena_query response format.
            tokens = dataset_path.split("/")
            cohort_name = tokens[-2]
            dataset_name = tokens[-1]
            fp.write("%s\t%s\n" % (cohort_name, dataset_name))

# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()