annotate xenaFindDatasets/xenaFindDatasets.py @ 1:a1979987ecf2 default tip

Added a sanitizer object to the input field for the cohort name, so that users can include wildcards in their input
author melissacline
date Wed, 10 Sep 2014 15:19:30 -0700
parents a4253c71f31d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
1 #!/usr/bin/env python
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
2
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
3 import argparse
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
4 import json
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
5 from functools import partial
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
6 import re
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
7 import xena_query as xena
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
8
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
9 # URL of the Xena hub serving the public datasets hosted at UCSC
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
10 xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
11
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
12
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
def main():
    """Query Xena for datasets and write their cohort and name to a TSV file.

    Positional command-line arguments:
        datasetType: dataset type handed unchanged to the Xena query.
        cohortName: cohort name pattern; a ``%`` is added to either end
            when it is not already there.
        outfile: path of the tab-separated output file (overwritten).

    The output starts with a ``Cohort<TAB>Dataset`` header line, followed by
    one line per dataset returned by the query.

    Raises:
        IndexError: if a returned dataset identifier contains no ``/``, so
            that no cohort component can be taken from it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("datasetType", type=str)
    parser.add_argument("cohortName", type=str)
    parser.add_argument("outfile", type=str)
    args = parser.parse_args()

    # Make sure the pattern begins and ends with "%" (presumably the
    # wildcard character understood by the Xena query -- confirm against
    # xena_query). Kept in its own variable so the loop below cannot
    # clobber it.
    pattern = args.cohortName
    if not pattern.startswith("%"):
        pattern = "%" + pattern
    if not pattern.endswith("%"):
        pattern = pattern + "%"

    # short-hand wrappers for xena methods
    post = xena.compose(json.loads, partial(xena.post, xena_url))
    find_datasets_type_pattern = xena.compose(
        post, xena.find_datasets_type_pattern)

    # Run the query before touching the output file, so a failed query
    # does not leave a truncated file behind.
    datasets = find_datasets_type_pattern(args.datasetType, pattern)

    # The context manager closes the file even if a write or the token
    # lookup below raises.
    with open(args.outfile, "w") as fp:
        fp.write("Cohort\tDataset\n")
        for this_dataset in datasets:
            # The last two "/"-separated components are used as the cohort
            # and the dataset name, in that order.
            tokens = this_dataset.split("/")
            fp.write("%s\t%s\n" % (tokens[-2], tokens[-1]))
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
40
a4253c71f31d Uploaded
melissacline
parents:
diff changeset
# Script entry point: run the query only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()