Mercurial > repos > melissacline > xena_find_datasets

diff xenaFindDatasets/xenaFindDatasets.py @ 0:a4253c71f31d
Uploaded
author: melissacline
date: Tue, 09 Sep 2014 21:53:10 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaFindDatasets/xenaFindDatasets.py	Tue Sep 09 21:53:10 2014 -0400
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+from functools import partial
+import re
+import xena_query as xena
+
+# Xena server URL for the public datasets hosted at UCSC.
+xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"
+
+
+def main():
+    """Query Xena for datasets of a type and cohort and list them in a file.
+
+    Reads datasetType, cohortName and outfile from the command line; the
+    cohort name is wrapped in "%" on each side where missing. Writes a
+    tab-separated "Cohort"/"Dataset" table to outfile.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("datasetType", type=str)
+    parser.add_argument("cohortName", type=str)
+    parser.add_argument("outfile", type=str)
+    args = parser.parse_args()
+
+    pattern = args.cohortName
+    if not pattern.startswith("%"):
+        pattern = "%" + pattern
+    if not pattern.endswith("%"):
+        pattern += "%"
+
+    post = xena.compose(json.loads, partial(xena.post, xena_url))
+    find_datasets = xena.compose(post, xena.find_datasets_type_pattern)
+    datasets = find_datasets(args.datasetType, pattern)
+    with open(args.outfile, "w") as fp:  # closed even if a write fails
+        fp.write("Cohort\tDataset\n")
+        for dataset in datasets:
+            tokens = dataset.split("/")  # expects ".../cohort/dataset"
+            fp.write("%s\t%s\n" % (tokens[-2], tokens[-1]))
+
+if __name__ == "__main__":
+    main()  # run the tool only when executed as a script, not on import
author	melissacline
date	Tue, 09 Sep 2014 21:53:10 -0400
parents
children