diff xenaGetDataset/xenaGetDataset.py @ 0:aa2ed7c6c09b

Uploaded
author melissacline
date Mon, 15 Sep 2014 19:13:12 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaGetDataset/xenaGetDataset.py	Mon Sep 15 19:13:12 2014 -0400
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import urllib2
+import re
+from functools import partial
+
+# import xena query utilities
+import xena_query as xena
+
+# Base URL of the public Xena hub at UCSC; queries and downloads use it.
+xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("datasetType", type=str)
+    parser.add_argument("cohortName", type=str)
+    parser.add_argument("datasetName", type=str)
+    parser.add_argument("outfile", type=str)
+    args = parser.parse_args()
+
+    # short-hand wrappers for xena methods
+    post = xena.compose(json.loads, partial(xena.post, xena_url))
+    find_datasets_type_pattern = xena.compose(post, 
+					      xena.find_datasets_type_pattern)
+    name_to_url = partial(xena.name_to_url, xena_url)
+
+# Find all TCGA mutation calls by matching on the type and dataset name.
+    fullDatasetName = "%s/%s" % (args.cohortName, args.datasetName)
+    fullDatasetName = "%" + fullDatasetName
+    matchingDatasets = find_datasets_type_pattern(args.datasetType, 
+                                                  fullDatasetName)
+    if len(matchingDatasets) > 1:
+        errorMsg = "Error: matches to multiple datasets:"
+	for dataset in matchingDatasets:
+            errorMsg = "%s %s" % (errorMsg, dataset)
+	exit(errorMsg)
+    else:
+        ff = urllib2.urlopen(name_to_url(matchingDatasets[0]))
+	with open(args.outfile, "w") as out:
+            out.write(ff.read())
+
+
+# Run the download only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
+