changeset 0:aa2ed7c6c09b

Uploaded
author melissacline
date Mon, 15 Sep 2014 19:13:12 -0400
parents
children bf4a5e41adbc
files xenaGetDataset/._xenaGetDataset.py xenaGetDataset/tool_dependencies.xml xenaGetDataset/xenaGetDataset.py xenaGetDataset/xenaGetDataset.xml
diffstat 4 files changed, 88 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file xenaGetDataset/._xenaGetDataset.py has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaGetDataset/tool_dependencies.xml	Mon Sep 15 19:13:12 2014 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- Declares the start_xena repository, at a fixed changeset revision, as a dependency. -->
+  <package name="installXena" version="1.0">
+    <repository name="start_xena" owner="melissacline" toolshed="http://testtoolshed.g2.bx.psu.edu" changeset_revision="75c7d80df9c1"/>
+  </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaGetDataset/xenaGetDataset.py	Mon Sep 15 19:13:12 2014 -0400
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import urllib2
+import re
+from functools import partial
+
+# import xena query utilities
+import xena_query as xena
+
+# Base URL of the public Xena hub at UCSC; main() binds it into its query and download helpers.
+xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("datasetType", type=str)
+    parser.add_argument("cohortName", type=str)
+    parser.add_argument("datasetName", type=str)
+    parser.add_argument("outfile", type=str)
+    args = parser.parse_args()
+
+    # short-hand wrappers for xena methods
+    post = xena.compose(json.loads, partial(xena.post, xena_url))
+    find_datasets_type_pattern = xena.compose(post, 
+					      xena.find_datasets_type_pattern)
+    name_to_url = partial(xena.name_to_url, xena_url)
+
+# Find all TCGA mutation calls by matching on the type and dataset name.
+    fullDatasetName = "%s/%s" % (args.cohortName, args.datasetName)
+    fullDatasetName = "%" + fullDatasetName
+    matchingDatasets = find_datasets_type_pattern(args.datasetType, 
+                                                  fullDatasetName)
+    if len(matchingDatasets) > 1:
+        errorMsg = "Error: matches to multiple datasets:"
+	for dataset in matchingDatasets:
+            errorMsg = "%s %s" % (errorMsg, dataset)
+	exit(errorMsg)
+    else:
+        ff = urllib2.urlopen(name_to_url(matchingDatasets[0]))
+	with open(args.outfile, "w") as out:
+            out.write(ff.read())
+
+
+if __name__ == "__main__":
+    main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaGetDataset/xenaGetDataset.xml	Mon Sep 15 19:13:12 2014 -0400
@@ -0,0 +1,35 @@
+<tool id="xenaGetDatasets" description="Fetch the indicated dataset from the Xena database" name="Get Datasets from Xena" version="0.0.1">
+  <!-- Galaxy wrapper: runs xenaGetDataset.py with the chosen type, cohort and dataset name. -->
+  <description>Retrieve the indicated dataset</description>
+  <!-- Values are single-quoted so spaces or parentheses in a name stay within one argument. -->
+  <command interpreter="python">
+    xenaGetDataset.py '${datasetType}' '${cohortName}' '${datasetName}' '${outFile}'
+  </command>
+  <inputs>
+    <param name="datasetType" type="select" label="Dataset Type">
+      <option value="genomicMatrix">Genomic Data</option>
+      <option value="clinicalMatrix">Clinical Data</option>
+      <option value="mutationVector">Mutation Data</option>
+    </param>
+    <param type="text" name="cohortName" label="Cohort (wildcard is %)" optional="false">
+      <sanitizer>
+        <valid>
+          <add value="%"/>
+        </valid>
+      </sanitizer>
+    </param>
+    <param type="text" name="datasetName" label="Dataset (wildcard is %)" optional="false">
+      <sanitizer>
+        <valid>
+          <add value="%"/>
+        </valid>
+      </sanitizer>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outFile" />
+  </outputs>
+  <help>
+    Fetches one dataset from Xena, selected by dataset type, cohort and dataset name; % acts as a wildcard.
+  </help>
+</tool>