changeset 0:a4253c71f31d

Uploaded
author melissacline
date Tue, 09 Sep 2014 21:53:10 -0400
parents
children a1979987ecf2
files xenaFindDatasets/tool_dependencies.xml xenaFindDatasets/xenaFindDatasets.py xenaFindDatasets/xenaFindDatasets.xml
diffstat 3 files changed, 73 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaFindDatasets/tool_dependencies.xml	Tue Sep 09 21:53:10 2014 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="installXena" version="1.0">
+    <repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="start_xena" owner="melissacline" changeset_revision="82755b0ee5a5"/>
+  </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaFindDatasets/xenaFindDatasets.py	Tue Sep 09 21:53:10 2014 -0400
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+from functools import partial
+import re
+import xena_query as xena
+
+# Xena URL for public datasets, hosted at UCSC.
+xena_url = "https://genome-cancer.ucsc.edu/proj/public/xena"
+
+
+def main():
+    """Write a tab-separated Cohort/Dataset table of matching Xena datasets.
+
+    The cohort name is wrapped in "%" wildcards unless already present.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("datasetType", type=str)
+    parser.add_argument("cohortName", type=str)
+    parser.add_argument("outfile", type=str)
+    args = parser.parse_args()
+
+    pattern = args.cohortName
+    if not pattern.startswith("%"):
+        pattern = "%" + pattern
+    if not pattern.endswith("%"):
+        pattern = pattern + "%"
+
+    # Short-hand wrappers for xena methods, bound to the public server URL.
+    post = xena.compose(json.loads, partial(xena.post, xena_url))
+    find_datasets = xena.compose(post, xena.find_datasets_type_pattern)
+    datasets = find_datasets(args.datasetType, pattern)
+    # The context manager closes the output file even if a write raises.
+    with open(args.outfile, "w") as fp:
+        fp.write("Cohort\tDataset\n")
+        for dataset in datasets:
+            # Each entry is split on "/": last two parts are cohort, dataset.
+            tokens = dataset.split("/")
+            fp.write("%s\t%s\n" % (tokens[-2], tokens[-1]))
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xenaFindDatasets/xenaFindDatasets.xml	Tue Sep 09 21:53:10 2014 -0400
@@ -0,0 +1,25 @@
+<tool id="xenaFindDatasets" description="Find datasets in the Xena database" name="Find Datasets in Xena" version="0.0.1">
+  <description>
+    Retrieve the cohorts and datasets that match the specified pattern.
+  </description>
+  <requirements>
+    <requirement type="package" version="1.0">installXena</requirement>
+  </requirements>
+  <command interpreter="python">
+    xenaFindDatasets.py "${datasetType}" "${cohortName}" "${outFile}"
+  </command> <!-- arguments are quoted so an empty optional cohortName still fills its position -->
+  <inputs>
+    <param name="datasetType" type="select" label="Dataset Type">
+      <option value="genomicMatrix">Genomic Data</option>
+      <option value="clinicalMatrix">Clinical Data</option>
+      <option value="mutationVector">Mutation Data</option>
+    </param>
+    <param type="text" name="cohortName" label="Cohort (wildcard is %)" optional="true" />
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outFile" />
+  </outputs>
+  <help>
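+    Lists the Xena datasets of the selected type whose cohort matches the given pattern, as a two-column (Cohort, Dataset) tabular file. A % wildcard is added to each end of the cohort pattern unless one is already present.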
+  </help>
+</tool>