diff getXenaData.py @ 41:02b0824c7d60

Download data from any hub in the federated xena platform
author jingchunzhu <jingchunzhu@gmail.com>
date Mon, 27 Jul 2015 10:05:22 -0700
parents
children bc9784300015
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/getXenaData.py	Mon Jul 27 10:05:22 2015 -0700
@@ -0,0 +1,44 @@
+# getXenaData.py
+import os, sys, string, json, csv
+import xena_query as xena
+
+if len(sys.argv[:])!=4:
+  print "python getXenaData.py hub datasetId outputfile\n"
+  sys.exit(1)
+
+url = sys.argv[1]
+dataset = sys.argv[2]
+output = sys.argv[3]
+
+if string.find(url,"galaxyxena") !=-1 and string.find(url,"ucsc.edu")!=-1:
+  url = "https://galaxyxena.soe.ucsc.edu:443/xena"
+
+samples = xena.dataset_samples (url, dataset)
+if not samples:
+  print "dataset does not exists"
+  sys.exit(1)
+
+type = xena.dataset_type(url, dataset)
+if type[0] not in ["genomicMatrix", "clinicalMatrix"]:
+  print "the current data type is not supported"
+  sys.exit(1)
+
+fout = open(output,'w')
+writer = csv.writer(fout, delimiter='\t')
+writer.writerow(["sample"]+samples)
+
+probes = xena.dataset_field(url, dataset)
+start=0
+size =100
+N= len(probes)
+for i in range (start, N,size):
+  results = xena.dataset_probe_values (url, dataset, samples, probes[i:i+size])
+  print "..."
+  for j in range (0, size):
+    if i+j == N:
+      break
+    writer.writerow([probes[i+j]]+results[j])
+
+fout.close()
+print "done"
+sys.exit(0)