view getXenaData.py @ 41:02b0824c7d60

Download data from any hub in the federated xena platform
author jingchunzhu <jingchunzhu@gmail.com>
date Mon, 27 Jul 2015 10:05:22 -0700
parents
children bc9784300015
line wrap: on
line source

# getXenaData.py
import os, sys, string, json, csv
import xena_query as xena

# Script body: download one dataset from a xena hub into a tab-delimited file.
#
# Usage: python getXenaData.py hub datasetId outputfile
#
# Output layout: a header row of "sample" followed by the dataset's samples,
# then one row per probe/field holding the probe name followed by the values
# returned for it by xena.dataset_probe_values.
#
# Exit status: 0 on success; 1 on a wrong argument count, when the hub
# returns no samples for the dataset, or when the dataset type is not one of
# the supported matrix types.

# Number of probes requested from the hub per dataset_probe_values call.
BATCH_SIZE = 100

# Dataset types this script knows how to write out.
SUPPORTED_TYPES = ["genomicMatrix", "clinicalMatrix"]

if len(sys.argv) != 4:
    print("python getXenaData.py hub datasetId outputfile\n")
    sys.exit(1)

url = sys.argv[1]
dataset = sys.argv[2]
output = sys.argv[3]

# Any hub argument mentioning both "galaxyxena" and "ucsc.edu" is replaced
# by this fixed hub URL.
if "galaxyxena" in url and "ucsc.edu" in url:
    url = "https://galaxyxena.soe.ucsc.edu:443/xena"

samples = xena.dataset_samples(url, dataset)
if not samples:
    print("dataset does not exists")
    sys.exit(1)

dataset_type = xena.dataset_type(url, dataset)
# An empty type result is reported as unsupported instead of failing with an
# IndexError on dataset_type[0].
if not dataset_type or dataset_type[0] not in SUPPORTED_TYPES:
    print("the current data type is not supported")
    sys.exit(1)

# The with-block guarantees the output file is closed even when a hub query
# or a row write raises part-way through the download.
with open(output, 'w') as fout:
    writer = csv.writer(fout, delimiter='\t')
    writer.writerow(["sample"] + samples)

    probes = xena.dataset_field(url, dataset)
    for offset in range(0, len(probes), BATCH_SIZE):
        batch = probes[offset:offset + BATCH_SIZE]
        results = xena.dataset_probe_values(url, dataset, samples, batch)
        print("...")
        # presumably results[k] is the list of values for batch[k] -- verify
        # against xena_query. Indexing (rather than zip) keeps a short
        # response loud: it raises instead of silently dropping probes.
        for k, probe in enumerate(batch):
            writer.writerow([probe] + results[k])

print("done")
sys.exit(0)