view getXenaData.py @ 43:78d6e6772e30

more reliable
author jingchunzhu <jingchunzhu@gmail.com>
date Mon, 27 Jul 2015 12:21:55 -0700
parents bc9784300015
children 3167c1a26101
line wrap: on
line source

# getXenaData.py
import os, sys, string, json, csv
import xena_query as xena

# Message reported whenever the hub self-test fails.
_HUB_UNREACHABLE = ("The hub seems can not be reached, either it is not running, "
                    "the url has a typo, or it is not accessible to you.")

# Dataset types this script knows how to dump as a matrix.
_SUPPORTED_TYPES = ("genomicMatrix", "clinicalMatrix")

# Number of probes requested from the hub per query.
_BATCH_SIZE = 100


def _report_failure(fout, console_lines, file_lines=None):
    """Print a failure message and mirror it into the output file.

    fout          -- open, writable output file.
    console_lines -- lines printed to stdout, one per line.
    file_lines    -- lines written to fout (each followed by a newline);
                     defaults to console_lines.
    """
    if file_lines is None:
        file_lines = console_lines
    for line in console_lines:
        # Single parenthesised argument: same output as the print statement.
        print(line)
    for line in file_lines:
        fout.write(line + "\n")


def _hub_is_reachable(url):
    """Return True if the hub at url evaluates "(+ 1 2)" to 3.0.

    Any error raised while posting the query or decoding the reply is
    treated as "not reachable".  Only this probe is guarded, so failures
    elsewhere in the script are never mistaken for an unreachable hub.
    """
    try:
        return json.loads(xena.post(url, "(+ 1 2)")) == 3.0
    except Exception:
        return False


def _download(url, dataset, fout):
    """Write the dataset as a tab-separated matrix to fout.

    The first row is "sample" followed by the sample ids; every following
    row is a probe name followed by the values returned for that probe.

    Returns 0 on success.  Returns 1, after reporting the reason on stdout
    and in fout, when the hub is unreachable, the dataset has no samples,
    or the dataset type is unsupported.
    """
    if not _hub_is_reachable(url):
        _report_failure(fout, [_HUB_UNREACHABLE, "You entered hub: %s" % (url)])
        return 1

    samples = xena.dataset_samples(url, dataset)
    if not samples:
        entered = "You entered dataset id: %s" % (dataset)
        # The file wording differs from the console wording in the original
        # script; both are kept unchanged.
        _report_failure(fout,
                        ["Dataset does not exist", entered],
                        ["Dataset does not exists", entered])
        return 1

    data_type = xena.dataset_type(url, dataset)
    if data_type[0] not in _SUPPORTED_TYPES:
        _report_failure(fout, ["The type of data is not supported",
                               "datatype=%s" % (data_type[0])])
        return 1

    writer = csv.writer(fout, delimiter='\t')
    writer.writerow(["sample"] + samples)

    probes = xena.dataset_field(url, dataset)
    for start in range(0, len(probes), _BATCH_SIZE):
        batch = probes[start:start + _BATCH_SIZE]
        results = xena.dataset_probe_values(url, dataset, samples, batch)
        # Progress marker, one per batch.
        sys.stdout.write(". ")
        # Index results explicitly so a reply shorter than the batch raises
        # instead of silently dropping probes.
        for offset, probe in enumerate(batch):
            writer.writerow([probe] + results[offset])
    return 0


def main(argv=None):
    """Run the script: python getXenaData.py hub datasetId outputfile

    argv -- full argument vector including the program name; defaults to
            sys.argv.

    Returns the process exit status: 0 on success, 1 on a usage error or
    any reported failure.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 4:
        print("python getXenaData.py hub datasetId outputfile\n")
        return 1

    url, dataset, output = argv[1], argv[2], argv[3]

    # Any URL mentioning the UCSC galaxyxena host is replaced by the
    # canonical hub address.
    if "galaxyxena" in url and "ucsc.edu" in url:
        url = "https://galaxyxena.soe.ucsc.edu:443/xena"

    # The context manager closes the file on every path, including when a
    # hub query raises part-way through the download.
    with open(output, 'w') as fout:
        status = _download(url, dataset, fout)

    if status == 0:
        print("done")
    return status


if __name__ == "__main__":
    sys.exit(main())