view getXenaData.py @ 50:3167c1a26101

fix
author jingchunzhu
date Wed, 12 Aug 2015 16:38:12 -0700
parents 78d6e6772e30
children bb840cc2603d
line wrap: on
line source

# getXenaData.py
import os, sys, string, json, csv
import xena_query as xena

# Require exactly three arguments after the script name (hub, dataset id,
# output file); otherwise show the usage line and stop with status 1.
if len(sys.argv) != 4:
  print("python getXenaData.py hub datasetId outputfile\n")
  sys.exit(1)

_HUB_UNREACHABLE = "The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to this galaxy instance."


def _fail(fout, console_lines, file_lines=None):
  """Report an error on stdout and in the output file, then exit with status 1.

  fout          -- open, writable output file; each file line is written
                   followed by a newline.
  console_lines -- messages printed to stdout, one per line.
  file_lines    -- messages written to fout; defaults to console_lines.

  Raises SystemExit(1). The caller is responsible for closing fout (main
  does so through its ``with`` block).
  """
  if file_lines is None:
    file_lines = console_lines
  for line in console_lines:
    print(line)
  for line in file_lines:
    fout.write(line + "\n")
  sys.exit(1)


def _hub_is_reachable(hub_url):
  """Return True when the hub evaluates the probe query "(+ 1 2)" to 3.

  Any exception raised while posting the query or decoding the JSON reply
  is treated as "not reachable".
  """
  try:
    return json.loads(xena.post(hub_url, "(+ 1 2)")) == 3.0
  except Exception:
    # Deliberately not a bare except: SystemExit and KeyboardInterrupt must
    # keep propagating instead of being reported as a hub failure.
    return False


def main():
  """Download one dataset from a Xena hub into a tab-separated file.

  Reads three command-line arguments: the hub url (sys.argv[1]), the dataset
  id (sys.argv[2]) and the output file path (sys.argv[3]).

  The output file is opened first, so that every failure (unreachable hub,
  dataset without samples, unsupported data type) is reported both on stdout
  and inside the output file before exiting with status 1. On success the
  file holds a header row ("sample" followed by the sample names) and one row
  per field, and the process exits with status 0.
  """
  url = sys.argv[1]
  dataset = sys.argv[2]
  output = sys.argv[3]

  # Urls naming the UCSC galaxyxena hub are contacted through a fixed
  # endpoint; the url the user typed is still the one echoed in messages.
  contactUrl = url
  if "galaxyxena" in url and "ucsc.edu" in url:
    contactUrl = "https://galaxyxena.soe.ucsc.edu:443/xena"

  # The with-block closes the file on every exit path, including the
  # SystemExit raised by _fail and unexpected errors from the xena calls.
  with open(output, 'w') as fout:
    # testing if the url is reachable
    if not _hub_is_reachable(contactUrl):
      _fail(fout, [_HUB_UNREACHABLE, "You entered hub: %s" % (url)])

    samples = xena.dataset_samples(contactUrl, dataset)
    if not samples:
      entered = "You entered dataset id: %s" % (dataset)
      # The file message has always been worded slightly differently from
      # the console one; both are kept verbatim.
      _fail(fout,
            ["Dataset does not exist", entered],
            ["Dataset does not exists", entered])

    data_type = xena.dataset_type(contactUrl, dataset)
    if data_type[0] not in ["genomicMatrix", "clinicalMatrix"]:
      _fail(fout, ["The type of data is not supported",
                   "datatype=%s" % (data_type[0])])

    writer = csv.writer(fout, delimiter='\t')
    writer.writerow(["sample"] + samples)

    # Fetch the values in batches of `size` fields per request.
    probes = xena.dataset_field(contactUrl, dataset)
    size = 100
    for start in range(0, len(probes), size):
      batch = probes[start:start + size]
      results = xena.dataset_probe_values(contactUrl, dataset, samples, batch)
      # Progress marker; yields the same ". . . done" line as the former
      # trailing-comma print statement.
      sys.stdout.write(". ")
      # Index into results (rather than zip) so a short reply still raises
      # IndexError instead of silently dropping rows.
      for j, probe in enumerate(batch):
        writer.writerow([probe] + results[j])

  print("done")
  sys.exit(0)


# Run the download only when this file is executed as a script.
if __name__ == "__main__":
  main()