view getXenaData.py @ 55:421b18a0b659 default tip

update v17 step 2, add xena.jar
author jingchunzhu
date Tue, 22 Sep 2015 10:07:51 -0700
parents bb840cc2603d
children
line wrap: on
line source

#!/usr/bin/env python

import os, sys, string, json, csv
import xena_query as xena

# Usage guard: the script needs exactly four command-line arguments after
# the program name (hub, datasetId, outputfile, galaxy_url).  On any other
# count, show the usage line on stdout and stop with exit status 2.
if len(sys.argv) != 5:
  # The literal ends in "\n\n": one newline from the message itself plus
  # the one a print statement would have appended.
  sys.stdout.write("python getXenaData.py hub datasetId outputfile galaxy_url\n\n")
  sys.exit(2)

# Number of probes requested from the hub per query.
_PROBE_BATCH_SIZE = 100


def _write_error(fout, lines):
  """Write the standard error banner followed by the given detail lines.

  fout  -- open, writable file object that receives the report.
  lines -- iterable of strings; each is written followed by a newline.
  """
  fout.write("There is an error\n\n")
  for line in lines:
    fout.write(line + "\n")


def _hub_is_reachable(hub_url):
  """Return True when the hub evaluates the test query "(+ 1 2)" to 3.

  Any failure while contacting the hub or decoding its reply counts as
  "not reachable".  Only Exception is caught so that SystemExit and
  KeyboardInterrupt are never swallowed here.
  """
  try:
    return json.loads(xena.post(hub_url, "(+ 1 2)")) == 3.0
  except Exception:
    return False


def _fetch_dataset(fout, url, contact_url, dataset, galaxy_url):
  """Write the dataset as a tab-separated matrix, or an error report, to fout.

  fout        -- open, writable file object for the output.
  url         -- hub URL exactly as the user entered it (used in messages).
  contact_url -- URL actually queried.
  dataset     -- dataset id to download.
  galaxy_url  -- URL of the calling galaxy instance (used in messages).

  Returns the process exit status: 0 on success, 1 when an error report
  was written instead of data.
  """
  if not _hub_is_reachable(contact_url):
    _write_error(fout, [
      "You entered hub: %s" % (url),
      "Possible causes for error:",
      "1. The hub is not running.",
      "2. The hub is not accessible to this galaxy instance at: %s." % (galaxy_url),
      "3. The hub url has a typo.",
    ])
    return 1

  samples = xena.dataset_samples(contact_url, dataset)
  if not samples:
    _write_error(fout, [
      "You entered dataset id: %s" % (dataset),
      "Possible causes for error:",
      "1. Dataset does not exist.",
    ])
    return 1

  dataset_type = xena.dataset_type(contact_url, dataset)
  if dataset_type[0] not in ("genomicMatrix", "clinicalMatrix"):
    _write_error(fout, [
      "You entered dataset id: %s" % (dataset),
      "It's datatype=%s" % (dataset_type[0]),
      "This type of data is not supported yet.",
    ])
    return 1

  writer = csv.writer(fout, delimiter='\t')
  writer.writerow(["sample"] + samples)

  probes = xena.dataset_field(contact_url, dataset)
  for start in range(0, len(probes), _PROBE_BATCH_SIZE):
    batch = probes[start:start + _PROBE_BATCH_SIZE]
    # presumably one list of values per requested probe, in request order
    # -- verify against xena_query.dataset_probe_values
    results = xena.dataset_probe_values(contact_url, dataset, samples, batch)
    # One progress mark per batch on stdout.
    sys.stdout.write(". ")
    # Index into results (rather than zip) so that a short reply still
    # fails loudly instead of silently dropping rows.
    for offset, probe in enumerate(batch):
      writer.writerow([probe] + results[offset])

  return 0


def main():
  """Download one dataset from a xena hub into a tab-separated file.

  Reads four command-line arguments: hub URL, dataset id, output file path
  and galaxy URL.  The output file receives either the data matrix (a
  "sample" header row, then one row per probe) or a plain-text error
  report.  Always ends by calling sys.exit: status 0 on success, 1 when an
  error report was written.
  """
  url = sys.argv[1]
  dataset = sys.argv[2]
  output = sys.argv[3]
  galaxy_url = sys.argv[4]

  # A hub URL mentioning both "galaxyxena" and "ucsc.edu" is contacted at
  # this fixed address instead of the URL as entered.
  contact_url = url
  if "galaxyxena" in url and "ucsc.edu" in url:
    contact_url = "https://galaxyxena.soe.ucsc.edu:443/xena"

  # The with-block closes the output file on every path, including when a
  # hub query raises.  sys.exit is called only after the file is closed.
  with open(output, 'w') as fout:
    status = _fetch_dataset(fout, url, contact_url, dataset, galaxy_url)
  sys.exit(status)

# Run the download only when this file is executed as a script.
if __name__ == "__main__":
    main()