Mercurial > repos > melissacline > ucsc_xena_platform
view getXenaData.py @ 55:421b18a0b659 default tip
update v17 step 2, add xena.jar
author | jingchunzhu |
---|---|
date | Tue, 22 Sep 2015 10:07:51 -0700 |
parents | bb840cc2603d |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Download one dataset from a Xena hub into a tab-separated file.

Usage:
    python getXenaData.py hub datasetId outputfile galaxy_url

On success the output file holds a header row (``sample`` followed by the
dataset's samples) and then one row per field returned by the hub.  On
failure a plain-text explanation is written to the output file instead and
the script exits with status 1.
"""

import os
import sys
import string
import json
import csv

import xena_query as xena

# Argument-count guard runs at module level, before main() is defined.
if len(sys.argv) != 5:
    sys.stdout.write(
        "python getXenaData.py hub datasetId outputfile galaxy_url\n\n")
    sys.exit(2)

# Address substituted for any hub URL that mentions both "galaxyxena" and
# "ucsc.edu".
_GALAXYXENA_URL = "https://galaxyxena.soe.ucsc.edu:443/xena"

# Dataset types this script knows how to export.
_SUPPORTED_TYPES = ("genomicMatrix", "clinicalMatrix")

# Number of fields requested from the hub per query.
_CHUNK_SIZE = 100


def _fail(fout, lines):
    """Write an error report to *fout* and terminate with exit status 1.

    *lines* are the report lines that follow the common
    ``There is an error`` heading.  This function never returns: it raises
    ``SystemExit``, so callers must not invoke it inside a ``try`` that
    catches ``SystemExit``.
    """
    fout.write("There is an error\n\n")
    fout.writelines(lines)
    # Make sure the report is on disk before the interpreter unwinds.
    fout.close()
    sys.exit(1)


def _hub_is_reachable(contact_url):
    """Return True if the hub at *contact_url* answers a trivial query.

    The hub is asked to evaluate ``(+ 1 2)``; any exception while posting
    or decoding the reply, or any answer other than 3.0, counts as
    unreachable.
    """
    try:
        return json.loads(xena.post(contact_url, "(+ 1 2)")) == 3.0
    except Exception:
        # Deliberately broad: connection, HTTP and JSON errors all mean the
        # same thing to the user.  ``Exception`` (rather than a bare
        # ``except``) lets SystemExit and KeyboardInterrupt propagate.
        return False


def main():
    """Fetch the dataset named on the command line and write it as TSV.

    Reads ``sys.argv[1:5]`` as hub URL, dataset id, output path and Galaxy
    URL (the last is only echoed in error reports).  Always ends by raising
    ``SystemExit``: status 0 on success, status 1 after writing an error
    report to the output file.
    """
    url = sys.argv[1]
    dataset = sys.argv[2]
    output = sys.argv[3]
    galaxy_url = sys.argv[4]

    contact_url = url
    if "galaxyxena" in url and "ucsc.edu" in url:
        contact_url = _GALAXYXENA_URL

    # The ``with`` block guarantees the file is closed even when a hub
    # query raises part-way through the export.
    with open(output, 'w') as fout:
        # The reachability test and the error exit are kept separate so the
        # SystemExit raised by _fail() is never swallowed by an exception
        # handler (which would then write to an already closed file).
        if not _hub_is_reachable(contact_url):
            _fail(fout, [
                "You entered hub: %s\n" % (url),
                "Possible causes for error:\n",
                "1. The hub is not running.\n",
                "2. The hub is not accessible to this galaxy instance at: %s.\n"
                % (galaxy_url),
                "3. The hub url has a typo.\n",
            ])

        samples = xena.dataset_samples(contact_url, dataset)
        if not samples:
            _fail(fout, [
                "You entered dataset id: %s\n" % (dataset),
                "Possible causes for error:\n",
                "1. Dataset does not exist.\n",
            ])

        # Only the first element of the reply is inspected.
        data_type = xena.dataset_type(contact_url, dataset)
        if data_type[0] not in _SUPPORTED_TYPES:
            _fail(fout, [
                "You entered dataset id: %s\n" % (dataset),
                "It's datatype=%s\n" % (data_type[0]),
                "This type of data is not supported yet.\n",
            ])

        writer = csv.writer(fout, delimiter='\t')
        writer.writerow(["sample"] + samples)

        probes = xena.dataset_field(contact_url, dataset)
        for start in range(0, len(probes), _CHUNK_SIZE):
            chunk = probes[start:start + _CHUNK_SIZE]
            results = xena.dataset_probe_values(
                contact_url, dataset, samples, chunk)
            # One progress marker per chunk on stdout.
            sys.stdout.write(". ")
            # Index into results (rather than zip) so a short reply from
            # the hub still fails loudly instead of silently dropping rows.
            for row, probe in enumerate(chunk):
                writer.writerow([probe] + results[row])

    sys.exit(0)


if __name__ == "__main__":
    main()