Mercurial > repos > melissacline > ucsc_xena_platform
changeset 50:3167c1a26101
fix
author | jingchunzhu |
---|---|
date | Wed, 12 Aug 2015 16:38:12 -0700 |
parents | 8da6920a39ac |
children | 47926759295a |
files | getXenaData.py xenaGetDataset.py |
diffstat | 2 files changed, 72 insertions(+), 64 deletions(-) [+] |
line wrap: on
line diff
--- a/getXenaData.py Sun Aug 09 23:23:58 2015 -0700 +++ b/getXenaData.py Wed Aug 12 16:38:12 2015 -0700 @@ -6,66 +6,75 @@ print "python getXenaData.py hub datasetId outputfile\n" sys.exit(1) -url = sys.argv[1] -dataset = sys.argv[2] -output = sys.argv[3] +def main(): + url = sys.argv[1] + dataset = sys.argv[2] + output = sys.argv[3] -fout = open(output,'w') + fout = open(output,'w') -if string.find(url,"galaxyxena") !=-1 and string.find(url,"ucsc.edu")!=-1: - url = "https://galaxyxena.soe.ucsc.edu:443/xena" + contactUrl = url + if string.find(url,"galaxyxena") !=-1 and string.find(url,"ucsc.edu")!=-1: + contactUrl = "https://galaxyxena.soe.ucsc.edu:443/xena" -#testing if the url is reachable -try: - r =json.loads(xena.post(url, "(+ 1 2)")) - if r!=3.0: - print "The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you." + #testing if the url is reachable + try: + r =json.loads(xena.post(contactUrl, "(+ 1 2)")) + + if r!=3.0: + print "The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to this galaxy instance." + print "You entered hub: %s" % (url) + fout.write("The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to this galaxy instance.\n") + fout.write("You entered hub: %s\n" % (url)) + fout.close() + sys.exit(1) + + except: + print "The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to this galaxy instance." print "You entered hub: %s" % (url) - fout.write("The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you.\n") - fout.write("You entered hub: %s\n" % (url)) - fout.close() - sys.exit(1) -except: - print "The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you." - print "You entered hub: %s" % (url) - fout.write("The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you.\n") + fout.write("The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to this galaxy instance.\n") fout.write("You entered hub: %s\n" % (url)) fout.close() sys.exit(1) -samples = xena.dataset_samples (url, dataset) -if not samples: - print "Dataset does not exist" - print "You entered dataset id: %s" % (dataset) - fout.write("Dataset does not exists\n") - fout.write("You entered dataset id: %s\n" % (dataset)) - fout.close() - sys.exit(1) + samples = xena.dataset_samples (contactUrl, dataset) + if not samples: + print "Dataset does not exist" + print "You entered dataset id: %s" % (dataset) + fout.write("Dataset does not exists\n") + fout.write("You entered dataset id: %s\n" % (dataset)) + fout.close() + sys.exit(1) -type = xena.dataset_type(url, dataset) -if type[0] not in ["genomicMatrix", "clinicalMatrix"]: - print "The type of data is not supported" - print "datatype=%s" % (type[0]) - fout.write("The type of data is not supported\n") - fout.write("datatype=%s\n" % (type[0])) - fout.close() - sys.exit(1) + type = xena.dataset_type(contactUrl, dataset) + if type[0] not in ["genomicMatrix", "clinicalMatrix"]: + print "The type of data is not supported" + print "datatype=%s" % (type[0]) + fout.write("The type of data is not supported\n") + fout.write("datatype=%s\n" % (type[0])) + fout.close() + sys.exit(1) + + writer = csv.writer(fout, delimiter='\t') + writer.writerow(["sample"]+samples) -writer = csv.writer(fout, delimiter='\t') -writer.writerow(["sample"]+samples) + probes = xena.dataset_field(contactUrl, dataset) + start=0 + size =100 + N= len(probes) + for i in range (start, N,size): + results = xena.dataset_probe_values (contactUrl, dataset, samples, probes[i:i+size]) + print ".", + for j in range (0, size): + if i+j == N: + break + writer.writerow([probes[i+j]]+results[j]) -probes = xena.dataset_field(url, dataset) -start=0 -size =100 -N= len(probes) -for i in range (start, N,size): - results = xena.dataset_probe_values (url, dataset, samples, probes[i:i+size]) - print ".", - for j in range (0, size): - if i+j == N: - break - writer.writerow([probes[i+j]]+results[j]) + fout.close() + print "done" + sys.exit(0) -fout.close() -print "done" -sys.exit(0) + +if __name__ == "__main__": + main() +
--- a/xenaGetDataset.py Sun Aug 09 23:23:58 2015 -0700 +++ b/xenaGetDataset.py Wed Aug 12 16:38:12 2015 -0700 @@ -9,26 +9,25 @@ parser = argparse.ArgumentParser() parser.add_argument("dataHub", type=str) parser.add_argument("datasetId", type=str) - #parser.add_argument("metadatafile", type=str) parser.add_argument("datafile", type=str) args = parser.parse_args() datasetUrlHost = re.sub("/proj/", "/download/", args.dataHub) datasetIdTokens = re.split("/", args.datasetId) datasetUrl = datasetUrlHost + "/" + "/".join(datasetIdTokens[1:]) - print datasetUrl - """ - metadataUrl = datasetUrl + ".json" - mm = urllib2.urlopen(metadataUrl) - with open(args.metadatafile, "w") as metadata: - metadata.write(mm.read()) - mm.close() - """ - dd = urllib2.urlopen(datasetUrl) - with open(args.datafile, "w") as data: + + data = open(args.datafile, "w") + + try: + dd = urllib2.urlopen(datasetUrl) data.write(dd.read()) - dd.close() - + dd.close() + data.close() + except: + data.wriet("You entered dataset id: %s\n" % (args.datasetId)) + data.write("Dataset does not exist. check typo.") + data.close() + sys.exit(1) if __name__ == "__main__": main()