# HG changeset patch # User jingchunzhu # Date 1439422692 25200 # Node ID 3167c1a26101ec60e7cf5bc5a05541972acf1ace # Parent 8da6920a39ac6e35db2c6bf6190deab3101956fa fix diff -r 8da6920a39ac -r 3167c1a26101 getXenaData.py --- a/getXenaData.py Sun Aug 09 23:23:58 2015 -0700 +++ b/getXenaData.py Wed Aug 12 16:38:12 2015 -0700 @@ -6,66 +6,75 @@ print "python getXenaData.py hub datasetId outputfile\n" sys.exit(1) -url = sys.argv[1] -dataset = sys.argv[2] -output = sys.argv[3] +def main(): + url = sys.argv[1] + dataset = sys.argv[2] + output = sys.argv[3] -fout = open(output,'w') + fout = open(output,'w') -if string.find(url,"galaxyxena") !=-1 and string.find(url,"ucsc.edu")!=-1: - url = "https://galaxyxena.soe.ucsc.edu:443/xena" + contactUrl = url + if string.find(url,"galaxyxena") !=-1 and string.find(url,"ucsc.edu")!=-1: + contactUrl = "https://galaxyxena.soe.ucsc.edu:443/xena" -#testing if the url is reachable -try: - r =json.loads(xena.post(url, "(+ 1 2)")) - if r!=3.0: - print "The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you." + #testing if the url is reachable + try: + r =json.loads(xena.post(contactUrl, "(+ 1 2)")) + + if r!=3.0: + print "The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to this galaxy instance." + print "You entered hub: %s" % (url) + fout.write("The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to this galaxy instance.\n") + fout.write("You entered hub: %s\n" % (url)) + fout.close() + sys.exit(1) + + except Exception: + print "The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to this galaxy instance." 
print "You entered hub: %s" % (url) - fout.write("The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you.\n") - fout.write("You entered hub: %s\n" % (url)) - fout.close() - sys.exit(1) -except: - print "The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you." - print "You entered hub: %s" % (url) - fout.write("The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you.\n") + fout.write("The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to this galaxy instance.\n") fout.write("You entered hub: %s\n" % (url)) fout.close() sys.exit(1) -samples = xena.dataset_samples (url, dataset) -if not samples: - print "Dataset does not exist" - print "You entered dataset id: %s" % (dataset) - fout.write("Dataset does not exists\n") - fout.write("You entered dataset id: %s\n" % (dataset)) - fout.close() - sys.exit(1) + samples = xena.dataset_samples (contactUrl, dataset) + if not samples: + print "Dataset does not exist" + print "You entered dataset id: %s" % (dataset) + fout.write("Dataset does not exists\n") + fout.write("You entered dataset id: %s\n" % (dataset)) + fout.close() + sys.exit(1) -type = xena.dataset_type(url, dataset) -if type[0] not in ["genomicMatrix", "clinicalMatrix"]: - print "The type of data is not supported" - print "datatype=%s" % (type[0]) - fout.write("The type of data is not supported\n") - fout.write("datatype=%s\n" % (type[0])) - fout.close() - sys.exit(1) + type = xena.dataset_type(contactUrl, dataset) + if type[0] not in ["genomicMatrix", "clinicalMatrix"]: + print "The type of data is not supported" + print "datatype=%s" % (type[0]) + fout.write("The type of data is not supported\n") + fout.write("datatype=%s\n" % (type[0])) + fout.close() + sys.exit(1) + + writer = csv.writer(fout, delimiter='\t') + writer.writerow(["sample"]+samples) 
-writer = csv.writer(fout, delimiter='\t') -writer.writerow(["sample"]+samples) + probes = xena.dataset_field(contactUrl, dataset) + start=0 + size =100 + N= len(probes) + for i in range (start, N,size): + results = xena.dataset_probe_values (contactUrl, dataset, samples, probes[i:i+size]) + print ".", + for j in range (0, size): + if i+j == N: + break + writer.writerow([probes[i+j]]+results[j]) -probes = xena.dataset_field(url, dataset) -start=0 -size =100 -N= len(probes) -for i in range (start, N,size): - results = xena.dataset_probe_values (url, dataset, samples, probes[i:i+size]) - print ".", - for j in range (0, size): - if i+j == N: - break - writer.writerow([probes[i+j]]+results[j]) + fout.close() + print "done" + sys.exit(0) -fout.close() -print "done" -sys.exit(0) + +if __name__ == "__main__": + main() + diff -r 8da6920a39ac -r 3167c1a26101 xenaGetDataset.py --- a/xenaGetDataset.py Sun Aug 09 23:23:58 2015 -0700 +++ b/xenaGetDataset.py Wed Aug 12 16:38:12 2015 -0700 @@ -9,26 +9,25 @@ parser = argparse.ArgumentParser() parser.add_argument("dataHub", type=str) parser.add_argument("datasetId", type=str) - #parser.add_argument("metadatafile", type=str) parser.add_argument("datafile", type=str) args = parser.parse_args() datasetUrlHost = re.sub("/proj/", "/download/", args.dataHub) datasetIdTokens = re.split("/", args.datasetId) datasetUrl = datasetUrlHost + "/" + "/".join(datasetIdTokens[1:]) - print datasetUrl - """ - metadataUrl = datasetUrl + ".json" - mm = urllib2.urlopen(metadataUrl) - with open(args.metadatafile, "w") as metadata: - metadata.write(mm.read()) - mm.close() - """ - dd = urllib2.urlopen(datasetUrl) - with open(args.datafile, "w") as data: + + data = open(args.datafile, "w") + + try: + dd = urllib2.urlopen(datasetUrl) data.write(dd.read()) - dd.close() - + dd.close() + data.close() + except: + data.write("You entered dataset id: %s\n" % (args.datasetId)) + data.write("Dataset does not exist. 
check typo.") + data.close() + sys.exit(1) if __name__ == "__main__": main()