Mercurial > repos > melissacline > ucsc_xena_platform
diff getXenaData.py @ 41:02b0824c7d60
Download data from any hub in the federated xena platform
author | jingchunzhu <jingchunzhu@gmail.com> |
---|---|
date | Mon, 27 Jul 2015 10:05:22 -0700 |
parents | |
children | bc9784300015 |
line wrap: on
line diff
# getXenaData.py
"""Download one dataset from a Xena hub and save it as a tab-separated file.

Usage:
    python getXenaData.py hub datasetId outputfile

The output starts with a header row ("sample" followed by the dataset's
samples); every following row is one dataset field followed by the values
the hub returned for that field.

Exit status is 0 on success and 1 for a wrong argument count, a dataset with
no samples, or an unsupported dataset type.
"""
import os
import sys
import string
import json
import csv

import xena_query as xena

# Canonical endpoint substituted for any hub argument that names the UCSC
# Galaxy Xena host.
GALAXY_XENA_URL = "https://galaxyxena.soe.ucsc.edu:443/xena"

# Dataset types this script can export.
SUPPORTED_TYPES = ("genomicMatrix", "clinicalMatrix")

# Number of fields requested from the hub per query.
CHUNK_SIZE = 100


def main(argv):
    """Run the download described in the module docstring.

    Args:
        argv: Full argument vector, program name first, followed by the hub
            URL, the dataset id and the output file path.

    Returns:
        The process exit status: 0 on success, 1 on any handled failure.
    """
    if len(argv) != 4:
        print("python getXenaData.py hub datasetId outputfile\n")
        return 1

    url, dataset, output = argv[1:4]

    # Any spelling of the UCSC Galaxy Xena hub is rewritten to the one
    # canonical HTTPS endpoint.
    if "galaxyxena" in url and "ucsc.edu" in url:
        url = GALAXY_XENA_URL

    samples = xena.dataset_samples(url, dataset)
    if not samples:
        # An empty/falsy sample list is reported as a missing dataset.
        print("dataset does not exists")
        return 1

    # Only the first element of the returned value is inspected.
    dataset_type = xena.dataset_type(url, dataset)
    if dataset_type[0] not in SUPPORTED_TYPES:
        print("the current data type is not supported")
        return 1

    # `with` guarantees the file is closed even if a hub query or a row
    # write raises part-way through the export.
    with open(output, 'w') as fout:
        writer = csv.writer(fout, delimiter='\t')
        writer.writerow(["sample"] + samples)

        probes = xena.dataset_field(url, dataset)
        for start in range(0, len(probes), CHUNK_SIZE):
            chunk = probes[start:start + CHUNK_SIZE]
            values = xena.dataset_probe_values(url, dataset, samples, chunk)
            print("...")  # progress marker, one per chunk
            # Index into `values` (rather than zip) so that a response with
            # fewer rows than requested fails loudly instead of silently
            # dropping fields from the output.
            for offset, probe in enumerate(chunk):
                writer.writerow([probe] + values[offset])

    print("done")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))