comparison getXenaData.py @ 41:02b0824c7d60

Download data from any hub in the federated xena platform
author jingchunzhu <jingchunzhu@gmail.com>
date Mon, 27 Jul 2015 10:05:22 -0700
parents
children bc9784300015
comparison
equal deleted inserted replaced
40:fd24e220f240 41:02b0824c7d60
# getXenaData.py
"""Download one dataset from a hub in the federated Xena platform.

Usage:
    python getXenaData.py hub datasetId outputfile

The dataset is written to ``outputfile`` as tab-separated text: the first
row is ``sample`` followed by every sample returned for the dataset, and
each later row is one field name followed by the values returned for that
field.  Only datasets whose type is ``genomicMatrix`` or ``clinicalMatrix``
are handled.

Exit status is 0 on success and 1 on a usage error, when no samples are
returned for the dataset, or when the dataset type is unsupported.
"""
import os
import sys
import string
import json
import csv
import xena_query as xena

# Number of fields requested from the hub per query.
CHUNK_SIZE = 100

# Dataset types this script knows how to dump.
SUPPORTED_TYPES = ["genomicMatrix", "clinicalMatrix"]

# NOTE: every print below is written as print("...") with a single string
# argument, which emits the same text under Python 2 and Python 3.

if len(sys.argv) != 4:
    print("python getXenaData.py hub datasetId outputfile\n")
    sys.exit(1)

url = sys.argv[1]
dataset = sys.argv[2]
output = sys.argv[3]

# Any spelling of the UCSC galaxyxena hub is mapped to its canonical URL.
if "galaxyxena" in url and "ucsc.edu" in url:
    url = "https://galaxyxena.soe.ucsc.edu:443/xena"

samples = xena.dataset_samples(url, dataset)
if not samples:
    print("dataset does not exists")
    sys.exit(1)

# Named data_type (not `type`) so the builtin is not shadowed.
data_type = xena.dataset_type(url, dataset)
if data_type[0] not in SUPPORTED_TYPES:
    print("the current data type is not supported")
    sys.exit(1)

# presumably the list of field (probe) names of the dataset -- confirm
# against xena_query.dataset_field
probes = xena.dataset_field(url, dataset)

# The with-block guarantees the output file is closed even when a hub query
# or a write fails part-way through the download.
with open(output, 'w') as fout:
    writer = csv.writer(fout, delimiter='\t')
    writer.writerow(["sample"] + samples)

    # Fetch the values CHUNK_SIZE fields at a time.
    for offset in range(0, len(probes), CHUNK_SIZE):
        chunk = probes[offset:offset + CHUNK_SIZE]
        results = xena.dataset_probe_values(url, dataset, samples, chunk)
        print("...")
        # Index into results (rather than zip) so that a response with
        # fewer rows than requested fields still fails loudly.
        for j, probe in enumerate(chunk):
            writer.writerow([probe] + results[j])

print("done")
sys.exit(0)