Mercurial > repos > melissacline > ucsc_xena_platform
comparison getXenaData.py @ 41:02b0824c7d60
Download data from any hub in the federated xena platform
author | jingchunzhu <jingchunzhu@gmail.com> |
---|---|
date | Mon, 27 Jul 2015 10:05:22 -0700 |
parents | |
children | bc9784300015 |
comparison
equal
deleted
inserted
replaced
40:fd24e220f240 | 41:02b0824c7d60 |
---|---|
1 # getXenaData.py | |
2 import os, sys, string, json, csv | |
3 import xena_query as xena | |
4 | |
# Dataset types this script knows how to export as a tab-separated matrix.
SUPPORTED_TYPES = ("genomicMatrix", "clinicalMatrix")

# Endpoint substituted for any hub URL mentioning both "galaxyxena" and
# "ucsc.edu".
GALAXY_XENA_URL = "https://galaxyxena.soe.ucsc.edu:443/xena"

# Number of probes/fields requested from the hub per query.
CHUNK_SIZE = 100


def main(argv):
    """Download one dataset from a Xena hub into a tab-separated file.

    Usage: python getXenaData.py hub datasetId outputfile

    The first output row is "sample" followed by the entries returned by
    xena.dataset_samples(). Every following row is one entry returned by
    xena.dataset_field() followed by the matching entry of the
    xena.dataset_probe_values() result for that chunk (presumably one value
    per sample -- confirm against xena_query).

    Parameters:
        argv -- full argument vector, including the program name at index 0.

    Returns:
        0 on success; 1 when the arguments are wrong, the dataset has no
        samples, or its type is not one of SUPPORTED_TYPES.
    """
    if len(argv) != 4:
        print("python getXenaData.py hub datasetId outputfile\n")
        return 1

    url, dataset, output = argv[1:4]

    # Any spelling of the UCSC Galaxy Xena hub is replaced by a fixed URL.
    if "galaxyxena" in url and "ucsc.edu" in url:
        url = GALAXY_XENA_URL

    samples = xena.dataset_samples(url, dataset)
    if not samples:
        print("dataset does not exists")
        return 1

    # An empty type result is reported as unsupported instead of raising
    # IndexError on the [0] lookup.
    dataset_type = xena.dataset_type(url, dataset)
    if not dataset_type or dataset_type[0] not in SUPPORTED_TYPES:
        print("the current data type is not supported")
        return 1

    # Fetched before the output file is opened, so a failure here does not
    # leave a header-only file behind.
    probes = xena.dataset_field(url, dataset)

    # The context manager closes the file even if a hub query raises
    # part-way through the download.
    with open(output, 'w') as fout:
        writer = csv.writer(fout, delimiter='\t')
        writer.writerow(["sample"] + samples)

        for start in range(0, len(probes), CHUNK_SIZE):
            chunk = probes[start:start + CHUNK_SIZE]
            results = xena.dataset_probe_values(url, dataset, samples, chunk)
            print("...")  # one progress marker per chunk
            # Indexing (rather than zip) keeps the IndexError if the hub
            # returns fewer rows than probes were requested.
            for offset, probe in enumerate(chunk):
                writer.writerow([probe] + results[offset])

    print("done")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))