annotate getXenaData.py @ 41:02b0824c7d60

Download data from any hub in the federated xena platform
author jingchunzhu <jingchunzhu@gmail.com>
date Mon, 27 Jul 2015 10:05:22 -0700
parents
children bc9784300015
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
41
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
1 # getXenaData.py
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
2 import os, sys, string, json, csv
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
3 import xena_query as xena
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
4
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
# Canonical endpoint substituted for any hub URL that mentions both
# "galaxyxena" and "ucsc.edu".
GALAXY_XENA_HUB = "https://galaxyxena.soe.ucsc.edu:443/xena"

# Dataset types this script knows how to write out as a matrix.
SUPPORTED_TYPES = ("genomicMatrix", "clinicalMatrix")

# Number of probes requested from the hub per query.
BATCH_SIZE = 100


def main(argv):
    """Download one dataset from a xena hub into a tab-separated file.

    The output file has a header row ("sample" followed by the sample
    names) and then one row per probe: the probe name followed by the
    values returned by the hub for that probe.

    Args:
        argv: the full command line, i.e.
            [script, hub, datasetId, outputfile].

    Returns:
        The process exit status: 0 on success; 1 when the argument count
        is wrong, when the hub returns no samples for the dataset, or when
        the dataset type is not one of SUPPORTED_TYPES.
    """
    # NOTE: every print below is a parenthesised single-argument call so
    # the script parses and behaves the same under Python 2 and Python 3.
    if len(argv) != 4:
        print("python getXenaData.py hub datasetId outputfile\n")
        return 1

    url, dataset, output = argv[1:]

    # Any spelling of the UCSC galaxyxena hub is rewritten to the one
    # canonical https endpoint.
    if "galaxyxena" in url and "ucsc.edu" in url:
        url = GALAXY_XENA_HUB

    samples = xena.dataset_samples(url, dataset)
    if not samples:
        print("dataset does not exists")
        return 1

    # Guard against an empty/None reply before indexing it, so that case is
    # reported as "not supported" instead of crashing with a traceback.
    dataset_type = xena.dataset_type(url, dataset)
    if not dataset_type or dataset_type[0] not in SUPPORTED_TYPES:
        print("the current data type is not supported")
        return 1

    # The with-block guarantees the output file is closed even when a hub
    # query or a row write raises part-way through the download.
    with open(output, 'w') as fout:
        writer = csv.writer(fout, delimiter='\t')
        writer.writerow(["sample"] + samples)

        probes = xena.dataset_field(url, dataset)
        # Query the hub in fixed-size batches of probes; slicing naturally
        # shortens the final batch, so no explicit end-of-list check is
        # needed.
        for start in range(0, len(probes), BATCH_SIZE):
            batch = probes[start:start + BATCH_SIZE]
            results = xena.dataset_probe_values(url, dataset, samples, batch)
            print("...")  # one progress marker per batch
            # results is indexed by position within the batch; a short
            # reply still raises IndexError rather than silently dropping
            # rows.
            for offset, probe in enumerate(batch):
                writer.writerow([probe] + results[offset])

    print("done")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))