annotate getXenaData.py @ 55:421b18a0b659 default tip

update v17 step 2, add xena.jar
author jingchunzhu
date Tue, 22 Sep 2015 10:07:51 -0700
parents bb840cc2603d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
52
bb840cc2603d better error handling
jingchunzhu
parents: 50
diff changeset
1 #!/usr/bin/env python
bb840cc2603d better error handling
jingchunzhu
parents: 50
diff changeset
2
41
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
3 import os, sys, string, json, csv
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
4 import xena_query as xena
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
5
52
bb840cc2603d better error handling
jingchunzhu
parents: 50
diff changeset
# The script needs exactly four command-line arguments (hub, dataset id,
# output file, galaxy url); otherwise print the usage line and exit with 2.
if len(sys.argv) != 5:
    print("python getXenaData.py hub datasetId outputfile galaxy_url\n")
    sys.exit(2)
41
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
9
50
jingchunzhu
parents: 43
diff changeset
def _report_error(fout, detail_lines):
    """Write the error banner, then each string in *detail_lines*, to *fout*."""
    fout.write("There is an error\n\n")
    for line in detail_lines:
        fout.write(line)


def main(argv=None, client=None):
    """Download one dataset from a Xena hub into a tab-separated file.

    The output file receives either the data (a header row of "sample" plus
    the sample ids, then one row per probe) or a plain-text error report.
    The function always finishes by raising SystemExit: status 0 after the
    data has been written, status 1 after an error report has been written.

    Args:
        argv: Argument list laid out like ``sys.argv``:
            ``[prog, hub_url, dataset_id, output_path, galaxy_url]``.
            Defaults to ``sys.argv``.
        client: Object used to query the hub. It must provide ``post``,
            ``dataset_samples``, ``dataset_type``, ``dataset_field`` and
            ``dataset_probe_values``. Defaults to the module-level ``xena``
            import.

    Raises:
        SystemExit: always (see above).
    """
    if argv is None:
        argv = sys.argv
    if client is None:
        client = xena

    url = argv[1]
    dataset = argv[2]
    output = argv[3]
    galaxy_url = argv[4]

    # Any url mentioning both "galaxyxena" and "ucsc.edu" is contacted through
    # this fixed address; the url as typed is still what error reports show.
    contact_url = url
    if "galaxyxena" in url and "ucsc.edu" in url:
        contact_url = "https://galaxyxena.soe.ucsc.edu:443/xena"

    batch_size = 100  # number of probes requested from the hub per query

    # The context manager closes the file on every exit path, including
    # SystemExit and unexpected errors raised by the hub client.
    with open(output, "w") as fout:
        # Test that the hub is reachable by asking it to evaluate (+ 1 2).
        # Only the request itself is guarded: sys.exit() must stay outside
        # the try block, otherwise the SystemExit it raises would be caught
        # by the handler as well.
        try:
            reachable = json.loads(client.post(contact_url, "(+ 1 2)")) == 3.0
        except Exception:
            reachable = False

        if not reachable:
            _report_error(fout, [
                "You entered hub: %s\n" % (url),
                "Possible causes for error:\n",
                "1. The hub is not running.\n",
                "2. The hub is not accessible to this galaxy instance at: %s.\n" % (galaxy_url),
                "3. The hub url has a typo.\n",
            ])
            sys.exit(1)

        samples = client.dataset_samples(contact_url, dataset)
        if not samples:
            _report_error(fout, [
                "You entered dataset id: %s\n" % (dataset),
                "Possible causes for error:\n",
                "1. Dataset does not exist.\n",
            ])
            sys.exit(1)

        dataset_type = client.dataset_type(contact_url, dataset)
        if dataset_type[0] not in ("genomicMatrix", "clinicalMatrix"):
            _report_error(fout, [
                "You entered dataset id: %s\n" % (dataset),
                "It's datatype=%s\n" % (dataset_type[0]),
                "This type of data is not supported yet.\n",
            ])
            sys.exit(1)

        writer = csv.writer(fout, delimiter='\t')
        writer.writerow(["sample"] + samples)

        probes = client.dataset_field(contact_url, dataset)
        for offset in range(0, len(probes), batch_size):
            batch = probes[offset:offset + batch_size]
            values = client.dataset_probe_values(
                contact_url, dataset, samples, batch)
            # Progress marker on stdout, one per batch.
            sys.stdout.write(". ")
            # Index into the reply so that a reply shorter than the batch
            # still raises IndexError instead of silently dropping probes.
            for position, probe in enumerate(batch):
                writer.writerow([probe] + values[position])

    sys.exit(0)
41
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
81
50
jingchunzhu
parents: 43
diff changeset
# Run the download only when this file is executed as a script.
if __name__ == "__main__":
    main()
jingchunzhu
parents: 43
diff changeset
84