annotate getXenaData.py @ 45:129ba676fd68

remove more strange xmls
author jingchunzhu <jingchunzhu@gmail.com>
date Mon, 27 Jul 2015 15:39:35 -0700
parents 78d6e6772e30
children 3167c1a26101
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
41
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
1 # getXenaData.py
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
2 import os, sys, string, json, csv
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
3 import xena_query as xena
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
4
02b0824c7d60 Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff changeset
# Dataset types this script knows how to dump as a tab-separated matrix.
SUPPORTED_TYPES = ("genomicMatrix", "clinicalMatrix")

# Canonical address used for any URL that points at the UCSC galaxyxena hub.
GALAXY_HUB_URL = "https://galaxyxena.soe.ucsc.edu:443/xena"

# Number of probes (rows) requested from the hub per query.
BATCH_SIZE = 100


def canonical_hub_url(url):
    """Return the hub URL to query for the user-supplied *url*.

    Any URL mentioning both "galaxyxena" and "ucsc.edu" is rewritten to the
    fixed galaxyxena address; every other URL is returned unchanged.
    """
    if "galaxyxena" in url and "ucsc.edu" in url:
        return GALAXY_HUB_URL
    return url


def hub_is_reachable(post, url):
    """Return True if the hub at *url* answers a trivial query correctly.

    *post* is the query function (the script passes ``xena.post``); it is
    called as ``post(url, "(+ 1 2)")`` and its JSON-decoded reply must equal 3.

    Any failure while querying or decoding counts as "not reachable".  Only
    ``Exception`` is caught, so ``SystemExit`` and ``KeyboardInterrupt`` still
    propagate -- the previous bare ``except:`` swallowed the script's own
    ``sys.exit(1)`` and then wrote to an already-closed file.
    """
    try:
        return json.loads(post(url, "(+ 1 2)")) == 3.0
    except Exception:
        return False


def report_failure(fout, lines):
    """Echo each message in *lines* to stdout and to the output file *fout*.

    The same text goes to both destinations, one message per line.
    """
    for line in lines:
        print(line)
        fout.write(line + "\n")


def write_matrix(writer, probes, fetch_values, batch_size=BATCH_SIZE):
    """Write one row per probe to *writer*, fetching values in batches.

    *fetch_values* is called with a slice of at most *batch_size* probes and
    must return one list of values per probe, in the same order.  A progress
    dot is written to stdout after each batch is fetched.
    """
    for start in range(0, len(probes), batch_size):
        batch = probes[start:start + batch_size]
        values = fetch_values(batch)
        sys.stdout.write(". ")
        # Index (rather than zip) so a short reply from the hub raises
        # IndexError instead of silently dropping rows.
        for offset, probe in enumerate(batch):
            writer.writerow([probe] + values[offset])


def main(argv):
    """Download one dataset from a xena hub into a tab-separated file.

    *argv* is ``[program, hub, datasetId, outputfile]``.  Returns the process
    exit status: 0 on success, 1 on a usage error or when the hub or dataset
    cannot be used (in which case the reason is also written to the output
    file).
    """
    if len(argv) != 4:
        print("python getXenaData.py hub datasetId outputfile\n")
        return 1

    url = canonical_hub_url(argv[1])
    dataset = argv[2]
    output = argv[3]

    # The context manager guarantees the file is closed on every exit path,
    # including an exception raised by one of the xena calls.
    with open(output, 'w') as fout:
        if not hub_is_reachable(xena.post, url):
            report_failure(fout, [
                "The hub seems can not be reached, either it is not running, "
                "the url has a typo, or it is not accessible to you.",
                "You entered hub: %s" % (url),
            ])
            return 1

        samples = xena.dataset_samples(url, dataset)
        if not samples:
            report_failure(fout, [
                "Dataset does not exist",
                "You entered dataset id: %s" % (dataset),
            ])
            return 1

        dataset_type = xena.dataset_type(url, dataset)
        if dataset_type[0] not in SUPPORTED_TYPES:
            report_failure(fout, [
                "The type of data is not supported",
                "datatype=%s" % (dataset_type[0]),
            ])
            return 1

        writer = csv.writer(fout, delimiter='\t')
        writer.writerow(["sample"] + samples)

        probes = xena.dataset_field(url, dataset)
        write_matrix(
            writer,
            probes,
            lambda batch: xena.dataset_probe_values(url, dataset, samples, batch),
        )

    print("done")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))