Mercurial > repos > melissacline > ucsc_xena_platform
annotate getXenaData.py @ 45:129ba676fd68
remove more strange xmls
author | jingchunzhu <jingchunzhu@gmail.com> |
---|---|
date | Mon, 27 Jul 2015 15:39:35 -0700 |
parents | 78d6e6772e30 |
children | 3167c1a26101 |
rev | line source |
---|---|
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
1 # getXenaData.py |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
2 import os, sys, string, json, csv |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
3 import xena_query as xena |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
4 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
5 if len(sys.argv[:])!=4: |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
6 print "python getXenaData.py hub datasetId outputfile\n" |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
7 sys.exit(1) |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
8 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
9 url = sys.argv[1] |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
10 dataset = sys.argv[2] |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
11 output = sys.argv[3] |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
12 |
42
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
13 fout = open(output,'w') |
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
14 |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
15 if string.find(url,"galaxyxena") !=-1 and string.find(url,"ucsc.edu")!=-1: |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
16 url = "https://galaxyxena.soe.ucsc.edu:443/xena" |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
17 |
42
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
18 #testing if the url is reachable |
43 | 19 try: |
20 r =json.loads(xena.post(url, "(+ 1 2)")) | |
21 if r!=3.0: | |
22 print "The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you." | |
23 print "You entered hub: %s" % (url) | |
24 fout.write("The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you.\n") | |
25 fout.write("You entered hub: %s\n" % (url)) | |
26 fout.close() | |
27 sys.exit(1) | |
28 except: | |
29 print "The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you." | |
30 print "You entered hub: %s" % (url) | |
31 fout.write("The hub seems can not be reached, either it is not running, the url has a typo, or it is not accessible to you.\n") | |
32 fout.write("You entered hub: %s\n" % (url)) | |
33 fout.close() | |
34 sys.exit(1) | |
42
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
35 |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
36 samples = xena.dataset_samples (url, dataset) |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
37 if not samples: |
42
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
38 print "Dataset does not exist" |
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
39 print "You entered dataset id: %s" % (dataset) |
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
40 fout.write("Dataset does not exists\n") |
43 | 41 fout.write("You entered dataset id: %s\n" % (dataset)) |
42
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
42 fout.close() |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
43 sys.exit(1) |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
44 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
45 type = xena.dataset_type(url, dataset) |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
46 if type[0] not in ["genomicMatrix", "clinicalMatrix"]: |
42
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
47 print "The type of data is not supported" |
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
48 print "datatype=%s" % (type[0]) |
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
49 fout.write("The type of data is not supported\n") |
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
50 fout.write("datatype=%s\n" % (type[0])) |
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
51 fout.close() |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
52 sys.exit(1) |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
53 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
54 writer = csv.writer(fout, delimiter='\t') |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
55 writer.writerow(["sample"]+samples) |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
56 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
57 probes = xena.dataset_field(url, dataset) |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
58 start=0 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
59 size =100 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
60 N= len(probes) |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
61 for i in range (start, N,size): |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
62 results = xena.dataset_probe_values (url, dataset, samples, probes[i:i+size]) |
42
bc9784300015
more reliable, error handling
jingchunzhu <jingchunzhu@gmail.com>
parents:
41
diff
changeset
|
63 print ".", |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
64 for j in range (0, size): |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
65 if i+j == N: |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
66 break |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
67 writer.writerow([probes[i+j]]+results[j]) |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
68 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
69 fout.close() |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
70 print "done" |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
diff
changeset
|
71 sys.exit(0) |