Mercurial > repos > melissacline > ucsc_xena_platform
annotate xena_query.py @ 47:b19e848f9886
update xena to v16
author | jingchunzhu |
---|---|
date | Fri, 07 Aug 2015 17:01:10 -0700 |
parents | 02b0824c7d60 |
children |
rev | line source |
---|---|
0 | 1 """ |
2 Utilities for xena queries. | |
3 | |
4 A basic query example. | |
5 Queries are scheme expressions. | |
6 | |
7 >>> import xena_query as xena | |
8 >>> xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", "(+ 1 2)") | |
9 '3.0' | |
10 | |
11 >>> xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", "(let [x 2 y (+ x 3)] (* x y))") | |
12 '10.0' | |
13 | |
14 Looking up sample ids for the TCGA LGG cohort. | |
15 | |
16 >>> r = xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", | |
17 xena.patient_to_sample_query("TCGA.LGG.sampleMap", | |
18 ["TCGA-CS-4938", | |
19 "TCGA-HT-7693", | |
20 "TCGA-CS-6665", | |
21 "TCGA-S9-A7J2", | |
22 "TCGA-FG-A6J3"])) | |
23 '{"TCGA.LGG.sampleMap":["TCGA-CS-4938-01","TCGA-CS-6665-01","TCGA-FG-A6J3-01","TCGA-HT-7693-01","TCGA-S9-A7J2-01"]}' | |
24 | |
25 >>> r = xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", | |
26 xena.find_sample_by_field_query("TCGA.LGG.sampleMap", | |
27 "_PATIENT", | |
28 ["TCGA-CS-4938", | |
29 "TCGA-HT-7693", | |
30 "TCGA-CS-6665", | |
31 "TCGA-S9-A7J2", | |
32 "TCGA-FG-A6J3"])) | |
33 '{"TCGA.LGG.sampleMap":["TCGA-CS-4938-01","TCGA-CS-6665-01","TCGA-FG-A6J3-01","TCGA-HT-7693-01","TCGA-S9-A7J2-01"]}' | |
34 >>> import json | |
35 >>> json.loads(r) | |
36 {u'TCGA.LGG.sampleMap': [u'TCGA-CS-4938-01', u'TCGA-CS-6665-01', u'TCGA-FG-A6J3-01', u'TCGA-HT-7693-01', u'TCGA-S9-A7J2-01']} | |
37 """ | |
38 | |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
39 import urllib2, json |
0 | 40 import re |
41 | |
def compose1(f, g):
    """Return the composition of two callables.

    The returned function forwards all of its positional and keyword
    arguments to ``g`` and passes ``g``'s result to ``f``, i.e.
    ``compose1(f, g)(a, ...) == f(g(a, ...))``.
    """
    def composed(*args, **kwargs):
        return f(g(*args, **kwargs))
    return composed
46 | |
# Functional composition, e.g.
# compose(f, g)(a, ...) == f(g(a, ...))
def compose(*funcs):
    """Compose any number of callables, applied right to left.

    ``compose(f, g, h)(x)`` is ``f(g(h(x)))``.  With a single callable that
    callable is returned unchanged.  With no callables ``reduce`` raises
    ``TypeError`` because there is no initial value.
    """
    # Imported locally: ``reduce`` is a builtin on Python 2 but only lives in
    # functools on Python 3; functools.reduce exists on both.
    from functools import reduce
    return reduce(compose1, funcs)
50 | |
def quote(s):
    """Return ``s`` wrapped in double quotes for use as a query string literal.

    Backslashes and embedded double quotes are backslash-escaped so that a
    value containing them cannot end the literal early and corrupt (or
    inject into) the surrounding query.  Strings without those characters
    are returned exactly as before: ``'"' + s + '"'``.
    """
    # Escape backslashes first so the backslashes added for quotes are not
    # themselves doubled.
    escaped = s.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'
53 | |
def array_fmt(l):
    """Format an iterable of strings as a bracketed, comma-separated list.

    Each element is passed through ``quote`` and the results are joined with
    ", " between square brackets, e.g. ``["a", "b"]``.  An empty iterable
    yields ``[]``.
    """
    return '[' + ', '.join(quote(s) for s in l) + ']'
56 | |
57 # The strategy here is | |
58 # o Do table scan on code to find codes matching field values | |
59 # o Do IN query on unpack(field, x) to find rows matching codes | |
60 # o Project to unpack(sample, x) to get sampleID code | |
61 # o Join with code to get sampleID values | |
62 # | |
63 # Note the :limit on the table scan. This makes the table scan exit after we've | |
64 # found enough values, rather than continuing to the end. We can do this because | |
65 # enumerated values are unique. An alternative would be to index all the enumerated | |
66 # values in the db. | |
67 | |
# Query selecting the distinct, non-nil cohort names from the dataset table.
cohort_query_str = """
(map :cohort (query {:select [:%distinct.cohort]
                     :from [:dataset]
                     :where [:not [:is nil :cohort]]}))
"""
73 | |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
# Query template listing the names of the datasets in one cohort.
# The single %s is the quoted cohort name.
datasets_list_in_cohort_str = """
(map :name (query {:select [:name :type :datasubtype :probemap :text :status]
                   :from [:dataset]
                   :where [:= :cohort %s]}))
"""
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
79 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
# Query template selecting the type of the dataset with a given name.
# The single %s is the quoted dataset name.
dataset_type_str = """
(map :type (query {:select [:type]
                   :from [:dataset]
                   :where [:= :name %s]}))
"""
85 | |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
# Query template selecting the field names joined to a given dataset.
# The single %s is the quoted dataset name.
dataset_field_str = """
(map :name (query {:select [:field.name]
                   :from [:dataset]
                   :join [:field [:= :dataset.id :dataset_id]]
                   :where [:= :dataset.name %s]}))
"""
92 | |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
# Query template selecting the code values of the "sampleID" field of a
# given dataset.  The single %s is the quoted dataset name.
dataset_samples_str = """
(map :value (query {:select [:value]
                    :from [:dataset]
                    :join [:field [:= :dataset.id :dataset_id]
                           :code [:= :field.id :field_id]]
                    :where [:and
                            [:= :dataset.name %s]
                            [:= :field.name "sampleID"]]}))
"""
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
102 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
# Fetch template.  The three %s placeholders are, in order: the quoted
# dataset (table) name, the list of columns, and the list of samples.
dataset_probe_str = """
(fetch [{:table %s
         :columns %s
         :samples %s}])
"""
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
108 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
109 |
def find_sample_by_field_query(cohort, field, values):
    """Return a xena query which looks up sample ids for the given field=values.

    ``cohort`` and ``field`` are quoted with ``quote`` and ``values`` is
    formatted with ``array_fmt`` before being substituted into the
    ``sample_query_str`` template.
    """
    # NOTE(review): sample_query_str is not defined anywhere in the visible
    # source, so this call raises NameError as written -- confirm whether the
    # template was dropped in an earlier revision and restore it.
    return sample_query_str % (quote(cohort), quote(field), array_fmt(values))
113 | |
def patient_to_sample_query(cohort, patients):
    """Return a xena query which looks up sample ids for the given patients.

    This is ``find_sample_by_field_query`` with the field fixed to
    "_PATIENT".
    """
    return find_sample_by_field_query(cohort, "_PATIENT", patients)
117 | |
# HTTP headers sent with every query by post(); the query body is plain text.
headers = {'Content-Type': "text/plain"}
119 | |
def post(url, query):
    """POST a xena data query to the given url.

    The request is sent to ``url + '/data/'`` with ``query`` as the body and
    the module-level ``headers``.  Returns the raw response body as read
    from the connection.  Errors raised by ``urllib2.urlopen`` propagate to
    the caller.
    """
    req = urllib2.Request(url + '/data/', query, headers)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Always release the connection, even if reading the body fails.
        response.close()
126 | |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
def find_cohorts(url):
    """Return a list of cohorts on a host at a specific url.

    Return example: ["chinSF2007_public", "TCGA.BRCA.sampleMap", "cohort3"]
    """
    return json.loads(post(url, cohort_query_str))
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
131 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
def dataset_field(host, dataset):
    """Return probes or features of a dataset.

    Posts ``dataset_field_str`` (with the quoted dataset name substituted)
    to ``host`` and returns the JSON-decoded response.
    """
    return json.loads(post(host, dataset_field_str % (quote(dataset))))
0 | 135 |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
def datasets_list_in_cohort(host, cohort):
    """Return datasets in a cohort.

    Posts ``datasets_list_in_cohort_str`` (with the quoted cohort name
    substituted) to ``host`` and returns the JSON-decoded response.
    """
    return json.loads(post(host, datasets_list_in_cohort_str % (quote(cohort))))
0 | 139 |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
def dataset_samples(host, dataset):
    """Return the "sampleID" values of a dataset.

    Posts ``dataset_samples_str`` (with the quoted dataset name substituted)
    to ``host`` and returns the JSON-decoded response.
    """
    return json.loads(post(host, dataset_samples_str % (quote(dataset))))
0 | 142 |
41
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
def dataset_probe_values(host, dataset, samples, probes):
    """Fetch values for the given probes and samples of a dataset.

    Fills ``dataset_probe_str`` with the quoted dataset name as the table,
    ``probes`` as the columns and ``samples`` as the samples, posts it to
    ``host`` and returns the JSON-decoded response.  Note that the
    parameter order here (samples, probes) is the reverse of the template
    order (columns, samples).
    """
    query = dataset_probe_str % (quote(dataset), array_fmt(probes), array_fmt(samples))
    return json.loads(post(host, query))
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
145 |
02b0824c7d60
Download data from any hub in the federated xena platform
jingchunzhu <jingchunzhu@gmail.com>
parents:
0
diff
changeset
|
def dataset_type(host, dataset):
    """Return the type recorded for a dataset.

    Posts ``dataset_type_str`` (with the quoted dataset name substituted)
    to ``host`` and returns the JSON-decoded response.
    """
    return json.loads(post(host, dataset_type_str % (quote(dataset))))
0 | 148 |
def strip_first_url_dir(path):
    """Remove everything before the first '/' in ``path``.

    The slash itself is kept.  A path that already starts with '/' is
    returned unchanged, and a path with no '/' at all becomes ''.
    """
    return re.sub(r'^[^/]*', '', path)
151 | |
# proj/<proj>/xena/<proj>/<path>
# download/<proj>/xena/<path>
def name_to_url(base_url, name):
    """Build a download url for a dataset name.

    Every '/proj/' in ``base_url`` is replaced with '/download/', and
    ``name`` is appended with its first path component removed by
    ``strip_first_url_dir``.
    """
    return base_url.replace('/proj/', '/download/') + strip_first_url_dir(name)