Mercurial > repos > melissacline > start_xena
annotate xena_query.py @ 10:1374011ce9bc default tip
Updating the xena jar to version 0.3
author | melissacline |
---|---|
date | Tue, 23 Sep 2014 21:42:44 -0700 |
parents | 75c7d80df9c1 |
children |
rev | line source |
---|---|
6
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
1 """ |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
2 Utilities for xena queries. |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
3 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
4 A basic query example. |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
5 Queries are scheme expressions. |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
6 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
7 >>> import xena_query as xena |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
8 >>> xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", "(+ 1 2)") |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
9 '3.0' |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
10 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
11 >>> xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", "(let [x 2 y (+ x 3)] (* x y))") |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
12 '10.0' |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
13 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
14 Looking up sample ids for the TCGA LGG cohort. |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
15 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
16 >>> r = xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
17 xena.patient_to_sample_query("TCGA.LGG.sampleMap", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
18 ["TCGA-CS-4938", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
19 "TCGA-HT-7693", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
20 "TCGA-CS-6665", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
21 "TCGA-S9-A7J2", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
22 "TCGA-FG-A6J3"])) |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
23 '{"TCGA.LGG.sampleMap":["TCGA-CS-4938-01","TCGA-CS-6665-01","TCGA-FG-A6J3-01","TCGA-HT-7693-01","TCGA-S9-A7J2-01"]}' |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
24 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
25 >>> r = xena.post("https://genome-cancer.ucsc.edu/proj/public/xena", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
26 xena.find_sample_by_field_query("TCGA.LGG.sampleMap", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
27 "_PATIENT", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
28 ["TCGA-CS-4938", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
29 "TCGA-HT-7693", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
30 "TCGA-CS-6665", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
31 "TCGA-S9-A7J2", |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
32 "TCGA-FG-A6J3"])) |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
33 '{"TCGA.LGG.sampleMap":["TCGA-CS-4938-01","TCGA-CS-6665-01","TCGA-FG-A6J3-01","TCGA-HT-7693-01","TCGA-S9-A7J2-01"]}' |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
34 >>> import json |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
35 >>> json.loads(r) |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
36 {u'TCGA.LGG.sampleMap': [u'TCGA-CS-4938-01', u'TCGA-CS-6665-01', u'TCGA-FG-A6J3-01', u'TCGA-HT-7693-01', u'TCGA-S9-A7J2-01']} |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
37 """ |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
38 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
39 import urllib2 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
40 import re |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
41 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
def compose1(f, g):
    """Compose two callables.

    Returns a function that passes all of its positional and keyword
    arguments to ``g`` and then feeds the single result to ``f``.
    """
    def composed(*args, **kwargs):
        return f(g(*args, **kwargs))
    return composed


def compose(*funcs):
    """Functional composition of one or more callables, applied right to left.

    compose(f, g)(a, ...) == f(g(a, ...))

    With a single function, that function is returned unchanged. Calling
    compose() with no functions raises TypeError (reduce of an empty
    sequence with no initial value).
    """
    # `reduce` is a builtin on Python 2 only; functools.reduce is available
    # on Python 2.6+ and Python 3, so import it explicitly.
    from functools import reduce
    return reduce(compose1, funcs)
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
50 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
def quote(s):
    """Return ``s`` wrapped in double quotes, as a query string literal.

    Backslashes and double quotes inside ``s`` are backslash-escaped so that
    the value cannot terminate the literal early and alter the surrounding
    query. Strings without those characters are returned exactly as before.
    """
    # Escape backslashes first so the backslashes added for quotes are not
    # themselves doubled.
    escaped = s.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
53 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
def array_fmt(l):
    """Format an iterable of strings as a bracketed, comma-separated vector.

    Each item is passed through quote(); the items are joined with ', ' and
    enclosed in square brackets.
    """
    quoted_items = [quote(item) for item in l]
    return '[%s]' % ', '.join(quoted_items)
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
56 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
# Query template used to look up sample ids by the value of a field in a
# cohort. It has three %s slots which find_sample_by_field_query fills in
# order: the quoted cohort name, the quoted field name, and the vector of
# quoted field values. The result is a map of cohort -> list of sample ids.
#
# The strategy here is
#   o Do table scan on code to find codes matching field values
#   o Do IN query on unpack(field, x) to find rows matching codes
#   o Project to unpack(sample, x) to get sampleID code
#   o Join with code to get sampleID values
#
# Note the :limit on the table scan. This makes the table scan exit after we've
# found enough values, rather than continuing to the end. We can do this because
# enumerated values are unique. An alternative would be to index all the enumerated
# values in the db.
sample_query_str = """
(let [cohort %s
      field_id-dataset (car (query {:select [[:field.id :field_id] [:dataset.id :dataset]]
                                    :from [:dataset]
                                    :join [:field [:= :dataset_id :dataset.id]]
                                    :where [:and [:= :cohort cohort]
                                                 [:= :field.name %s]]}))
      values %s
      field_id (:field_id field_id-dataset)
      dataset (:dataset field_id-dataset)
      sample (:id (car (query {:select [:field.id]
                               :from [:field]
                               :where [:and [:= :dataset_id dataset]
                                            [:= :field.name "sampleID"]]})))
      N (- (:rows (car (query {:select [:rows]
                               :from [:dataset]
                               :where [:= :id dataset]}))) 1)]
  {cohort (map :value (query {:select [:value]
                              :from [{:select [:x #sql/call [:unpack field_id, :x]]
                                      :from [#sql/call [:system_range 0 N]]
                                      :where [:in #sql/call [:unpack field_id, :x] {:select [:ordering]
                                                                                   :from [:code]
                                                                                   :where [:and [:= :field_id field_id]
                                                                                                [:in :value values]]
                                                                                   :limit (count values)}]}]
                              :join [:code [:and [:= :field_id sample]
                                                 [:= :ordering #sql/call [:unpack sample :x]]]]})})
"""
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
95 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
# Query selecting the distinct cohort values from the dataset table, skipping
# rows whose cohort is nil. Takes no substitutions; returned as-is by
# find_cohorts().
cohort_query_str = """
(map :cohort (query {:select [:%distinct.cohort]
                     :from [:dataset]
                     :where [:not [:is nil :cohort]]}))
"""
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
101 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
# Query template selecting the :text column of every dataset in one cohort.
# The single %s slot takes the quoted cohort name. find_datasets_in_cohort()
# JSON-decodes each returned :text entry.
# The expression must end with two closing parentheses, one for (query ...)
# and one for (map ...); the second one was previously missing.
datasets_list_in_cohort_query = """
(map :text (query {:select [:text]
                   :from [:dataset]
                   :where [:= :cohort %s ]}))
"""
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
107 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
# Query template selecting dataset names filtered by dataset type and by a
# LIKE pattern on the dataset name. Two %s slots, filled by
# find_datasets_type_pattern(): the quoted type, then the quoted pattern.
datasets_type_pattern_str = """
(map :name (query {:select [:name]
                   :from [:dataset]
                   :where [:and [:= :type %s]
                                [:like :name %s]]}))
"""
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
114 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
def find_sample_by_field_query(cohort, field, values):
    """Build the xena query that finds sample ids where `field` is one of `values`.

    The cohort and field names are quoted and the values are formatted as a
    vector, then substituted into sample_query_str. The query text is
    returned; it is not sent anywhere.
    """
    substitutions = (quote(cohort), quote(field), array_fmt(values))
    return sample_query_str % substitutions
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
118 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
def patient_to_sample_query(cohort, patients):
    """Build the xena query that finds the sample ids of the given patients.

    This is find_sample_by_field_query() applied to the "_PATIENT" field.
    """
    patient_field = "_PATIENT"
    return find_sample_by_field_query(cohort, patient_field, patients)
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
122 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
# HTTP headers passed to urllib2.Request by post(): the query body is sent
# as plain text.
headers = { 'Content-Type' : "text/plain" }
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
124 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
def post(url, query):
    """POST a xena data query to the given url and return the response body.

    `url` is the base url of the xena host; '/data/' is appended to it.
    `query` is the query text, sent as the request body with the
    module-level `headers`. Errors raised by urllib2 (e.g. URLError,
    HTTPError) propagate to the caller.
    """
    req = urllib2.Request(url + '/data/', query, headers)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Always release the connection, even if reading the body fails.
        response.close()
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
131 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
def find_cohorts():
    """Return the xena query which lists the cohorts on a host.

    Only the query text (cohort_query_str) is returned; nothing is sent.
    Pass it to post() together with a host url to run it.

    Example of the posted query's result:
    ["chinSF2007_public","TCGA.BRCA.sampleMap","cohort3"]
    """
    return cohort_query_str
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
136 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
def find_datasets_in_cohort(url, cohort):
    """Return a list of datasets in a specific cohort on server=url.

    Unlike the other query helpers, this one sends the query itself via
    post(). The response is decoded as a JSON list, and each entry of that
    list is decoded as JSON again, so each dataset in the returned list is
    the decoded metadata of one dataset.
    This should be refactored to be consistent with the other methods.
    """
    # json is not imported at module level, so import it here to avoid a
    # NameError when this function is called.
    import json
    query = datasets_list_in_cohort_query % quote(cohort)
    encoded_datasets = json.loads(post(url, query))
    return [json.loads(text) for text in encoded_datasets]
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
143 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
def find_datasets_type_pattern(type, pattern):
    """Build the xena query listing datasets of a given type by name pattern.

    The pattern is matched against the dataset name with sql LIKE, so
    % is the wildcard. The query text is returned; it is not sent anywhere.
    """
    quoted_type = quote(type)
    quoted_pattern = quote(pattern)
    return datasets_type_pattern_str % (quoted_type, quoted_pattern)
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
149 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
150 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
def strip_first_url_dir(path):
    """Drop everything before the first '/' in `path`.

    The slash itself is kept. A path that starts with '/' is returned
    unchanged, and a path without any '/' yields the empty string.
    """
    _first_dir, slash, remainder = path.partition('/')
    return slash + remainder
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
153 |
75c7d80df9c1
Adding the xena_query python API to the install bundle
melissacline
parents:
diff
changeset
|
# proj/<proj>/xena/<proj>/<path>
# download/<proj>/xena/<path>
def name_to_url(base_url, name):
    """Build a download url from a host base url and a dataset name.

    '/proj/' in `base_url` is replaced by '/download/', and `name` is
    appended after its leading component (everything before the first '/')
    has been removed.
    """
    download_base = base_url.replace('/proj/', '/download/')
    relative_path = strip_first_url_dir(name)
    return download_base + relative_path