Mercurial > repos > melissacline > ucsc_cancer_utilities
comparison seg2matrix/CGData/BaseMatrix.py @ 31:ab20c0d04f4a
add seg2matrix tool
author | jingchunzhu |
---|---|
date | Fri, 24 Jul 2015 13:10:11 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
30:7a7a52e9b019 | 31:ab20c0d04f4a |
---|---|
1 | |
2 import csv | |
3 import CGData | |
4 import math | |
5 from copy import copy | |
6 try: | |
7 import numpy | |
8 except ImportError: | |
9 numpy = None | |
10 | |
class BaseMatrix(CGData.CGDataMatrixObject):
    """
    Core matrix class. Implements a named row/column data matrix backed by
    either a numpy matrix or native python lists, depending on availability
    and user request.

    Class attributes:
      corner_name  -- text written in the top-left header cell by write()
      element_type -- callable used to convert cell text into values
      null_type    -- value stored for missing cells in native (list) mode

    NOTE(review): self.loaded / self.load() are not defined in this class;
    they are presumably provided by CGData.CGDataMatrixObject -- confirm.
    """
    corner_name = "#"
    element_type = str
    null_type = None

    # Cell texts that the native (csv) reader treats as "no value".
    _null_tokens = ('NA', 'null', 'NONE', 'N/A', '')

    def __init__(self, type=str):
        """
        Create an empty matrix.

        type -- element type to use for cell values. A 'valueType' of
                'float' declared in self['cgformat'] takes precedence.
        """
        CGData.CGDataMatrixObject.__init__(self)
        self.free()
        # Apply the requested type first so it is never silently ignored,
        # then let an explicit float declaration in the metadata override it.
        self.element_type = type
        if 'cgformat' in self and 'valueType' in self['cgformat']:
            if self['cgformat']["valueType"] == 'float':
                self.element_type = float

    def free(self):
        """
        Drop the matrix data and the row/column name indexes.
        """
        self.col_map = {}
        self.row_map = {}
        self.matrix = None

    def init_blank(self, cols, rows, skip_numpy=False):
        """
        Initialize the matrix with missing values (nan for numpy, null_type
        for native lists) using the row/column names provided by the user.

        cols       -- sequence of column names
        rows       -- sequence of row names
        skip_numpy -- force native python lists even when numpy is available

        numpy is only used for float matrices: a numpy string matrix has a
        fixed cell width (so stored strings get truncated) and an integer
        matrix cannot hold nan at all. Every other element type falls back
        to native lists.
        """
        # Start from clean indexes so a second call does not keep stale names.
        self.col_map = {}
        self.row_map = {}

        use_numpy = (numpy is not None and not skip_numpy
                     and self.element_type == float)
        if use_numpy:
            self.matrix = numpy.matrix(
                numpy.zeros((len(rows), len(cols)), dtype=self.element_type))
            self.matrix.fill(numpy.nan)
        else:
            self.matrix = []
            for _ in range(len(rows)):
                self.matrix.append([self.null_type] * len(cols))
        for i, c in enumerate(cols):
            self.col_map[c] = i
        for i, r in enumerate(rows):
            self.row_map[r] = i
        self.loaded = True

    @staticmethod
    def _index_columns(names):
        """
        Map each header name to its column position. A name that repeats is
        renamed to '<name>#1', '<name>#2', ... so that no column is lost.
        """
        pos_hash = {}
        for pos, name in enumerate(names):
            i = 1
            orig_name = name
            while name in pos_hash:
                name = orig_name + "#" + str(i)
                i += 1
            pos_hash[name] = pos
        return pos_hash

    def read(self, handle, skip_vals=False):
        """
        Load a tab-delimited matrix (first line: column names, first field
        of every other line: row name) from an open text handle.

        handle    -- iterable of text lines
        skip_vals -- native (non-numpy) mode only: index the row and column
                     names but store empty rows instead of cell values.
                     The numpy path always loads the values.
        """
        self.col_map = {}
        self.row_map = {}

        if numpy is not None:
            self._read_numpy(handle)
        else:
            self._read_native(handle, skip_vals)
        self.loaded = True

    def _read_numpy(self, handle):
        """
        Read the table into a numpy matrix and fill col_map / row_map.
        """
        table = []
        for line in handle:
            # Strip "\r" too, so CRLF files do not leave a carriage return
            # in the last column name; skip completely blank lines.
            line = line.rstrip("\r\n")
            if line:
                table.append(line.split("\t"))
        if not table:
            # Nothing to read: leave an empty matrix with empty indexes.
            self.matrix = []
            return

        # Lines with differing field counts do not form a 2-D array and
        # make the indexing below fail.
        txtMatrix = numpy.array(table)
        del table
        if self.element_type == float:
            self.matrix = numpy.matrix(
                numpy.zeros((txtMatrix.shape[0] - 1, txtMatrix.shape[1] - 1)))
            self.matrix.fill(numpy.nan)
            for i in range(self.matrix.shape[0]):
                for j in range(self.matrix.shape[1]):
                    try:
                        self.matrix[i, j] = self.element_type(txtMatrix[i + 1, j + 1])
                    except ValueError:
                        # Non-numeric text ("NA", "null", "", ...) stays nan.
                        # The text table itself is never rewritten, so row or
                        # column names such as "NA" are preserved.
                        pass
        else:
            self.matrix = numpy.matrix(txtMatrix[1:, 1:], dtype=self.element_type)

        self.col_map = self._index_columns(txtMatrix[0, 1:])
        for i, row in enumerate(txtMatrix[1:, 0]):
            self.row_map[row] = i

    def _read_native(self, handle, skip_vals):
        """
        Read the table into a list of row lists and fill col_map / row_map.
        """
        pos_hash = None
        self.matrix = []
        for row in csv.reader(handle, delimiter="\t"):
            if not row:
                # Blank line: nothing to index.
                continue
            if pos_hash is None:
                pos_hash = self._index_columns(row[1:])
            else:
                newRow = []
                if not skip_vals:
                    newRow = [self.null_type] * len(pos_hash)
                    for pos in pos_hash.values():
                        cell = row[pos + 1]
                        if cell not in self._null_tokens:
                            newRow[pos] = self.element_type(cell)
                self.row_map[row[0]] = len(self.matrix)
                self.matrix.append(newRow)

        # pos_hash is still None when the input had no header line at all.
        self.col_map = dict(pos_hash) if pos_hash else {}

    def _is_missing(self, val):
        """
        Return True when a cell value stands for "no data": None, the class
        null_type, or a float nan.
        """
        if val is None or val == self.null_type:
            return True
        return isinstance(val, float) and math.isnan(val)

    def write(self, handle, missing='NA'):
        """
        Write the matrix as tab-delimited text, rows in matrix order.

        handle  -- writable text handle
        missing -- text written in place of missing cells
        """
        write = csv.writer(handle, delimiter="\t", lineterminator='\n')
        col_list = self.get_col_list()

        write.writerow([self.corner_name] + col_list)
        # get_row_list() gives rows in matrix order; iterating row_map
        # directly would follow dict order instead.
        for rowName in self.get_row_list():
            out = [rowName]
            row = self.get_row(rowName)
            for col in col_list:
                val = row[self.col_map[col]]
                if self._is_missing(val):
                    val = missing
                out.append(val)
            write.writerow(out)

    def read_keyset(self, handle, key_predicate):
        """
        Generate the keys of a tab-delimited matrix file without loading it.

        key_predicate -- "rowKeySrc" yields the first field of every line
                         after the header; "columnKeySrc" yields the header
                         fields after the corner cell. Any other value
                         yields nothing.
        """
        if key_predicate == "rowKeySrc":
            reader = csv.reader(handle, delimiter="\t")
            next(reader, None)  # skip the header line
            for row in reader:
                if row:
                    yield row[0]

        if key_predicate == "columnKeySrc":
            reader = csv.reader(handle, delimiter="\t")
            header = next(reader, None)
            if header:
                for col in header[1:]:
                    yield col

    def get_col_namespace(self):
        """
        Return the name of the column namespace
        """
        return self.get("colNamespace", None)

    def get_row_namespace(self):
        """
        Return the name of the row namespace
        """
        return self.get("rowNamespace", None)

    def get_col_list(self):
        """
        Returns names of columns, ordered by column position
        """
        if not self.loaded:
            self.load()
        # sorted(..., key=...) works on both Python 2 and 3, unlike
        # keys().sort(cmp_function).
        return sorted(self.col_map, key=self.col_map.get)

    def get_row_list(self):
        """
        Returns names of rows, ordered by row position
        """
        return sorted(self.row_map, key=self.row_map.get)

    def get_row_pos(self, row):
        """
        Return the matrix index of the named row
        """
        return self.row_map[row]

    def get_col_pos(self, col):
        """
        Return the matrix index of the named column
        """
        return self.col_map[col]

    def get_row_count(self):
        """
        Return the number of named rows
        """
        return len(self.row_map)

    def get_col_count(self):
        """
        Return the number of named columns
        """
        return len(self.col_map)

    def get_row_map(self):
        """
        Return a shallow copy of the row name -> index mapping
        """
        return copy(self.row_map)

    def get_col_map(self):
        """
        Return a shallow copy of the column name -> index mapping
        """
        return copy(self.col_map)

    def get_shape(self):
        """
        Return (row count, column count)
        """
        return len(self.row_map), len(self.col_map)

    def get_row(self, row_name):
        """
        Return the values of the named row as a list, in column order
        """
        if not self.loaded:
            self.load()
        if isinstance(self.matrix, list):
            return self.matrix[self.row_map[row_name]]
        else:
            # Indexing a numpy matrix gives a 1 x N matrix; unwrap it.
            return self.matrix[self.row_map[row_name]].tolist()[0]

    def get_col(self, col_name):
        """
        Return the values of the named column as a list, in row order
        """
        if not self.loaded:
            self.load()
        if isinstance(self.matrix, list):
            out = []
            for row_name in self.get_row_list():
                out.append(self.get_val(col_name, row_name))
            return out
        else:
            # Column slice is N x 1; flatten to 1 x N before unwrapping.
            return self.matrix[:, self.col_map[col_name]].reshape(-1).tolist()[0]

    def get_val(self, col_name, row_name):
        """
        Get cell value based on row and column names
        """
        if isinstance(self.matrix, list):
            return self.matrix[self.row_map[row_name]][self.col_map[col_name]]
        return self.matrix[self.row_map[row_name], self.col_map[col_name]]

    def set_val(self, col_name, row_name, value):
        """
        Set cell value based on row and column names
        """
        if isinstance(self.matrix, list):
            self.matrix[self.row_map[row_name]][self.col_map[col_name]] = value
        else:
            self.matrix[self.row_map[row_name], self.col_map[col_name]] = value

    def write_gct(self, handle, missing=''):
        """
        Write the matrix in GCT layout: a "#1.2" line, a line with the row
        and column counts, a "NAME / Description" header, then one line per
        row with the row name used for both NAME and Description.

        handle  -- writable text handle
        missing -- text written in place of missing cells
        """
        write = csv.writer(handle, delimiter="\t", lineterminator='\n')
        cols = self.get_col_list()
        rows = self.get_row_list()
        write.writerow(["#1.2"])
        write.writerow([len(rows), len(cols)])
        write.writerow(["NAME", "Description"] + cols)
        for row in rows:
            out = [row, row]
            for col in cols:
                val = self.get_val(row_name=row, col_name=col)
                # Same missing-value test as write(), so nan cells of a
                # numpy matrix are not emitted as the text "nan".
                if self._is_missing(val):
                    val = missing
                out.append(val)
            write.writerow(out)
246 | |
247 |