comparison seg2matrix/CGData/BaseMatrix.py @ 31:ab20c0d04f4a

add seg2matrix tool
author jingchunzhu
date Fri, 24 Jul 2015 13:10:11 -0700
parents
children
comparison
equal deleted inserted replaced
30:7a7a52e9b019 31:ab20c0d04f4a
1
2 import csv
3 import CGData
4 import math
5 from copy import copy
6 try:
7 import numpy
8 except ImportError:
9 numpy = None
10
class BaseMatrix(CGData.CGDataMatrixObject):
    """
    Core matrix class. Implements a data matrix using numpy or native python
    objects, depending on availability and user request.

    Cells are addressed by row and column *name*; ``row_map`` and ``col_map``
    translate those names into integer positions inside ``matrix``.
    ``matrix`` is either a ``numpy.matrix`` or a list of row lists.
    """
    # Label written into the top-left header cell by write().
    corner_name = "#"
    # Callable used to convert cell text into a stored value.
    element_type = str
    # Placeholder stored for missing cells in the native (list) representation.
    null_type = None

    def __init__(self, type=str):
        """
        Create an empty matrix.

        type -- element type to use when the object carries no
                ``cgformat``/``valueType`` metadata. (The name shadows the
                builtin inside this method only; it is kept because it is
                part of the public signature.)
        """
        CGData.CGDataMatrixObject.__init__(self)
        self.free()
        if 'cgformat' in self and 'valueType' in self['cgformat']:
            # Metadata wins over the argument; only 'float' is recognised,
            # any other valueType leaves the class-level element_type as is.
            if self['cgformat']["valueType"] == 'float':
                self.element_type = float
        else:
            self.element_type = type

    def free(self):
        """
        Drop the matrix data and both name-to-position maps.
        """
        self.col_map = {}
        self.row_map = {}
        self.matrix = None

    def init_blank(self, cols, rows, skip_numpy=False):
        """
        Initialize the matrix with missing values using the row/column names
        provided by the user.

        cols, rows -- sized sequences of column / row names
        skip_numpy -- force native python lists even when numpy is available
                      (useful for string based matrices, because numpy
                      matrices fix the cell string length)

        The numpy variant is filled with ``numpy.nan``; the native variant is
        filled with ``null_type``.
        """
        if numpy is not None and not skip_numpy:
            self.matrix = numpy.matrix(
                numpy.zeros((len(rows), len(cols)), dtype=self.element_type))
            self.matrix.fill(numpy.nan)
        else:
            self.matrix = [[self.null_type] * len(cols) for _ in rows]
        # Start from clean maps so that names left over from an earlier
        # read()/init_blank() cannot point outside the new matrix.
        self.col_map = dict((name, pos) for pos, name in enumerate(cols))
        self.row_map = dict((name, pos) for pos, name in enumerate(rows))
        self.loaded = True

    @staticmethod
    def _index_unique(names):
        """
        Map each name to its position. A repeated name is stored as
        ``name#1``, ``name#2``, ... so that every column keeps its own entry.
        """
        positions = {}
        for pos, name in enumerate(names):
            unique = name
            suffix = 1
            while unique in positions:
                unique = name + "#" + str(suffix)
                suffix += 1
            positions[unique] = pos
        return positions

    def read(self, handle, skip_vals=False):
        """
        Load a tab-delimited matrix from ``handle``.

        The first line is the header (corner cell followed by column names);
        every following line is a row name followed by its values. Blank
        lines are ignored and an empty input yields an empty matrix.

        skip_vals -- when true, the native (non-numpy) reader records only
                     the row/column names and stores empty rows.
                     NOTE(review): the numpy reader does not honour this flag
                     and always parses the values.
        """
        self.col_map = {}
        self.row_map = {}
        if numpy is not None:
            self._read_numpy(handle)
        else:
            self._read_native(handle, skip_vals)
        self.loaded = True

    def _read_numpy(self, handle):
        """
        Parse ``handle`` into a numpy matrix and fill row_map/col_map.
        """
        cells = []
        for line in handle:
            # Strip "\r" as well so CRLF files do not leave it on the last
            # column name.
            line = line.rstrip("\r\n")
            if line:
                cells.append(line.split("\t"))

        if not cells:
            self.matrix = numpy.matrix(
                numpy.zeros((0, 0), dtype=self.element_type))
            return

        # Take the labels from the raw text so they are never touched by
        # value conversion.
        col_names = cells[0][1:]
        row_names = [fields[0] for fields in cells[1:]]

        text = numpy.array(cells)
        del cells
        if self.element_type == float:
            n_rows = text.shape[0] - 1
            n_cols = text.shape[1] - 1
            self.matrix = numpy.matrix(numpy.zeros((n_rows, n_cols)))
            self.matrix.fill(numpy.nan)
            for i in range(n_rows):
                for j in range(n_cols):
                    try:
                        self.matrix[i, j] = self.element_type(text[i + 1, j + 1])
                    except ValueError:
                        # "NA", "null", "" and any other non-numeric cell
                        # keep the NaN the matrix was pre-filled with.
                        pass
        else:
            self.matrix = numpy.matrix(text[1:, 1:], dtype=self.element_type)

        self.col_map = self._index_unique(col_names)
        for pos, name in enumerate(row_names):
            self.row_map[name] = pos

    def _read_native(self, handle, skip_vals):
        """
        Parse ``handle`` into a list of row lists and fill row_map/col_map.
        """
        self.matrix = []
        pos_hash = None
        for row in csv.reader(handle, delimiter="\t"):
            if not row:
                # csv.reader yields [] for a blank line.
                continue
            if pos_hash is None:
                pos_hash = self._index_unique(row[1:])
                continue
            new_row = []
            if not skip_vals:
                new_row = [self.null_type] * len(pos_hash)
                for pos in pos_hash.values():
                    cell = row[pos + 1]
                    # Empty cells and the usual missing-value markers stay
                    # at null_type.
                    if cell and cell not in ('NA', 'null', 'NONE', 'N/A'):
                        new_row[pos] = self.element_type(cell)
            self.row_map[row[0]] = len(self.matrix)
            self.matrix.append(new_row)
        # pos_hash is still None when the input had no header line at all.
        self.col_map = dict(pos_hash or {})

    def _is_missing(self, val):
        """
        Tell whether a cell value should be written as a missing value:
        ``None``, equal to ``null_type``, or a float NaN.
        """
        if val is None or val == self.null_type:
            return True
        return isinstance(val, float) and math.isnan(val)

    def write(self, handle, missing='NA'):
        """
        Write the matrix as tab-delimited text: a header row starting with
        ``corner_name``, then one line per row in matrix order.

        missing -- text written in place of missing cells
        """
        writer = csv.writer(handle, delimiter="\t", lineterminator='\n')
        col_list = self.get_col_list()

        writer.writerow([self.corner_name] + col_list)
        # get_row_list() keeps the rows in matrix order; iterating the dict
        # directly would emit them in arbitrary order.
        for row_name in self.get_row_list():
            row = self.get_row(row_name)
            out = [row_name]
            for col in col_list:
                val = row[self.col_map[col]]
                out.append(missing if self._is_missing(val) else val)
            writer.writerow(out)

    def read_keyset(self, handle, key_predicate):
        """
        Generate the keys found in a tab-delimited matrix file.

        key_predicate -- "rowKeySrc" yields the first field of every line
                         after the header; "columnKeySrc" yields the header
                         fields after the corner cell. Any other value
                         yields nothing.
        """
        if key_predicate == "rowKeySrc":
            reader = csv.reader(handle, delimiter="\t")
            seen_header = False
            for row in reader:
                if not row:
                    continue
                if seen_header:
                    yield row[0]
                else:
                    seen_header = True
        elif key_predicate == "columnKeySrc":
            reader = csv.reader(handle, delimiter="\t")
            for row in reader:
                if not row:
                    continue
                for col in row[1:]:
                    yield col
                # Only the header line carries column keys.
                break

    def get_col_namespace(self):
        """
        Return the name of the column namespace
        """
        return self.get("colNamespace", None)

    def get_row_namespace(self):
        """
        Return the name of the row namespace
        """
        return self.get("rowNamespace", None)

    def get_col_list(self):
        """
        Returns names of columns, ordered by column position
        """
        if not self.loaded:
            self.load()
        return sorted(self.col_map, key=self.col_map.get)

    def get_row_list(self):
        """
        Returns names of rows, ordered by row position
        """
        # NOTE(review): unlike get_col_list() this does not trigger a lazy
        # load -- confirm whether that is intentional.
        return sorted(self.row_map, key=self.row_map.get)

    def get_row_pos(self, row):
        """
        Return the position of the named row
        """
        return self.row_map[row]

    def get_col_pos(self, col):
        """
        Return the position of the named column
        """
        return self.col_map[col]

    def get_row_count(self):
        """
        Return the number of named rows
        """
        return len(self.row_map)

    def get_col_count(self):
        """
        Return the number of named columns
        """
        return len(self.col_map)

    def get_row_map(self):
        """
        Return a shallow copy of the row name -> position map
        """
        return copy(self.row_map)

    def get_col_map(self):
        """
        Return a shallow copy of the column name -> position map
        """
        return copy(self.col_map)

    def get_shape(self):
        """
        Return (number of named rows, number of named columns)
        """
        return len(self.row_map), len(self.col_map)

    def get_row(self, row_name):
        """
        Return the values of the named row as a list, loading the matrix
        first if needed
        """
        if not self.loaded:
            self.load()
        row = self.matrix[self.row_map[row_name]]
        if isinstance(self.matrix, list):
            return row
        # A numpy.matrix row is still 2-D; take its single inner list.
        return row.tolist()[0]

    def get_col(self, col_name):
        """
        Return the values of the named column as a list (in row order),
        loading the matrix first if needed
        """
        if not self.loaded:
            self.load()
        if isinstance(self.matrix, list):
            return [self.get_val(col_name, row_name)
                    for row_name in self.get_row_list()]
        return self.matrix[:, self.col_map[col_name]].reshape(-1).tolist()[0]

    def get_val(self, col_name, row_name):
        """
        Get cell value based on row and column names
        """
        row_pos = self.row_map[row_name]
        col_pos = self.col_map[col_name]
        if isinstance(self.matrix, list):
            return self.matrix[row_pos][col_pos]
        return self.matrix[row_pos, col_pos]

    def set_val(self, col_name, row_name, value):
        """
        Set cell value based on row and column names
        """
        row_pos = self.row_map[row_name]
        col_pos = self.col_map[col_name]
        if isinstance(self.matrix, list):
            self.matrix[row_pos][col_pos] = value
        else:
            self.matrix[row_pos, col_pos] = value

    def write_gct(self, handle, missing=''):
        """
        Write the matrix in GCT layout: a "#1.2" line, a line with the row
        and column counts, a "NAME"/"Description" header, then one line per
        row with the row name repeated as its description.

        missing -- text written in place of missing cells
        """
        writer = csv.writer(handle, delimiter="\t", lineterminator='\n')
        cols = self.get_col_list()
        rows = self.get_row_list()
        writer.writerow(["#1.2"])
        writer.writerow([len(rows), len(cols)])
        writer.writerow(["NAME", "Description"] + cols)
        for row in rows:
            out = [row, row]
            for col in cols:
                val = self.get_val(row_name=row, col_name=col)
                # Same missing-value test as write(), so NaN cells of a
                # numpy matrix are not emitted as the text "nan".
                out.append(missing if self._is_missing(val) else val)
            writer.writerow(out)
246
247