31
|
1
|
|
2 import csv
|
|
3 import CGData
|
|
4 import math
|
|
5 from copy import copy
|
|
6 try:
|
|
7 import numpy
|
|
8 except ImportError:
|
|
9 numpy = None
|
|
10
|
|
class BaseMatrix(CGData.CGDataMatrixObject):
    """
    Core matrix class. Implements a named data matrix using numpy or native
    python objects, depending upon availability and user request.

    Rows and columns are addressed by name: ``row_map`` and ``col_map``
    translate a name to its 0-based position in ``matrix``. ``matrix`` is
    either a ``numpy.matrix`` or a list of row lists; every accessor below
    checks which one it is dealing with.
    """
    # Text written in the top-left cell of the header row by write().
    corner_name = "#"
    # Callable used to convert a cell's text into a value.
    element_type = str
    # Placeholder stored for missing cells in native (list based) matrices.
    null_type = None
    # Cell texts the native reader treats as "no value".
    _missing_tokens = ('NA', 'null', 'NONE', 'N/A', '')

    def __init__(self, type=str):
        """
        Create an empty matrix.

        `type` is the element type to use. It is ignored when the object
        already carries a 'cgformat' entry with a 'valueType' key; in that
        case only the value 'float' changes the element type.
        """
        CGData.CGDataMatrixObject.__init__(self)
        self.free()
        if 'cgformat' in self and 'valueType' in self['cgformat']:
            if self['cgformat']["valueType"] == 'float':
                self.element_type = float
        else:
            self.element_type = type

    def free(self):
        """
        Drop the matrix data and the row/column name maps.
        """
        self.col_map = {}
        self.row_map = {}
        self.matrix = None

    def init_blank(self, cols, rows, skip_numpy=False):
        """
        Initialize the matrix with missing values using the row/column names
        provided by the user.

        A numpy matrix filled with nan is used when numpy is available,
        `skip_numpy` is false and the element type is a floating point type.
        Otherwise a list of lists filled with `null_type` is used: nan cannot
        be stored in string or integer numpy arrays, and numpy string arrays
        fix the cell string length.
        """
        cols = list(cols)
        rows = list(rows)
        use_numpy = (
            numpy is not None
            and not skip_numpy
            and numpy.issubdtype(numpy.dtype(self.element_type), numpy.floating)
        )
        if use_numpy:
            blank = numpy.zeros((len(rows), len(cols)), dtype=self.element_type)
            blank.fill(numpy.nan)
            self.matrix = numpy.matrix(blank)
        else:
            self.matrix = [[self.null_type] * len(cols) for _ in rows]
        # Rebuild the maps from scratch so names left over from a previous
        # matrix cannot point at positions that no longer exist.
        self.col_map = dict((name, i) for i, name in enumerate(cols))
        self.row_map = dict((name, i) for i, name in enumerate(rows))
        self.loaded = True

    @staticmethod
    def _unique_names(names):
        """
        Return `names` in order, with repeats renamed to name#1, name#2, ...
        """
        seen = set()
        unique = []
        for name in names:
            candidate = name
            suffix = 1
            while candidate in seen:
                candidate = name + "#" + str(suffix)
                suffix += 1
            seen.add(candidate)
            unique.append(candidate)
        return unique

    def _is_missing(self, val):
        """
        Tell whether `val` stands for an absent cell (None, null_type or nan).
        """
        if val is None or val == self.null_type:
            return True
        return isinstance(val, float) and math.isnan(val)

    def read(self, handle, skip_vals=False):
        """
        Load a tab separated matrix from `handle`.

        The first line holds the column names (its first cell is ignored);
        every following line holds a row name followed by the cell values.
        Repeated column names are renamed to name#1, name#2, ...

        With numpy available the result is a numpy matrix; for float
        matrices any cell that cannot be parsed becomes nan. Without numpy
        the result is a list of lists where missing cells hold `null_type`.
        `skip_vals` is only honored by the native path, where it stores empty
        rows and loads just the names.
        """
        self.col_map = {}
        self.row_map = {}
        if numpy is not None:
            self._read_numpy(handle)
        else:
            self._read_native(handle, skip_vals)
        self.loaded = True

    def _read_numpy(self, handle):
        """
        Fill matrix, col_map and row_map from `handle` using numpy storage.
        """
        table = []
        for line in handle:
            # Strip "\r" as well so CRLF files do not leave it in the last cell.
            line = line.rstrip("\r\n")
            if line:
                table.append(line.split("\t"))
        if not table:
            self.matrix = []
            return

        header, body = table[0], table[1:]
        col_names = self._unique_names(header[1:])
        n_cols = len(col_names)

        if self.element_type == float:
            data = numpy.zeros((len(body), n_cols))
            data.fill(numpy.nan)
            for i, row in enumerate(body):
                for j, cell in enumerate(row[1:n_cols + 1]):
                    try:
                        data[i, j] = float(cell)
                    except ValueError:
                        # "NA", "null", empty or otherwise unparsable
                        # cells deliberately stay nan.
                        pass
            self.matrix = numpy.matrix(data)
        else:
            # Pad short rows and drop surplus cells so the table is rectangular.
            cells = [
                (row[1:n_cols + 1] + [""] * n_cols)[:n_cols] for row in body
            ]
            text = numpy.array(cells, dtype=str).reshape(len(body), n_cols)
            self.matrix = numpy.matrix(text, dtype=self.element_type)

        for i, name in enumerate(col_names):
            self.col_map[name] = i
        for i, row in enumerate(body):
            self.row_map[row[0]] = i

    def _read_native(self, handle, skip_vals):
        """
        Fill matrix, col_map and row_map from `handle` using python lists.
        """
        self.matrix = []
        col_names = None
        for row in csv.reader(handle, delimiter="\t"):
            if col_names is None:
                col_names = self._unique_names(row[1:])
                continue
            if not row:
                # Blank line: there is no row name to register.
                continue
            if skip_vals:
                values = []
            else:
                values = [self.null_type] * len(col_names)
                # Cells missing from a short row simply stay null_type.
                for j, cell in enumerate(row[1:len(col_names) + 1]):
                    if cell not in self._missing_tokens:
                        values[j] = self.element_type(cell)
            self.row_map[row[0]] = len(self.matrix)
            self.matrix.append(values)

        for i, name in enumerate(col_names or []):
            self.col_map[name] = i

    def write(self, handle, missing='NA'):
        """
        Write the matrix to `handle` as tab separated text.

        Rows and columns are written in matrix order. Cells holding None,
        `null_type` or nan are written as `missing`.
        """
        writer = csv.writer(handle, delimiter="\t", lineterminator='\n')
        col_list = self.get_col_list()

        writer.writerow([self.corner_name] + col_list)
        for row_name in self.get_row_list():
            row = self.get_row(row_name)
            out = [row_name]
            for col in col_list:
                val = row[self.col_map[col]]
                if self._is_missing(val):
                    val = missing
                out.append(val)
            writer.writerow(out)

    def read_keyset(self, handle, key_predicate):
        """
        Yield the keys of a tab separated matrix file without loading it.

        "rowKeySrc" yields the first cell of every line after the header;
        "columnKeySrc" yields the header cells after the first one. Any other
        predicate yields nothing.
        """
        reader = csv.reader(handle, delimiter="\t")
        if key_predicate == "rowKeySrc":
            next(reader, None)  # skip the header line
            for row in reader:
                if row:
                    yield row[0]
        elif key_predicate == "columnKeySrc":
            header = next(reader, None)
            if header:
                for col in header[1:]:
                    yield col

    def get_col_namespace(self):
        """
        Return the name of the column namespace
        """
        return self.get("colNamespace", None)

    def get_row_namespace(self):
        """
        Return the name of the row namespace
        """
        return self.get("rowNamespace", None)

    def get_col_list(self):
        """
        Returns names of columns, ordered by column position
        """
        if not self.loaded:
            self.load()
        return sorted(self.col_map, key=self.col_map.get)

    def get_row_list(self):
        """
        Returns names of rows, ordered by row position
        """
        return sorted(self.row_map, key=self.row_map.get)

    def get_row_pos(self, row):
        """
        Return the 0-based position of the named row
        """
        return self.row_map[row]

    def get_col_pos(self, col):
        """
        Return the 0-based position of the named column
        """
        return self.col_map[col]

    def get_row_count(self):
        """
        Return the number of named rows
        """
        return len(self.row_map)

    def get_col_count(self):
        """
        Return the number of named columns
        """
        return len(self.col_map)

    def get_row_map(self):
        """
        Return a shallow copy of the row name -> position map
        """
        return copy(self.row_map)

    def get_col_map(self):
        """
        Return a shallow copy of the column name -> position map
        """
        return copy(self.col_map)

    def get_shape(self):
        """
        Return (row count, column count)
        """
        return len(self.row_map), len(self.col_map)

    def get_row(self, row_name):
        """
        Return the values of the named row as a list
        """
        if not self.loaded:
            self.load()
        if isinstance(self.matrix, list):
            return self.matrix[self.row_map[row_name]]
        # Indexing a numpy.matrix row keeps it 2-D, hence the [0].
        return self.matrix[self.row_map[row_name]].tolist()[0]

    def get_col(self, col_name):
        """
        Return the values of the named column as a list
        """
        if not self.loaded:
            self.load()
        if isinstance(self.matrix, list):
            return [
                self.get_val(col_name, row_name)
                for row_name in self.get_row_list()
            ]
        column = self.matrix[:, self.col_map[col_name]]
        return column.reshape(-1).tolist()[0]

    def get_val(self, col_name, row_name):
        """
        Get cell value based on row and column names
        """
        if isinstance(self.matrix, list):
            return self.matrix[self.row_map[row_name]][self.col_map[col_name]]
        return self.matrix[self.row_map[row_name], self.col_map[col_name]]

    def set_val(self, col_name, row_name, value):
        """
        Set cell value based on row and column names
        """
        if isinstance(self.matrix, list):
            self.matrix[self.row_map[row_name]][self.col_map[col_name]] = value
        else:
            self.matrix[self.row_map[row_name], self.col_map[col_name]] = value

    def write_gct(self, handle, missing=''):
        """
        Write the matrix to `handle` in GCT layout: a "#1.2" line, a line
        with the row and column counts, a NAME/Description header, then one
        line per row with the row name used for both leading cells.

        Cells holding None, `null_type` or nan are written as `missing`.
        """
        writer = csv.writer(handle, delimiter="\t", lineterminator='\n')
        cols = self.get_col_list()
        rows = self.get_row_list()
        writer.writerow(["#1.2"])
        writer.writerow([len(rows), len(cols)])
        writer.writerow(["NAME", "Description"] + cols)
        for row in rows:
            out = [row, row]
            for col in cols:
                val = self.get_val(row_name=row, col_name=col)
                if self._is_missing(val):
                    val = missing
                out.append(val)
            writer.writerow(out)
|
|
246
|
|
247
|