Mercurial > repos > melissacline > ucsc_cancer_utilities
comparison seg2matrix/CGData/BaseMatrix.py @ 31:ab20c0d04f4a
add seg2matrix tool
| author | jingchunzhu |
|---|---|
| date | Fri, 24 Jul 2015 13:10:11 -0700 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 30:7a7a52e9b019 | 31:ab20c0d04f4a |
|---|---|
| 1 | |
| 2 import csv | |
| 3 import CGData | |
| 4 import math | |
| 5 from copy import copy | |
| 6 try: | |
| 7 import numpy | |
| 8 except ImportError: | |
| 9 numpy = None | |
| 10 | |
class BaseMatrix(CGData.CGDataMatrixObject):
    """
    Core matrix class. Implements a named-row / named-column data matrix
    using numpy or native Python lists, depending on availability and on
    what the caller requests.

    State kept by this class:
      * ``matrix``  - ``numpy.matrix`` or list of row lists (None until filled)
      * ``col_map`` - column name -> zero-based column index
      * ``row_map`` - row name -> zero-based row index

    ``self.loaded``, ``self.load()`` and the mapping-style access
    (``'cgformat' in self``, ``self.get(...)``) are used here but defined
    elsewhere, presumably by ``CGData.CGDataMatrixObject``.
    """
    corner_name = "#"      # text written in the top-left cell by write()
    element_type = str     # callable used to convert cell text to a value
    null_type = None       # placeholder stored for missing cells (native lists)

    # Cell texts that the native (non-numpy) reader treats as missing.
    _missing_tokens = ('NA', 'null', 'NONE', 'N/A')

    def __init__(self, type=str):
        """
        Create an empty matrix.

        type -- element type used when the object carries no
                'cgformat'/'valueType' information.  (The name shadows the
                builtin but is kept because it is part of the signature.)
        """
        CGData.CGDataMatrixObject.__init__(self)
        self.free()
        # NOTE(review): the nesting of this if/else was reconstructed from an
        # indentation-stripped listing; the ``else`` is taken to belong to the
        # outer ``if`` (no format info -> use ``type``).  Confirm against the
        # repository copy.
        if 'cgformat' in self and 'valueType' in self['cgformat']:
            if self['cgformat']["valueType"] == 'float':
                self.element_type = float
        else:
            self.element_type = type

    def free(self):
        """Drop the matrix and both name maps."""
        self.col_map = {}
        self.row_map = {}
        self.matrix = None

    def init_blank(self, cols, rows, skip_numpy=False):
        """
        Initialize the matrix with missing values (NaN for numpy, null_type
        for native lists) using the row/column names provided by the caller.

        skip_numpy=True forces native Python lists, which is useful for
        string-based matrices because numpy fixes the cell string length.
        """
        if numpy is not None and not skip_numpy:
            self.matrix = numpy.matrix(
                numpy.zeros((len(rows), len(cols)), dtype=self.element_type))
            self.matrix.fill(numpy.nan)
        else:
            self.matrix = [[self.null_type] * len(cols) for _ in rows]
        # Rebuild the maps from scratch so names left over from an earlier
        # load/init cannot point at indices outside the new matrix.
        self.col_map = {name: idx for idx, name in enumerate(cols)}
        self.row_map = {name: idx for idx, name in enumerate(rows)}
        self.loaded = True

    def read(self, handle, skip_vals=False):
        """
        Load a tab-delimited matrix from an iterable of text lines.

        The first line holds the column names (its first cell is ignored);
        every following line is a row name followed by the cell values.

        skip_vals -- only honoured by the native (non-numpy) reader: rows are
                     registered in row_map but stored as empty lists.
        """
        self.col_map = {}
        self.row_map = {}
        if numpy is not None:
            self._read_numpy(handle)
        else:
            self._read_native(handle, skip_vals)
        self.loaded = True

    def _read_numpy(self, handle):
        """Fill matrix/col_map/row_map from *handle* using numpy storage."""
        # Strip "\r\n" as well as "\n" so CRLF files do not leave a carriage
        # return glued to the last column name and value.
        cells = [line.rstrip("\r\n").split("\t") for line in handle]
        if not cells:
            # Empty input: nothing to index, keep an empty matrix.
            self.matrix = numpy.matrix(numpy.zeros((0, 0)))
            return
        text = numpy.array(cells)
        del cells
        if self.element_type == float:
            n_rows = text.shape[0] - 1
            n_cols = text.shape[1] - 1
            self.matrix = numpy.matrix(numpy.zeros((n_rows, n_cols)))
            self.matrix.fill(numpy.nan)
            for i in range(n_rows):
                for j in range(n_cols):
                    try:
                        self.matrix[i, j] = self.element_type(text[i + 1, j + 1])
                    except ValueError:
                        # "NA", "null", "" and any other non-numeric text
                        # stay NaN.  The labels in row 0 / column 0 are left
                        # untouched so a row or column literally named "NA"
                        # keeps its name.
                        pass
        else:
            self.matrix = numpy.matrix(text[1:, 1:], dtype=self.element_type)

        # Duplicate names are not disambiguated here: the last one wins.
        for idx, col in enumerate(text[0, 1:]):
            self.col_map[col] = idx
        for idx, row in enumerate(text[1:, 0]):
            self.row_map[row] = idx

    def _read_native(self, handle, skip_vals):
        """Fill matrix/col_map/row_map from *handle* using lists of lists."""
        self.matrix = []
        col_pos = None
        for row in csv.reader(handle, delimiter="\t"):
            if col_pos is None:
                # Header row: repeated column names get "#1", "#2", ...
                # appended so every column keeps its own entry.
                col_pos = {}
                for pos, name in enumerate(row[1:]):
                    candidate = name
                    suffix = 1
                    while candidate in col_pos:
                        candidate = name + "#" + str(suffix)
                        suffix += 1
                    col_pos[candidate] = pos
                continue
            values = []
            if not skip_vals:
                values = [self.null_type] * len(col_pos)
                for pos in col_pos.values():
                    cell = row[pos + 1]
                    if cell and cell not in self._missing_tokens:
                        values[pos] = self.element_type(cell)
            self.row_map[row[0]] = len(self.matrix)
            self.matrix.append(values)
        # col_pos is still None when the input had no lines at all.
        self.col_map = dict(col_pos) if col_pos is not None else {}

    def _is_missing(self, val):
        """Return True if *val* should be written as a missing value."""
        if val is None or val == self.null_type:
            return True
        return isinstance(val, float) and math.isnan(val)

    def write(self, handle, missing='NA'):
        """
        Write the matrix as tab-delimited text: a header line starting with
        corner_name, then one line per row in row-index order.  Missing
        cells (None, null_type or NaN) are written as *missing*.
        """
        writer = csv.writer(handle, delimiter="\t", lineterminator='\n')
        col_list = self.get_col_list()

        writer.writerow([self.corner_name] + col_list)
        for row_name in self.get_row_list():
            row = self.get_row(row_name)
            out = [row_name]
            for col in col_list:
                val = row[self.col_map[col]]
                out.append(missing if self._is_missing(val) else val)
            writer.writerow(out)

    def read_keyset(self, handle, key_predicate):
        """
        Generator over the keys of a tab-delimited matrix file.

        key_predicate == "rowKeySrc"    -> first cell of every line after
                                           the header line
        key_predicate == "columnKeySrc" -> cells of the header line, except
                                           the first one
        Any other value yields nothing.
        """
        if key_predicate == "rowKeySrc":
            reader = csv.reader(handle, delimiter="\t")
            next(reader, None)  # skip the header line
            for row in reader:
                yield row[0]

        if key_predicate == "columnKeySrc":
            reader = csv.reader(handle, delimiter="\t")
            for col in next(reader, [])[1:]:
                yield col

    def get_col_namespace(self):
        """
        Return the name of the column namespace
        """
        return self.get("colNamespace", None)

    def get_row_namespace(self):
        """
        Return the name of the row namespace
        """
        return self.get("rowNamespace", None)

    def get_col_list(self):
        """
        Return the column names as a list ordered by column index.
        Triggers self.load() first if the object is not loaded yet.
        """
        if not self.loaded:
            self.load()
        # key-based sort works on both Python 2 and Python 3.
        return sorted(self.col_map, key=self.col_map.get)

    def get_row_list(self):
        """
        Return the row names as a list ordered by row index.
        Unlike get_col_list, this does not trigger a load.
        """
        return sorted(self.row_map, key=self.row_map.get)

    def get_row_pos(self, row):
        """Return the index of the named row (KeyError if unknown)."""
        return self.row_map[row]

    def get_col_pos(self, col):
        """Return the index of the named column (KeyError if unknown)."""
        return self.col_map[col]

    def get_row_count(self):
        """Return the number of named rows."""
        return len(self.row_map)

    def get_col_count(self):
        """Return the number of named columns."""
        return len(self.col_map)

    def get_row_map(self):
        """Return a shallow copy of the row name -> index map."""
        return copy(self.row_map)

    def get_col_map(self):
        """Return a shallow copy of the column name -> index map."""
        return copy(self.col_map)

    def get_shape(self):
        """Return (row count, column count) based on the name maps."""
        return len(self.row_map), len(self.col_map)

    def get_row(self, row_name):
        """
        Return the values of the named row as a list.
        Triggers self.load() first if the object is not loaded yet.
        For native storage this is the stored row list itself, not a copy.
        """
        if not self.loaded:
            self.load()
        row_idx = self.row_map[row_name]
        if isinstance(self.matrix, list):
            return self.matrix[row_idx]
        # A numpy.matrix row is still 2-D, so tolist() gives [[...]].
        return self.matrix[row_idx].tolist()[0]

    def get_col(self, col_name):
        """
        Return the values of the named column as a list, in row-index order.
        Triggers self.load() first if the object is not loaded yet.
        """
        if not self.loaded:
            self.load()
        if isinstance(self.matrix, list):
            return [self.get_val(col_name, row_name)
                    for row_name in self.get_row_list()]
        # Column slice is (n, 1); flatten to (1, n) before converting.
        return self.matrix[:, self.col_map[col_name]].reshape(-1).tolist()[0]

    def get_val(self, col_name, row_name):
        """
        Get cell value based on row and column names
        """
        row_idx = self.row_map[row_name]
        col_idx = self.col_map[col_name]
        if isinstance(self.matrix, list):
            return self.matrix[row_idx][col_idx]
        return self.matrix[row_idx, col_idx]

    def set_val(self, col_name, row_name, value):
        """
        Set cell value based on row and column names
        """
        row_idx = self.row_map[row_name]
        col_idx = self.col_map[col_name]
        if isinstance(self.matrix, list):
            self.matrix[row_idx][col_idx] = value
        else:
            self.matrix[row_idx, col_idx] = value

    def write_gct(self, handle, missing=''):
        """
        Write the matrix in GCT layout: a "#1.2" line, a line with the row
        and column counts, a "NAME"/"Description" header, then one line per
        row with the row name repeated as its description.  Missing cells
        (None, null_type or NaN) are written as *missing*.
        """
        writer = csv.writer(handle, delimiter="\t", lineterminator='\n')
        cols = self.get_col_list()
        rows = self.get_row_list()
        writer.writerow(["#1.2"])
        writer.writerow([len(rows), len(cols)])
        writer.writerow(["NAME", "Description"] + cols)
        for row in rows:
            out = [row, row]
            for col in cols:
                val = self.get_val(row_name=row, col_name=col)
                out.append(missing if self._is_missing(val) else val)
            writer.writerow(out)
| 247 |
