Mercurial > repos > melissacline > ucsc_cancer_utilities
comparison seg2matrix/CGData/BaseTable.py @ 31:ab20c0d04f4a
add seg2matrix tool
| author | jingchunzhu |
|---|---|
| date | Fri, 24 Jul 2015 13:10:11 -0700 |
| parents | |
| children | b6f5d2d1b047 |
comparison
equal
deleted
inserted
replaced
| 30:7a7a52e9b019 | 31:ab20c0d04f4a |
|---|---|
| 1 | |
| 2 from CGData import CGObjectBase | |
| 3 | |
| 4 import csv | |
| 5 import types | |
| 6 | |
class TableRow(object):
    """One record of a table; column values live as instance attributes.

    Subclasses are expected to provide a ``__format__`` mapping whose
    ``'columnOrder'`` entry lists the column names to display.
    """

    def __init__(self):
        """Create an empty row; columns are attached afterwards with setattr."""
        pass

    def __str__(self):
        """Render the row as ``<col=value,...>`` following the column order."""
        column_names = self.__format__['columnOrder']
        pairs = []
        for name in column_names:
            pairs.append("%s=%s" % (name, getattr(self, name)))
        return "<%s>" % ",".join(pairs)
| 13 | |
class InvalidFormat(Exception):
    """Raised when a row lacks a column that the table format requires."""

    def __init__(self, txt):
        # Hand the message to Exception so str(exc) and exc.args carry it.
        super(InvalidFormat, self).__init__(txt)
| 17 | |
class BaseTable(CGObjectBase):
    """Keyed, tab-delimited table whose layout comes from ``self['cgformat']``.

    The format mapping is consulted for these entries:

    * ``name`` -- used to name the generated row class.
    * ``columnOrder`` -- column names, in file order.
    * ``columnDef`` (optional) -- per-column ``type`` (``'float'`` or
      ``'int'``; anything else is kept as ``str``) and ``optional`` flag.
    * ``primaryKey`` / ``groupKey`` -- the column rows are indexed by.  With
      a primary key each key holds one row; with a group key each key holds
      a list of rows.  ``groupKey`` wins when both are present.
    * ``secondaryKey`` (optional) -- adds a second index level under the
      first key.
    * ``comment`` (optional) -- prefix marking rows to skip while reading.

    Rows are stored in a dict kept on the attribute ``<firstKey>_map``.
    """

    def __init__(self):
        super(BaseTable, self).__init__()
        # Each table gets its own TableRow subclass carrying the format, so
        # rows can render themselves in column order.
        self.__row_class__ = type(
            "TableRow_" + self['cgformat']['name'],
            (TableRow,),
            dict(__format__=self.__format__),
        )
        self.free()

    def free(self):
        """Drop all stored rows and re-derive the key settings from the format."""
        fmt = self['cgformat']
        self.firstKey = None
        self.secondKey = None
        self.groupKey = None
        self.loaded = False

        if 'primaryKey' in fmt:
            self.firstKey = fmt['primaryKey']
            setattr(self, self.firstKey + "_map", {})
            self.groupKey = False

        # A group key takes precedence over a primary key when both exist.
        if 'groupKey' in fmt:
            self.firstKey = fmt['groupKey']
            setattr(self, self.firstKey + "_map", {})
            self.groupKey = True

        if 'secondaryKey' in fmt:
            self.secondKey = fmt['secondaryKey']

    def _column_setting(self, col, setting, default):
        """Return ``columnDef[col][setting]`` from the format, or *default*."""
        fmt = self['cgformat']
        if 'columnDef' not in fmt:
            return default
        column_defs = fmt['columnDef']
        if col in column_defs and setting in column_defs[col]:
            return column_defs[col][setting]
        return default

    def _ensure_loaded(self):
        """Trigger a load on first access.

        NOTE(review): ``load`` is not defined in this class; presumably it is
        provided by CGObjectBase -- confirm.
        """
        if not self.loaded:
            self.load()

    def _store_row(self, row):
        """Index *row* in the first-key map according to the key settings."""
        store = self.__get_firstmap__()
        key1 = getattr(row, self.firstKey)
        if self.secondKey is None:
            if self.groupKey:
                store.setdefault(key1, []).append(row)
            else:
                store[key1] = row
            return

        key2 = getattr(row, self.secondKey)
        second_level = store.setdefault(key1, {})
        if self.groupKey:
            second_level.setdefault(key2, []).append(row)
        else:
            second_level[key2] = row

    def read(self, handle):
        """Load rows from *handle*, an iterable of tab-delimited text lines.

        Rows whose first field starts with the format's ``comment`` prefix
        are skipped.  Every other row is converted column by column and
        stored under its key(s); ``self.loaded`` is set once the input is
        exhausted.

        Raises:
            ValueError: a field cannot be converted to its column type.
            InvalidFormat: a row is too short to supply a non-optional
                column (this includes blank lines).
        """
        fmt = self['cgformat']
        cols = fmt['columnOrder']

        # Column settings do not change between rows; resolve them once.
        col_type = {}
        optional = {}
        for col in cols:
            declared = self._column_setting(col, 'type', None)
            if declared == 'float':
                col_type[col] = float
            elif declared == 'int':
                col_type[col] = int
            else:
                col_type[col] = str
            optional[col] = self._column_setting(col, 'optional', False)

        comment = fmt['comment'] if 'comment' in fmt else None

        reader = csv.reader(handle, delimiter="\t")
        for linenum, row in enumerate(reader, 1):
            # Check ``row`` before indexing: csv.reader yields [] for a blank
            # line, and row[0] would raise IndexError on it.
            if comment is not None and row and row[0].startswith(comment):
                continue

            record = self.__row_class__()
            for i, col in enumerate(cols):
                if i < len(row):
                    try:
                        setattr(record, col, col_type[col](row[i]))
                    except ValueError:
                        raise ValueError(
                            "col invalid type %s on line %d" % (row[i], linenum))
                elif optional[col]:
                    setattr(record, col, None)
                else:
                    # The offending row travels in the message instead of
                    # being printed to stdout.
                    raise InvalidFormat(
                        "missing colum %s on line %d: %r" % (col, linenum, row))
            self._store_row(record)
        self.loaded = True

    def get_key_list(self):
        """
        List keys
        """
        self._ensure_loaded()
        return self.__get_firstmap__().keys()

    def get_by(self, key):
        """
        get by key
        """
        self._ensure_loaded()
        return self.__get_firstmap__()[key]

    def get_values(self):
        """
        get values
        """
        self._ensure_loaded()
        return self.__get_firstmap__().values()

    def get_map(self):
        """
        get key map
        """
        self._ensure_loaded()
        return self.__get_firstmap__()

    def has_key(self, key):
        """
        Does the table have a key
        """
        self._ensure_loaded()
        return key in self.__get_firstmap__()

    def __get_firstmap__(self):
        """Return the dict that indexes rows by the first key."""
        return getattr(self, self.firstKey + "_map")

    def init_blank(self):
        """Reset to an empty table that is marked as already loaded."""
        self.free()
        self['cgdata'] = {'type': self['cgformat']['name']}
        self.loaded = True

    def insert(self, name, vals):
        """Add one row built from the mapping *vals* (column name -> value).

        Values are stored as given, without type conversion.  Columns absent
        from *vals* become ``None`` when optional.  *name* is accepted but
        not used by this implementation.

        Raises:
            InvalidFormat: a non-optional column is absent from *vals*.
        """
        record = self.__row_class__()
        for col in self['cgformat']['columnOrder']:
            if col in vals:
                setattr(record, col, vals[col])
            elif self._column_setting(col, 'optional', False):
                setattr(record, col, None)
            else:
                raise InvalidFormat("missing colum " + col)
        self._store_row(record)

    def write(self, handle):
        """Write every stored row to *handle* as tab-delimited text."""
        writer = csv.writer(handle, delimiter="\t", lineterminator="\n")
        cols = self['cgformat']['columnOrder']
        for row in self.row_iter():
            writer.writerow([getattr(row, col) for col in cols])

    def row_iter(self):
        """Yield every stored row object, whatever the key layout."""
        key_map = self.__get_firstmap__()
        for key in key_map:
            entry = key_map[key]
            # With a secondary key there is one more dict level to unwrap
            # before reaching the row (or the list of rows in group mode).
            if self.secondKey is not None:
                buckets = entry.values()
            else:
                buckets = [entry]
            for bucket in buckets:
                if self.groupKey:
                    for row in bucket:
                        yield row
                else:
                    yield bucket
