Mercurial > repos > melissacline > ucsc_cancer_utilities
diff seg2matrix/CGData/BaseTable.py @ 31:ab20c0d04f4a
add seg2matrix tool
author | jingchunzhu |
---|---|
date | Fri, 24 Jul 2015 13:10:11 -0700 |
parents | |
children | b6f5d2d1b047 |
line wrap: on
line diff
# Recovered from the Mercurial diff:
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/seg2matrix/CGData/BaseTable.py Fri Jul 24 13:10:11 2015 -0700
# @@ -0,0 +1,233 @@

from CGData import CGObjectBase

import csv
import types


class TableRow(object):
    """One table record; every column value is stored as an attribute.

    BaseTable derives a concrete row class per table format and attaches the
    format description to it as the class attribute ``__format__``.
    """

    def __init__(self):
        pass

    def __str__(self):
        # Render as <col=value,...> following the format's column order.
        return "<" + ",".join(
            "%s=%s" % (col, getattr(self, col))
            for col in self.__format__['columnOrder']) + ">"


class InvalidFormat(Exception):
    """Raised when a row lacks a column that is not marked optional."""

    def __init__(self, txt):
        Exception.__init__(self, txt)


class BaseTable(CGObjectBase):
    """Tab-delimited table whose rows are indexed by configured key columns.

    The layout is driven by ``self['cgformat']``:

    * ``columnOrder``  - ordered list of column names (required).
    * ``columnDef``    - optional per-column settings; ``type`` may be
      ``'float'`` or ``'int'`` (anything else is read as ``str``) and
      ``optional`` marks columns that may be absent.
    * ``primaryKey``   - column whose value identifies exactly one row.
    * ``groupKey``     - column whose value identifies a list of rows; wins
      over ``primaryKey`` when both are present.
    * ``secondaryKey`` - optional second-level key nested under the first.
    * ``comment``      - optional prefix; rows whose first field starts with
      it are skipped by ``read``.

    Rows live in a dict stored on the instance as ``<firstKey>_map``.

    NOTE(review): ``self['cgformat']``, ``self.__format__`` and ``self.load()``
    are not defined in this module; presumably they come from CGObjectBase or
    from subclasses - confirm there.
    """

    def __init__(self):
        super(BaseTable, self).__init__()
        # Build a row class specific to this table format so that rows can
        # reach the format through their class.
        self.__row_class__ = type(
            "TableRow_" + self['cgformat']['name'],
            (TableRow,),
            dict(__format__=self.__format__))
        self.free()

    def free(self):
        """Reset the key configuration and discard any stored rows."""
        self.firstKey = None
        self.secondKey = None
        self.groupKey = None
        self.loaded = False
        fmt = self['cgformat']
        if 'primaryKey' in fmt:
            self.firstKey = fmt['primaryKey']
            setattr(self, fmt['primaryKey'] + "_map", {})
            self.groupKey = False

        # Set up the map for groupKeys; this deliberately runs after the
        # primaryKey branch so that a groupKey overrides it.
        if 'groupKey' in fmt:
            self.firstKey = fmt['groupKey']
            setattr(self, fmt['groupKey'] + "_map", {})
            self.groupKey = True

        if 'secondaryKey' in fmt:
            self.secondKey = fmt['secondaryKey']

    def _column_setting(self, col, setting, default):
        """Return ``cgformat['columnDef'][col][setting]`` or *default*."""
        fmt = self['cgformat']
        if ('columnDef' in fmt and col in fmt['columnDef']
                and setting in fmt['columnDef'][col]):
            return fmt['columnDef'][col][setting]
        return default

    def _store_row(self, storeMap, r):
        """Index row *r* in *storeMap* according to the key configuration.

        Resulting layouts:
          primary key only          -> storeMap[key1] = row
          primary + secondary key   -> storeMap[key1][key2] = row
          group key only            -> storeMap[key1] = [row, ...]
          group + secondary key     -> storeMap[key1][key2] = [row, ...]
        """
        key1 = getattr(r, self.firstKey)
        if self.secondKey is not None:
            key2 = getattr(r, self.secondKey)
            if key1 not in storeMap:
                storeMap[key1] = {}
            if not self.groupKey:
                storeMap[key1][key2] = r
            else:
                if key2 not in storeMap[key1]:
                    storeMap[key1][key2] = []
                storeMap[key1][key2].append(r)
        elif not self.groupKey:
            storeMap[key1] = r
        else:
            if key1 not in storeMap:
                storeMap[key1] = []
            storeMap[key1].append(r)

    def read(self, handle):
        """Parse tab-delimited rows from *handle* into the key map.

        Raises:
            ValueError: a field cannot be converted to its column type.
            InvalidFormat: a row is too short for a non-optional column.
        """
        cols = self['cgformat']['columnOrder']

        # Resolve converter and optional flag once per column instead of
        # once per row.
        colType = {}
        optional = {}
        for col in cols:
            typeName = self._column_setting(col, 'type', None)
            if typeName == 'float':
                colType[col] = float
            elif typeName == 'int':
                colType[col] = int
            else:
                colType[col] = str
            optional[col] = self._column_setting(col, 'optional', False)

        reader = csv.reader(handle, delimiter="\t")

        storeMap = self.__get_firstmap__()
        comment = None
        if 'comment' in self['cgformat']:
            comment = self['cgformat']['comment']

        for linenum, row in enumerate(reader, 1):
            # csv.reader yields [] for a blank line; the ``row`` guard keeps
            # such a record from raising IndexError here, so it is validated
            # like any other short row below.
            if comment is not None and row and row[0].startswith(comment):
                continue

            r = self.__row_class__()
            for i, col in enumerate(cols):
                if len(row) > i:
                    try:
                        setattr(r, col, colType[col](row[i]))
                    except ValueError:
                        raise ValueError(
                            "col invalid type %s on line %d"
                            % (row[i], linenum))
                elif optional[col]:
                    setattr(r, col, None)
                else:
                    # Single-argument call form prints identically under
                    # Python 2 and Python 3.
                    print(row)
                    raise InvalidFormat("missing colum " + col)

            self._store_row(storeMap, r)
        self.loaded = True

    def get_key_list(self):
        """
        List keys
        """
        if not self.loaded:
            self.load()
        return self.__get_firstmap__().keys()

    def get_by(self, key):
        """
        get by key
        """
        if not self.loaded:
            self.load()
        return self.__get_firstmap__()[key]

    def get_values(self):
        """
        get values
        """
        if not self.loaded:
            self.load()
        return self.__get_firstmap__().values()

    def get_map(self):
        """
        get key map
        """
        if not self.loaded:
            self.load()
        return self.__get_firstmap__()

    def has_key(self, key):
        """
        Does the table have a key
        """
        if not self.loaded:
            self.load()
        return key in self.__get_firstmap__()

    def __get_firstmap__(self):
        """Return the dict that indexes rows by the first-level key."""
        return getattr(self, self.firstKey + "_map")

    def init_blank(self):
        """Turn this object into an empty, already-loaded table."""
        self.free()
        self['cgdata'] = {'type': self['cgformat']['name']}
        self.loaded = True

    def insert(self, name, vals):
        """Build a row from the mapping *vals* and add it to the table.

        *name* is accepted for interface compatibility but is not used; the
        row is indexed by the key columns found in *vals*.

        Raises:
            InvalidFormat: *vals* lacks a column that is not optional.
        """
        storeMap = self.__get_firstmap__()
        r = self.__row_class__()
        for col in self['cgformat']['columnOrder']:
            if col in vals:
                setattr(r, col, vals[col])
            elif self._column_setting(col, 'optional', False):
                setattr(r, col, None)
            else:
                raise InvalidFormat("missing colum " + col)

        self._store_row(storeMap, r)

    def write(self, handle):
        """Write every row to *handle* as tab-delimited text."""
        writer = csv.writer(handle, delimiter="\t", lineterminator="\n")
        columns = self['cgformat']['columnOrder']
        for row in self.row_iter():
            writer.writerow([getattr(row, col) for col in columns])

    def row_iter(self):
        """Yield every stored row object.

        All four storage layouts are unwrapped down to the row objects, so
        tables that use a secondary key yield rows rather than the inner
        dicts or their keys.
        """
        keyMap = self.__get_firstmap__()
        for rowKey in keyMap:
            entry = keyMap[rowKey]
            if self.secondKey is not None:
                # entry is a dict keyed by the secondary key
                buckets = [entry[subKey] for subKey in entry]
            else:
                buckets = [entry]
            for bucket in buckets:
                if not self.groupKey:
                    yield bucket
                else:
                    # grouped tables keep a list of rows per key
                    for elem in bucket:
                        yield elem