Mercurial > repos > melissacline > ucsc_cancer_utilities
comparison seg2matrix/CGData/BaseTable.py @ 31:ab20c0d04f4a
add seg2matrix tool
author | jingchunzhu |
---|---|
date | Fri, 24 Jul 2015 13:10:11 -0700 |
parents | |
children | b6f5d2d1b047 |
comparison
equal
deleted
inserted
replaced
30:7a7a52e9b019 | 31:ab20c0d04f4a |
---|---|
1 | |
2 from CGData import CGObjectBase | |
3 | |
4 import csv | |
5 import types | |
6 | |
class TableRow(object):
    """Base class for a single table row whose columns are plain attributes.

    Concrete row classes are expected to supply a ``__format__`` mapping
    with a ``'columnOrder'`` list (BaseTable builds such subclasses with
    ``type(...)``); ``__str__`` reads the column names from it.

    NOTE(review): storing a dict under the ``__format__`` dunder shadows the
    hook used by the built-in ``format()``, so formatting a row that way
    fails with TypeError; ``str(row)`` is unaffected.
    """

    def __str__(self):
        # Render as "<col=value,col=value,...>" following the column order.
        pairs = []
        for name in self.__format__['columnOrder']:
            pairs.append("%s=%s" % (name, getattr(self, name)))
        return "<%s>" % ",".join(pairs)
13 | |
class InvalidFormat(Exception):
    """Error raised when table data does not match the declared format."""

    def __init__(self, txt):
        # txt: human-readable description of the format problem.
        super(InvalidFormat, self).__init__(txt)
17 | |
class BaseTable(CGObjectBase):
    """Tab-delimited table whose rows are indexed by keys named in ``self['cgformat']``.

    The format description is read from ``self['cgformat']`` and uses these
    entries:

    * ``name``         -- used to name the generated row class.
    * ``columnOrder``  -- list of column names, in file order.
    * ``columnDef``    -- optional per-column settings: ``type`` (``'float'``,
      ``'int'``, anything else means ``str``) and ``optional``.
    * ``primaryKey``   -- column whose value maps to exactly one row.
    * ``groupKey``     -- column whose value maps to a list of rows; when
      both ``primaryKey`` and ``groupKey`` are present, ``groupKey`` wins.
    * ``secondaryKey`` -- optional second index level below the first key.
    * ``comment``      -- optional prefix; rows whose first field starts
      with it are skipped by ``read``.

    Rows live in a dict stored on the instance under ``<firstKey>_map``.

    NOTE(review): ``self.load()`` and ``self.__format__`` are not defined in
    this class; presumably they come from ``CGObjectBase`` -- confirm.
    """

    # Maps a columnDef 'type' name to its converter; unknown names use str.
    _CONVERTERS = {'float': float, 'int': int}

    def __init__(self):
        super(BaseTable, self).__init__()
        # One row class per format, so each row can see its column order.
        self.__row_class__ = type(
            "TableRow_" + self['cgformat']['name'],
            (TableRow,),
            dict(__format__=self.__format__)
        )
        self.free()

    def free(self):
        """Drop all loaded rows and recompute the key settings from the format."""
        fmt = self['cgformat']
        self.firstKey = None
        self.secondKey = None
        self.groupKey = None
        self.loaded = False

        if 'primaryKey' in fmt:
            self.firstKey = fmt['primaryKey']
            setattr(self, fmt['primaryKey'] + "_map", {})
            self.groupKey = False

        # A groupKey overrides primaryKey as the first index level.
        if 'groupKey' in fmt:
            self.firstKey = fmt['groupKey']
            setattr(self, fmt['groupKey'] + "_map", {})
            self.groupKey = True

        if 'secondaryKey' in fmt:
            self.secondKey = fmt['secondaryKey']

    def _column_def(self, col):
        """Return the ``columnDef`` entry for ``col``, or an empty dict."""
        fmt = self['cgformat']
        if 'columnDef' in fmt and col in fmt['columnDef']:
            return fmt['columnDef'][col]
        return {}

    def _is_optional(self, col):
        """Return the ``optional`` setting of ``col`` (False when unset)."""
        col_def = self._column_def(col)
        if 'optional' in col_def:
            return col_def['optional']
        return False

    def _ensure_loaded(self):
        """Trigger ``self.load()`` the first time the data is needed."""
        if not self.loaded:
            self.load()

    def _store_row(self, row):
        """Index ``row`` in the first-key map according to the key settings.

        Without a group key a later row replaces an earlier one that has
        the same key(s); with a group key rows sharing a key are appended
        to a list.
        """
        store = self.__get_firstmap__()
        key1 = getattr(row, self.firstKey)
        if self.secondKey is not None:
            key2 = getattr(row, self.secondKey)
            inner = store.setdefault(key1, {})
            if self.groupKey:
                inner.setdefault(key2, []).append(row)
            else:
                inner[key2] = row
        elif self.groupKey:
            store.setdefault(key1, []).append(row)
        else:
            store[key1] = row

    def read(self, handle):
        """Parse tab-delimited rows from ``handle`` and index them.

        Completely empty rows and rows whose first field starts with the
        configured comment prefix are skipped.

        Raises:
            ValueError: a field cannot be converted to its column type.
            InvalidFormat: a row is too short for a non-optional column.
        """
        cols = self['cgformat']['columnOrder']

        # Resolve each column's converter and optional flag once, up front,
        # instead of once per row.
        converters = {}
        optional = {}
        for col in cols:
            col_def = self._column_def(col)
            type_name = col_def['type'] if 'type' in col_def else None
            converters[col] = self._CONVERTERS.get(type_name, str)
            optional[col] = self._is_optional(col)

        comment = None
        if 'comment' in self['cgformat']:
            comment = self['cgformat']['comment']

        reader = csv.reader(handle, delimiter="\t")
        # linenum counts csv records, starting at 1.
        for linenum, row in enumerate(reader, 1):
            if not row:
                # csv.reader yields [] for blank lines; there is no first
                # field to test against the comment prefix, so skip them.
                continue
            if comment is not None and row[0].startswith(comment):
                continue

            r = self.__row_class__()
            for i, col in enumerate(cols):
                if i < len(row):
                    try:
                        setattr(r, col, converters[col](row[i]))
                    except ValueError:
                        raise ValueError("col invalid type %s on line %d" % (row[i], linenum))
                elif optional[col]:
                    setattr(r, col, None)
                else:
                    # The offending row is reported in the exception rather
                    # than printed to stdout.
                    raise InvalidFormat(
                        "missing column %s on line %d: %r" % (col, linenum, row))
            self._store_row(r)
        self.loaded = True

    def get_key_list(self):
        """
        List keys
        """
        self._ensure_loaded()
        return self.__get_firstmap__().keys()

    def get_by(self, key):
        """
        get by key
        """
        self._ensure_loaded()
        return self.__get_firstmap__()[key]

    def get_values(self):
        """
        get values
        """
        self._ensure_loaded()
        return self.__get_firstmap__().values()

    def get_map(self):
        """
        get key map
        """
        self._ensure_loaded()
        return self.__get_firstmap__()

    def has_key(self, key):
        """
        Does the table have a key
        """
        self._ensure_loaded()
        return key in self.__get_firstmap__()

    def __get_firstmap__(self):
        """Return the dict stored under ``<firstKey>_map``."""
        return getattr(self, self.firstKey + "_map")

    def init_blank(self):
        """Reset to an empty table that is already marked as loaded."""
        self.free()
        self['cgdata'] = {'type': self['cgformat']['name']}
        self.loaded = True

    def insert(self, name, vals):
        """Build a row from the mapping ``vals`` and index it.

        ``name`` is accepted for interface compatibility but is not used.
        Values are stored as given (no type conversion is applied).

        Raises:
            InvalidFormat: ``vals`` lacks a non-optional column.
        """
        r = self.__row_class__()
        for col in self['cgformat']['columnOrder']:
            if col in vals:
                setattr(r, col, vals[col])
            elif self._is_optional(col):
                setattr(r, col, None)
            else:
                raise InvalidFormat("missing column " + col)
        self._store_row(r)

    def write(self, handle):
        """Write every row to ``handle`` as tab-delimited text, one per line."""
        cols = self['cgformat']['columnOrder']
        writer = csv.writer(handle, delimiter="\t", lineterminator="\n")
        for row in self.row_iter():
            writer.writerow([getattr(row, col) for col in cols])

    def row_iter(self):
        """Yield every stored row object.

        Descends through the secondary-key level when one is configured,
        so callers always receive row objects, never the nested dicts.
        """
        key_map = self.__get_firstmap__()
        for key1 in key_map:
            entry = key_map[key1]
            if self.secondKey is not None:
                for key2 in entry:
                    if self.groupKey:
                        for row in entry[key2]:
                            yield row
                    else:
                        yield entry[key2]
            elif self.groupKey:
                for row in entry:
                    yield row
            else:
                yield entry