annotate seg2matrix/CGData/__init__.py @ 47:23d98125d20c

parse snpEff output
author jingchunzhu
date Thu, 13 Aug 2015 23:26:33 -0700
parents ab20c0d04f4a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
31
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
1
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
2 import os
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
3 import re
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
4 import json
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
5 import functools
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
6 from zipfile import ZipFile
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
7 import sys
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
8 import hashlib
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
9 """
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
10 CGData object style:
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
11
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
12 Every file type documented in the CGData specification has an equivalent object
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
13 to parse and manipulate the contents of that file type. For <dataType> there
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
14 should be a CGData.<dataType> object with a <CGData> class. These classes
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
15 should extend the baseObject class. For loading they implement the 'read'
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
16 function which will parse the contents of a file from a passed file handle.
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
17 """
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
18
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
19
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
# Maps a CGData type string to a (module path, class name) pair.
# Every handler class lives in a module of the same name inside the
# CGData package, so the module path is derived from the class name.
OBJECT_MAP = dict(
    (type_name, ('CGData.' + class_name, class_name))
    for type_name, class_name in (
        ('genomicSegment', 'GenomicSegment'),
        ('genomicMatrix', 'GenomicMatrix'),
        ('probeMap', 'ProbeMap'),
        ('probeLoc', 'ProbeLoc'),
        ('aliasMap', 'AliasMap'),
        ('idDAG', 'IDDag'),
        ('clinicalMatrix', 'ClinicalMatrix'),
        ('dataSubType', 'DataSubType'),
        ('assembly', 'Assembly'),
        ('featureDescription', 'FeatureDescription'),
        ('refGene', 'RefGene'),
        ('idList', 'IDList'),
    )
)
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
34
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
class FormatException(Exception):
    """Error raised for CGData meta-information problems.

    Within this module it is raised when a '.json' meta-info file cannot
    be found or when it names a type that has no registered class.
    """

    def __init__(self, str):
        # The parameter name shadows the builtin; it is kept unchanged so
        # that callers passing it by keyword continue to work.
        super(FormatException, self).__init__(str)
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
39
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
40
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def has_type(type_str):
    """Return True when type_str is a key registered in OBJECT_MAP."""
    is_registered = type_str in OBJECT_MAP
    return is_registered
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
43
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def get_type(type_str):
    """
    Look up and return the class registered for a CGData type string.

    type_str -- A string name of a CGData type, ie 'genomicMatrix'

    Raises KeyError when type_str is not a key of OBJECT_MAP.
    """
    module_path, class_name = OBJECT_MAP[type_str]
    # A non-empty fromlist makes __import__ return the named submodule
    # itself instead of the top-level package.
    owner = __import__(module_path, globals(), locals(), [class_name])
    return getattr(owner, class_name)
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
49
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
class UnimplementedException(Exception):
    """Raised by placeholder methods that a subclass is expected to supply."""

    def __init__(self, str="Method not implemented"):
        # The parameter name shadows the builtin; it is kept unchanged so
        # that callers passing it by keyword continue to work.
        super(UnimplementedException, self).__init__(str)
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
53
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
class CGObjectBase(dict):
    """
    This is the base object for CGData loadable objects.

    The instance is itself a dict that holds the object's meta-information
    (the content of the '<path>.json' file).  The base class covers
    meta-information access, loading/unloading, storing and zip (cgz)
    file access; the data file itself is parsed and emitted by the
    'read' and 'write' methods that subclasses implement.
    """

    # NOTE(review): this attribute shares its name with the __format__
    # special method, so the builtin format() fails on instances while it
    # is None.  It is kept because __init__ copies a non-None value into
    # self['cgformat'].
    __format__ = None

    # Python 2 needs 'U' to get universal newlines; Python 3 text mode
    # already translates newlines and current releases reject 'U'.
    _READ_MODE = 'rU' if sys.version_info[0] < 3 else 'r'

    # Types treated as a plain link name by get_link_map.
    try:
        _STRING_TYPES = (str, unicode)
    except NameError:
        # Python 3: there is no separate unicode type.
        _STRING_TYPES = (str,)

    def __init__(self):
        super(CGObjectBase, self).__init__()
        self.path = None
        self.zip = None
        self.light_mode = False
        self.loaded = False
        if 'cgformat' not in self and self.__format__ is not None:
            self['cgformat'] = self.__format__

    def load(self, path=None, **kw):
        """
        Load a data object in from path.

        path -- location of the data file; defaults to self.path.  When
                self.zip is set it is the member name inside that zip file.
        kw   -- passed through unchanged to self.read

        Raises OSError when neither path nor self.path is defined.
        """
        if path is None:
            path = self.path
        if path is None:
            raise OSError("Path not defined")

        if self.zip is None:
            # A missing data file is tolerated: only the meta-information
            # below (if present) is picked up.
            if os.path.exists(path):
                with open(path, self._READ_MODE) as dhandle:
                    self.read(dhandle, **kw)
        else:
            archive = ZipFile(self.zip)
            try:
                # Open the resolved path, so an explicit argument is
                # honoured for zip members just as it is for plain files.
                dhandle = archive.open(path, self._READ_MODE)
                try:
                    self.read(dhandle, **kw)
                finally:
                    dhandle.close()
            finally:
                archive.close()

        self.path = path
        meta_path = path + ".json"
        if os.path.exists(meta_path):
            with open(meta_path) as mhandle:
                meta = json.loads(mhandle.read())
            # Throw away empty values
            self.update(dict((k, v) for k, v in meta.items()
                             if v is not None))
        self.loaded = True

    def unload(self):
        """Call to start freeing up memory"""
        # NOTE(review): 'free' is not defined on this base class; it has
        # to be supplied by the subclass.
        self.free()
        self.loaded = False

    def store(self, path=None):
        """
        Store an object onto the path provided.
        Will write a path.json file and, unless light_mode is set,
        the data file at path as well.

        Raises OSError when neither path nor self.path is defined.
        """
        if path is None:
            path = self.path
        if path is None:
            raise OSError("Path not defined")

        meta = dict(self)
        # 'cgformat' is left out of the stored meta-information.
        meta.pop('cgformat', None)
        # Serialize before opening so that a failure to serialize does not
        # leave a truncated .json file behind.
        payload = json.dumps(meta)
        with open(path + ".json", "w") as mhandle:
            mhandle.write(payload)

        if not self.light_mode:
            self.path = path
            with open(path, "w") as dhandle:
                self.write(dhandle)

    def load_keyset(self, key_predicate):
        """
        Generator over the items produced by self.read_keyset for the
        data file at self.path (a zip member when self.zip is set).

        Yields nothing when self.path is None or, for a plain file, when
        the file does not exist.
        """
        if self.path is None:
            return

        if self.zip is None:
            if not os.path.exists(self.path):
                return
            dhandle = open(self.path, self._READ_MODE)
            # try/finally closes the handle even if the consumer abandons
            # the generator before it is exhausted.
            try:
                for item in self.read_keyset(dhandle, key_predicate):
                    yield item
            finally:
                dhandle.close()
        else:
            archive = ZipFile(self.zip)
            try:
                dhandle = archive.open(self.path, self._READ_MODE)
                try:
                    for item in self.read_keyset(dhandle, key_predicate):
                        yield item
                finally:
                    dhandle.close()
            finally:
                archive.close()

    def read_keyset(self, handle, key_predicate=None):
        """Implemented by subclasses; the base version always raises."""
        raise UnimplementedException()

    def read(self, handle):
        """
        The read method is implemented by the subclass that
        inherits from CGObjectBase. It is passed a handle
        to a file (which may be on file, in a compressed object, or
        from a network source). The implementing class then uses this
        handle to populate its data structures.
        """
        raise UnimplementedException()

    def write(self, handle):
        """
        The write method is implemented by the subclass that
        inherits from CGObjectBase. It is passed a handle to an
        output file, which it can use 'write' method calls to emit
        its data.
        """
        raise UnimplementedException()

    def get_name(self):
        """
        Get object name
        """
        return self.get('cgdata', {}).get('name', None)

    def get_type(self):
        """
        Get object type
        """
        return self.get('cgdata', {}).get('type', None)

    def get_link_map(self):
        """
        Get a dict that represents the declared file relationships from
        the meta-info.

        Each value has the form {'type': ..., 'name': ...}.  An object
        without a 'cgdata' entry yields an empty dict.
        """
        out = {}
        # Tolerate missing 'cgdata', as get_name and get_type do.
        cgdata = self.get('cgdata', {})

        if 'cgformat' in self and 'links' in self['cgformat']:
            for field in self['cgformat']['links']:
                if field not in cgdata:
                    continue
                target = cgdata[field]
                if isinstance(target, self._STRING_TYPES):
                    # A bare string is the name; the field is the type.
                    out[field] = {'type': field, 'name': target}
                else:
                    out[field] = {'type': target['type'],
                                  'name': target['name']}

        for key in ('columnKeySrc', 'rowKeySrc'):
            if key in cgdata:
                link = cgdata[key]
                out[key] = {'type': link['type'], 'name': link['name']}
        return out

    def add_history(self, desc):
        """Append desc to the 'history' list, creating the list if needed."""
        if 'history' not in self:
            self['history'] = []
        self['history'].append(desc)
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
205
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
206
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
class CGDataMatrixObject(CGObjectBase):
    """
    Interface for matrix-shaped CGData objects.

    Every accessor below is a placeholder that raises
    UnimplementedException until a subclass overrides it.
    """

    def __init__(self):
        CGObjectBase.__init__(self)

    def get_col_namespace(self):
        """Return the name of the column namespace."""
        raise UnimplementedException()

    def get_row_namespace(self):
        """Return the name of the row namespace."""
        raise UnimplementedException()

    def get_col_list(self):
        """Return the names of the columns."""
        raise UnimplementedException()

    def get_row_list(self):
        """Return the names of the rows."""
        raise UnimplementedException()

    def get_row_map(self):
        """Return the map of row name indexes."""
        raise UnimplementedException()

    def get_col_map(self):
        """Return the map of column name indexes."""
        raise UnimplementedException()

    def get_row_pos(self, row):
        """Placeholder taking a row; overridden by subclasses."""
        raise UnimplementedException()

    def get_col_pos(self, col):
        """Placeholder taking a column; overridden by subclasses."""
        raise UnimplementedException()

    def get_row_count(self):
        """Placeholder for the row count; overridden by subclasses."""
        raise UnimplementedException()

    def get_col_count(self):
        """Placeholder for the column count; overridden by subclasses."""
        raise UnimplementedException()

    def get_row(self, row_name):
        """Placeholder taking a row name; overridden by subclasses."""
        raise UnimplementedException()

    def get_col(self, col_name):
        """Placeholder taking a column name; overridden by subclasses."""
        raise UnimplementedException()
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
267
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
268
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def cg_new(type_str):
    """
    Create a new, empty object of the class registered for type_str.

    The class is found through the internally defined map of official
    CGData data types, so requesting a 'genomicMatrix' initializes a
    CGData.GenomicMatrix.GenomicMatrix.

    type_str -- A string name of a CGData type, ie 'genomicMatrix'

    Raises KeyError when type_str is not a key of OBJECT_MAP.
    """
    # The module-level get_type performs the same map lookup and import.
    factory = get_type(type_str)
    return factory()
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
283
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def load(path, zip=None):
    """
    load is the automatic CGData loading function. There has to
    be a '.json' file for this function to work. It inspects the
    '.json' file and uses the 'type' field to determine the
    appropriate object loader to use. The object is created
    (using the cg_new function) and its 'load' method is called with
    the path of the data file. If the 'zip' parameter is not None,
    then it is used as the path to a zipfile, and the path parameter
    is used as a path inside the zip file to the object data.

    path -- path to file (in file system space if zip is None, otherwise
            it is the location in the zip file); a missing '.json'
            suffix is added
    zip  -- path to zip file (None by default)

    Returns the loaded object.
    Raises FormatException when the meta-info file cannot be found or
    names a type that is not in OBJECT_MAP.
    """
    suffix = ".json"
    if not path.endswith(suffix):
        path = path + suffix
    # path is guaranteed to end with the suffix at this point.
    data_path = path[:-len(suffix)]

    try:
        if zip is None:
            with open(path) as handle:
                raw_meta = handle.read()
        else:
            archive = ZipFile(zip)
            try:
                # ZipFile.read raises KeyError for a missing member.
                raw_meta = archive.read(path).decode('utf-8')
            finally:
                archive.close()
    except (IOError, KeyError):
        raise FormatException("Meta-info (%s) file not found" % (path))

    meta = json.loads(raw_meta)
    # Throw away empty values
    meta = dict((k, v) for k, v in meta.items() if v is not None)

    type_name = meta['cgdata']['type']
    if type_name not in OBJECT_MAP:
        raise FormatException("%s class not found" % (type_name))

    out = cg_new(type_name)
    out.update(meta)
    out.path = data_path
    if zip is not None:
        # Makes the object's own load() read the data from the zip file.
        out.zip = zip
    out.load(data_path)
    return out
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
321
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
322
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def light_load(path, zip=None):
    """Load only the JSON meta-info of a CGData object, not its data.

    ``path`` may name either the data file or its ``.json`` companion; the
    ``.json`` suffix is appended when it is missing.  When ``zip`` is None
    the meta-info is read from the filesystem, otherwise ``zip`` is handed
    to ``ZipFile`` and ``path`` is read as a member of that archive.

    Returns the object built by ``cg_new`` for the meta-info's
    ``['cgdata']['type']``, updated with the meta-info and with ``path``
    (the name without the ``.json`` suffix), ``zip`` and
    ``light_mode = True`` set on it.

    Raises FormatException when the filesystem meta-info file cannot be
    opened, or when the declared type is not in ``OBJECT_MAP``.  Errors
    from the zip archive (e.g. KeyError for a missing member) and JSON
    parse errors propagate unchanged.
    """
    if not path.endswith(".json"):
        path = path + ".json"

    # The dot is escaped so that only a literal ".json" suffix is stripped.
    data_path = re.sub(r'\.json$', '', path)

    if zip is None:
        try:
            # The context manager closes the handle even if parsing fails.
            # Plain 'r' is used because JSON treats every newline flavour
            # as whitespace and the 'U' flag is rejected by Python 3.11+.
            with open(path, 'r') as handle:
                meta = json.loads(handle.read())
        except IOError:
            raise FormatException("Meta-info (%s) file not found" % (path))
    else:
        # ZipFile.read() fetches the member in one call, so there is no
        # member handle to leak; the archive is closed on every exit path.
        with ZipFile(zip) as archive:
            meta = json.loads(archive.read(path))

    # Throw away empty values
    meta = dict((k, v) for k, v in meta.items() if v is not None)

    cg_type = meta['cgdata']['type']
    if cg_type not in OBJECT_MAP:
        raise FormatException("%s class not found" % (cg_type))

    out = cg_new(cg_type)
    out.update(meta)
    out.path = data_path
    out.zip = zip
    out.light_mode = True
    return out
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
354
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
# Verbosity threshold read by the logging helpers in this module:
# debug() writes only when LOG_LEVEL < 1, info() when LOG_LEVEL < 2 and
# warn() when LOG_LEVEL < 3, while error() always writes.  With the
# default of 2, only warnings and errors reach stderr.
LOG_LEVEL = 2
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
357
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def info(eStr):
    """Write ``eStr`` to stderr as a "LOG: " line when LOG_LEVEL is below 2."""
    if LOG_LEVEL < 2:
        # The 1-tuple keeps a tuple-valued eStr from being unpacked as
        # several format arguments.
        sys.stderr.write("LOG: %s\n" % (eStr,))
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
362
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def debug(eStr):
    """Write ``eStr`` to stderr as a "DEBUG: " line when LOG_LEVEL is below 1."""
    if LOG_LEVEL < 1:
        # The 1-tuple keeps a tuple-valued eStr from being unpacked as
        # several format arguments.
        sys.stderr.write("DEBUG: %s\n" % (eStr,))
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
367
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def warn(eStr):
    """Write ``eStr`` to stderr as a "WARNING: " line when LOG_LEVEL is below 3."""
    if LOG_LEVEL < 3:
        # The 1-tuple keeps a tuple-valued eStr from being unpacked as
        # several format arguments.
        sys.stderr.write("WARNING: %s\n" % (eStr,))
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
372
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
373
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def error(eStr):
    """Write ``eStr`` to stderr as an "ERROR: " line, regardless of LOG_LEVEL."""
    # The 1-tuple keeps a tuple-valued eStr from being unpacked as several
    # format arguments (which raised TypeError for tuples of length != 1).
    sys.stderr.write("ERROR: %s\n" % (eStr,))
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
377