annotate seg2matrix/CGData/__init__.py @ 37:e81019e3ac99

Updated synapseGetDataset to look at the filename rather than the (no longer existent) content type field to determine if the data is in zip format
author melissacline
date Mon, 27 Jul 2015 16:29:24 -0700
parents ab20c0d04f4a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
31
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
1
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
2 import os
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
3 import re
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
4 import json
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
5 import functools
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
6 from zipfile import ZipFile
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
7 import sys
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
8 import hashlib
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
9 """
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
10 CGData object style:
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
11
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
12 Every file type documented in the CGData specification has an equivalent object
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
13 to parse and manipulate the contents of that file type. For <dataType> there
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
14 should be a CGData.<dataType> object with a <CGData> class. These classes
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
15 should extend the baseObject class. For loading they implement the 'read'
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
16 function which will parse the contents of a file from a passed file handle.
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
17 """
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
18
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
19
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
# Registry of CGData type names.  Each value is a
# (module path, class name) pair; get_type/cg_new import the module and
# fetch the class from it on demand.
OBJECT_MAP = {
    'genomicSegment': ('CGData.GenomicSegment', 'GenomicSegment'),
    'genomicMatrix': ('CGData.GenomicMatrix', 'GenomicMatrix'),
    'probeMap': ('CGData.ProbeMap', 'ProbeMap'),
    'probeLoc': ('CGData.ProbeLoc', 'ProbeLoc'),
    'aliasMap': ('CGData.AliasMap', 'AliasMap'),
    'idDAG': ('CGData.IDDag', 'IDDag'),
    'clinicalMatrix': ('CGData.ClinicalMatrix', 'ClinicalMatrix'),
    'dataSubType': ('CGData.DataSubType', 'DataSubType'),
    'assembly': ('CGData.Assembly', 'Assembly'),
    'featureDescription': ('CGData.FeatureDescription', 'FeatureDescription'),
    'refGene': ('CGData.RefGene', 'RefGene'),
    'idList': ('CGData.IDList', 'IDList'),
}
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
34
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
class FormatException(Exception):
    """
    Error raised by the loaders in this module when the meta-information
    file cannot be opened or names a type that is not in OBJECT_MAP.
    """

    def __init__(self, str):
        # 'str' shadows the builtin, but it is the public parameter name and
        # is therefore left untouched.
        super(FormatException, self).__init__(str)
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
39
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
40
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def has_type(type_str):
    """
    Tell whether type_str is a registered CGData type name.

    type_str -- A string name of a CGData type, ie 'genomicMatrix'
    """
    is_registered = type_str in OBJECT_MAP
    return is_registered
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
43
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def get_type(type_str):
    """
    Return the class registered in OBJECT_MAP for a CGData type name.

    type_str -- A string name of a CGData type, ie 'genomicMatrix'

    Raises KeyError when type_str is not a key of OBJECT_MAP.
    """
    module_name, class_name = OBJECT_MAP[type_str]
    # A non-empty fromlist makes __import__ hand back the named submodule
    # itself instead of the top-level package.
    loaded_module = __import__(module_name, globals(), locals(), [class_name])
    return getattr(loaded_module, class_name)
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
49
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
class UnimplementedException(Exception):
    """Raised by base-class methods that a subclass is expected to override."""

    def __init__(self, str="Method not implemented"):
        # 'str' shadows the builtin, but it is the public parameter name.
        super(UnimplementedException, self).__init__(str)
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
53
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
class CGObjectBase(dict):
    """
    This is the base object for CGData loadable objects.

    The object is itself a dict that holds the meta-information.  The
    methods of the base class cover loading/unloading, storing and
    manipulation of that meta-information as well as zip (cgz) file access.
    Subclasses supply 'read' and 'write' (and 'read_keyset' where keyed
    streaming is wanted).
    """

    # Subclasses set this to their 'cgformat' description; __init__ copies it
    # into the dict.  NOTE: the name shadows object.__format__, so calling
    # format() on an instance does not work.  It is kept because it is part
    # of the existing subclass interface.
    __format__ = None

    # (str, unicode) on Python 2 and (str, str) on Python 3.
    _string_types = (type(""), type(u""))

    # 'rU' asks Python 2 for universal-newline reading.  Python 3 text mode
    # already translates newlines and current versions reject the 'U' flag.
    _read_mode = 'rU' if sys.version_info[0] < 3 else 'r'

    def __init__(self):
        self.path = None         # data file path (member name when zip is set)
        self.zip = None          # path of the enclosing zip file, if any
        self.light_mode = False  # when True, store() writes meta-info only
        self.loaded = False      # set by load(), cleared by unload()
        if 'cgformat' not in self and self.__format__ is not None:
            self['cgformat'] = self.__format__
        super(CGObjectBase, self).__init__()

    def load(self, path=None, **kw):
        """
        Load a data object in from path.

        path -- data file to read; when self.zip is set it is the member
                name inside that zip file.  Defaults to self.path.
        kw   -- forwarded unchanged to self.read, so an overriding 'read'
                must accept whatever keywords its callers pass here.

        If '<path>.json' exists on the file system, its non-null entries are
        merged into this dict.  When no zip is set and the data file does not
        exist, reading is skipped silently; the object is still marked loaded.

        Raises OSError if neither path nor self.path is defined.
        """
        if path is None and self.path is not None:
            path = self.path
        if path is None:
            raise OSError("Path not defined")

        if self.zip is None:
            if os.path.exists(path):
                with open(path, self._read_mode) as dhandle:
                    self.read(dhandle, **kw)
        else:
            archive = ZipFile(self.zip)
            try:
                # Open the requested member.  The previous code opened
                # self.path here, which failed when only the argument was set.
                dhandle = archive.open(path, self._read_mode)
                try:
                    self.read(dhandle, **kw)
                finally:
                    dhandle.close()
            finally:
                archive.close()

        self.path = path
        meta_path = path + ".json"
        if os.path.exists(meta_path):
            with open(meta_path, self._read_mode) as mhandle:
                meta = json.loads(mhandle.read())
            # Throw away empty values
            self.update(dict((k, v) for k, v in meta.items() if v is not None))
        self.loaded = True

    def unload(self):
        """Call to start freeing up memory"""
        # NOTE(review): free() is not defined on this class in the code
        # shown here; presumably subclasses supply it -- confirm.
        self.free()
        self.loaded = False

    def store(self, path=None):
        """
        Store an object onto the path provided.

        Writes the meta-information (without the 'cgformat' entry) to
        '<path>.json'.  Unless light_mode is set it also records path as
        self.path and writes the data file through self.write.

        path -- destination of the data file; defaults to self.path.

        Raises OSError if neither path nor self.path is defined.
        """
        if path is None and self.path is not None:
            path = self.path
        if path is None:
            raise OSError("Path not defined")

        meta = dict(self)
        meta.pop('cgformat', None)
        with open(path + ".json", "w") as mhandle:
            mhandle.write(json.dumps(meta))

        if not self.light_mode:
            self.path = path
            with open(path, "w") as dhandle:
                self.write(dhandle)

    def load_keyset(self, key_predicate):
        """
        Generator that streams whatever self.read_keyset yields for the data
        at self.path (read from self.zip when that is set).

        key_predicate -- passed through to self.read_keyset.

        Yields nothing when self.path is None, or when no zip is set and the
        file does not exist.  The handles are closed even if the caller stops
        iterating early.
        """
        if self.path is None:
            return

        if self.zip is None:
            if os.path.exists(self.path):
                dhandle = open(self.path, self._read_mode)
                try:
                    for item in self.read_keyset(dhandle, key_predicate):
                        yield item
                finally:
                    dhandle.close()
        else:
            archive = ZipFile(self.zip)
            try:
                dhandle = archive.open(self.path, self._read_mode)
                try:
                    for item in self.read_keyset(dhandle, key_predicate):
                        yield item
                finally:
                    dhandle.close()
            finally:
                archive.close()

    def read_keyset(self, handle, key_predicate=None):
        """
        To be implemented by subclasses that support load_keyset; it receives
        the open data handle and the predicate given to load_keyset.
        """
        raise UnimplementedException()

    def read(self, handle):
        """
        The read method is implemented by the subclass that
        inherits from CGObjectBase. It is passed a handle
        to a file (which may be on file, in a compressed object, or
        from a network source). The implementing class then uses this handle
        to populate its data structures.
        """
        raise UnimplementedException()

    def write(self, handle):
        """
        The write method is implemented by the subclass that
        inherits from CGObjectBase. It is passed a handle to an
        output file, which it can use 'write' method calls to emit
        its data.
        """
        raise UnimplementedException()

    def get_name(self):
        """
        Get object name ('name' in the 'cgdata' meta-info), or None
        """
        return self.get('cgdata', {}).get('name', None)

    def get_type(self):
        """
        Get object type ('type' in the 'cgdata' meta-info), or None
        """
        return self.get('cgdata', {}).get('type', None)

    def get_link_map(self):
        """
        Get a dict that represents the declared file relationships from the
        meta-info.

        Every field listed in self['cgformat']['links'] that is present in
        the 'cgdata' section contributes one entry
        { 'type' : ..., 'name' : ... }.  A plain string value is taken as the
        name, with the field itself as the type; any other value must carry
        its own 'type' and 'name'.  'columnKeySrc' and 'rowKeySrc' are added
        in the same way whenever they are present.

        Returns an empty dict when there is no 'cgdata' section.
        """
        out = {}
        # Missing 'cgdata' is treated as empty, matching get_name/get_type.
        cgdata = self.get('cgdata', {})

        if 'cgformat' in self and 'links' in self['cgformat']:
            for field in self['cgformat']['links']:
                if field not in cgdata:
                    continue
                value = cgdata[field]
                if isinstance(value, self._string_types):
                    out[field] = {'type': field, 'name': value}
                else:
                    out[field] = {'type': value['type'], 'name': value['name']}

        # NOTE(review): this loop is taken to run regardless of 'cgformat';
        # the original indentation was not available -- confirm.
        for key in ('columnKeySrc', 'rowKeySrc'):
            if key in cgdata:
                link = cgdata[key]
                out[key] = {'type': link['type'], 'name': link['name']}
        return out

    def add_history(self, desc):
        """Append desc to the 'history' list, creating the list if needed."""
        if 'history' not in self:
            self['history'] = []
        self['history'].append(desc)
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
205
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
206
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
class CGDataMatrixObject(CGObjectBase):
    """
    Interface for matrix-shaped CGData objects.

    Every accessor below raises UnimplementedException in this class;
    concrete matrix types are expected to override them.
    """

    def __init__(self):
        CGObjectBase.__init__(self)

    def get_col_namespace(self):
        """
        Return the name of the column namespace
        """
        raise UnimplementedException()

    def get_row_namespace(self):
        """
        Return the name of the row namespace
        """
        raise UnimplementedException()

    def get_col_list(self):
        """
        Returns names of columns
        """
        raise UnimplementedException()

    def get_row_list(self):
        """
        Returns names of rows
        """
        raise UnimplementedException()

    def get_row_map(self):
        """
        Returns map of row name indexes
        """
        raise UnimplementedException()

    def get_col_map(self):
        """
        Returns map of column name indexes
        """
        raise UnimplementedException()

    # The remaining accessors carry no description in the original code; the
    # notes below are read off their names only -- confirm against the
    # concrete subclasses.

    def get_row_pos(self, row):
        # presumably the position of the given row -- TODO confirm
        raise UnimplementedException()

    def get_col_pos(self, col):
        # presumably the position of the given column -- TODO confirm
        raise UnimplementedException()

    def get_row_count(self):
        # presumably the number of rows -- TODO confirm
        raise UnimplementedException()

    def get_col_count(self):
        # presumably the number of columns -- TODO confirm
        raise UnimplementedException()

    def get_row(self, row_name):
        # presumably the data of the named row -- TODO confirm
        raise UnimplementedException()

    def get_col(self, col_name):
        # presumably the data of the named column -- TODO confirm
        raise UnimplementedException()
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
267
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
268
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def cg_new(type_str):
    """
    cg_new takes a type string and creates a new object from the
    class it names. The class is found through the internally defined
    map of all official CGData data types, so if a 'genomicMatrix' is
    requested a CGData.GenomicMatrix.GenomicMatrix is initialized.

    type_str -- A string name of a CGData type, ie 'genomicMatrix'

    Raises KeyError when type_str is not a key of OBJECT_MAP.
    """
    # get_type performs the OBJECT_MAP lookup and import; the class is then
    # instantiated without arguments.
    return get_type(type_str)()
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
283
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def load(path, zip=None):
    """
    load is the automatic CGData loading function. There has to
    be a '.json' file for this function to work. It inspects the
    '.json' file and uses the 'type' field to determine the
    appropriate object loader to use. The object is created
    (using the cg_new function) and the 'read' method is passed
    a handle to the data file. If the 'zip' parameter is not None,
    then it is used as the path to a zipfile, and the path parameter
    is used as a path inside the zip file to the object data.

    path -- path to file (in file system space if zip is None, otherwise
            it is the location in the zip file); the '.json' suffix is
            optional
    zip  -- path to zip file (None by default)

    Returns the loaded object.

    Raises FormatException when the meta-info file cannot be opened on the
    file system, or when its type is not a key of OBJECT_MAP.
    """
    suffix = ".json"
    if not path.endswith(suffix):
        path = path + suffix
    # path now always ends in '.json'; the data file is the same path
    # without that suffix.
    data_path = path[:-len(suffix)]

    if zip is None:
        try:
            # Newline handling is irrelevant to JSON, so plain 'r' is enough.
            with open(path, 'r') as handle:
                meta = json.loads(handle.read())
        except IOError:
            raise FormatException("Meta-info (%s) file not found" % (path))
    else:
        # Read the meta-info from inside the archive, as light_load does.
        archive = ZipFile(zip)
        try:
            handle = archive.open(path)
            try:
                meta = json.loads(handle.read())
            finally:
                handle.close()
        finally:
            archive.close()

    # Throw away empty values
    meta = dict((k, v) for k, v in meta.items() if v is not None)

    type_name = meta['cgdata']['type']
    if type_name not in OBJECT_MAP:
        raise FormatException("%s class not found" % (type_name))

    out = cg_new(type_name)
    out.update(meta)
    # With zip set, the object's own load() reads data_path from the archive.
    out.zip = zip
    out.path = data_path
    out.load(data_path)
    return out
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
321
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
322
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def light_load(path, zip=None):
    """Create a CGData object from its ``.json`` meta-info only.

    The data file itself is not read; the returned object is flagged with
    ``light_mode = True``.

    path -- path of the data file or of its ``.json`` meta-info file
            (``.json`` is appended when it is missing).
    zip  -- optional archive handed to ZipFile; when given, the meta-info
            is read from inside that archive instead of from disk.

    Returns the object built by cg_new() for meta['cgdata']['type'], updated
    with the meta-info and with ``path`` (the path without ``.json``),
    ``zip`` and ``light_mode`` set.

    Raises FormatException when the on-disk meta-info cannot be read or when
    the declared type is not in OBJECT_MAP.
    """
    if not path.endswith(".json"):
        path = path + ".json"

    # path always ends with ".json" at this point, so a plain slice strips it.
    data_path = path[:-len(".json")]

    # Plain 'r' replaces the old 'rU' mode: newline translation is irrelevant
    # to JSON parsing and 'U' is rejected by newer interpreters.
    if zip is None:
        try:
            # 'with' closes the handle even when the JSON is malformed.
            with open(path, 'r') as handle:
                meta = json.loads(handle.read())
        except IOError:
            raise FormatException("Meta-info (%s) file not found" % (path))
    else:
        # Both the archive and the member handle are released on any error.
        with ZipFile(zip) as archive:
            with archive.open(path, 'r') as handle:
                meta = json.loads(handle.read())

    # Throw away empty values
    meta = dict((k, v) for k, v in meta.items() if v is not None)

    data_type = meta['cgdata']['type']
    if data_type not in OBJECT_MAP:
        raise FormatException("%s class not found" % (data_type))

    out = cg_new(data_type)
    out.update(meta)
    out.path = data_path
    out.zip = zip
    out.light_mode = True
    return out
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
354
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
# Verbosity threshold read by the logging helpers in this module:
# debug() writes when LOG_LEVEL < 1, info() when LOG_LEVEL < 2,
# warn() when LOG_LEVEL < 3; error() always writes.
# (No 'global' statement is needed for an assignment at module scope.)
LOG_LEVEL = 2
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
357
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def info(eStr):
    """Write ``LOG: <eStr>`` to stderr when LOG_LEVEL is below 2.

    eStr -- message (any object); it is rendered with ``%s``.
    """
    if LOG_LEVEL < 2:
        # One-element tuple: a tuple message is printed as-is instead of
        # being unpacked as format arguments (which raised TypeError).
        sys.stderr.write("LOG: %s\n" % (eStr,))
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
362
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def debug(eStr):
    """Write ``DEBUG: <eStr>`` to stderr when LOG_LEVEL is below 1.

    eStr -- message (any object); it is rendered with ``%s``.
    """
    if LOG_LEVEL < 1:
        # One-element tuple: a tuple message is printed as-is instead of
        # being unpacked as format arguments (which raised TypeError).
        sys.stderr.write("DEBUG: %s\n" % (eStr,))
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
367
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def warn(eStr):
    """Write ``WARNING: <eStr>`` to stderr when LOG_LEVEL is below 3.

    eStr -- message (any object); it is rendered with ``%s``.
    """
    if LOG_LEVEL < 3:
        # One-element tuple: a tuple message is printed as-is instead of
        # being unpacked as format arguments (which raised TypeError).
        sys.stderr.write("WARNING: %s\n" % (eStr,))
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
372
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
373
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
def error(eStr):
    """Write ``ERROR: <eStr>`` to stderr unconditionally.

    eStr -- message (any object); it is rendered with ``%s``.
    """
    # One-element tuple: a tuple message is printed as-is instead of being
    # unpacked as format arguments (which raised TypeError).
    sys.stderr.write("ERROR: %s\n" % (eStr,))
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
377