0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import sys
|
|
4 import numpy as np
|
|
5 import matplotlib.ticker as ticker
|
|
6 import scipy.spatial.distance as spd
|
|
7 import scipy.cluster.hierarchy as sph
|
|
8 from scipy import stats
|
|
9 import matplotlib
|
|
10 #matplotlib.use('Agg')
|
|
11 import pylab
|
|
12 import pandas as pd
|
|
13 from matplotlib.patches import Rectangle
|
|
14 from mpl_toolkits.axes_grid1 import make_axes_locatable
|
|
15 import matplotlib.pyplot as plt
|
|
16 import matplotlib.gridspec as gridspec
|
|
17 import cPickle as pickle
|
|
18
|
|
19 sys.setrecursionlimit(10000)
|
|
20
|
|
21 # samples on rows
|
|
22
|
|
23 class SqrtNorm(matplotlib.colors.Normalize):
|
|
24 """
|
|
25 Normalize a given value to the 0-1 range on a square root scale
|
|
26 """
|
|
27 def __call__(self, value, clip=None):
|
|
28 if clip is None:
|
|
29 clip = self.clip
|
|
30
|
|
31 result, is_scalar = self.process_value(value)
|
|
32
|
|
33 result = np.ma.masked_less_equal(result, 0, copy=False)
|
|
34
|
|
35 self.autoscale_None(result)
|
|
36 vmin, vmax = self.vmin, self.vmax
|
|
37 if vmin > vmax:
|
|
38 raise ValueError("minvalue must be less than or equal to maxvalue")
|
|
39 elif vmin <= 0:
|
|
40 raise ValueError("values must all be positive")
|
|
41 elif vmin == vmax:
|
|
42 result.fill(0)
|
|
43 else:
|
|
44 if clip:
|
|
45 mask = np.ma.getmask(result)
|
|
46 result = np.ma.array(np.clip(result.filled(vmax), vmin, vmax),
|
|
47 mask=mask)
|
|
48 # in-place equivalent of above can be much faster
|
|
49 resdat = result.data
|
|
50 mask = result.mask
|
|
51 if mask is np.ma.nomask:
|
|
52 mask = (resdat <= 0)
|
|
53 else:
|
|
54 mask |= resdat <= 0
|
|
55 matplotlib.cbook._putmask(resdat, mask, 1)
|
|
56 np.sqrt(resdat, resdat)
|
|
57 resdat -= np.sqrt(vmin)
|
|
58 resdat /= (np.sqrt(vmax) - np.sqrt(vmin))
|
|
59 result = np.ma.array(resdat, mask=mask, copy=False)
|
|
60 if is_scalar:
|
|
61 result = result[0]
|
|
62 return result
|
|
63
|
|
64 def inverse(self, value):
|
|
65 if not self.scaled():
|
|
66 raise ValueError("Not invertible until scaled")
|
|
67 vmin, vmax = self.vmin, self.vmax
|
|
68
|
|
69 if matplotlib.cbook.iterable(value):
|
|
70 val = np.ma.asarray(value)
|
|
71 return vmin * np.ma.power((vmax / vmin), val)
|
|
72 else:
|
|
73 return vmin * pow((vmax / vmin), value)
|
|
74
|
|
75 def autoscale(self, A):
|
|
76 '''
|
|
77 Set *vmin*, *vmax* to min, max of *A*.
|
|
78 '''
|
|
79 A = np.ma.masked_less_equal(A, 0, copy=False)
|
|
80 self.vmin = np.ma.min(A)
|
|
81 self.vmax = np.ma.max(A)
|
|
82
|
|
83 def autoscale_None(self, A):
|
|
84 ' autoscale only None-valued vmin or vmax'
|
|
85 if self.vmin is not None and self.vmax is not None:
|
|
86 return
|
|
87 A = np.ma.masked_less_equal(A, 0, copy=False)
|
|
88 if self.vmin is None:
|
|
89 self.vmin = np.ma.min(A)
|
|
90 if self.vmax is None:
|
|
91 self.vmax = np.ma.max(A)
|
|
92
|
|
93 class DataMatrix:
|
|
94 datatype = 'data_matrix'
|
|
95
|
|
96 @staticmethod
|
|
97 def input_parameters( parser ):
|
|
98 dm_param = parser.add_argument_group('Input data matrix parameters')
|
|
99 arg = dm_param.add_argument
|
|
100
|
|
101 arg( '--sep', type=str, default='\t' )
|
|
102 arg( '--out_table', type=str, default=None,
|
|
103 help = 'Write processed data matrix to file' )
|
|
104 arg( '--fname_row', type=int, default=0,
|
|
105 help = "row number containing the names of the features "
|
|
106 "[default 0, specify -1 if no names are present in the matrix")
|
|
107 arg( '--sname_row', type=int, default=0,
|
|
108 help = "column number containing the names of the samples "
|
|
109 "[default 0, specify -1 if no names are present in the matrix")
|
|
110 arg( '--metadata_rows', type=str, default=None,
|
|
111 help = "Row numbers to use as metadata"
|
|
112 "[default None, meaning no metadata")
|
|
113 arg( '--skip_rows', type=str, default=None,
|
|
114 help = "Row numbers to skip (0-indexed, comma separated) from the input file"
|
|
115 "[default None, meaning no rows skipped")
|
|
116 arg( '--sperc', type=int, default=90,
|
|
117 help = "Percentile of sample value distribution for sample selection" )
|
|
118 arg( '--fperc', type=int, default=90,
|
|
119 help = "Percentile of feature value distribution for sample selection" )
|
|
120 arg( '--stop', type=int, default=None,
|
|
121 help = "Number of top samples to select (ordering based on percentile specified by --sperc)" )
|
|
122 arg( '--ftop', type=int, default=None,
|
|
123 help = "Number of top features to select (ordering based on percentile specified by --fperc)" )
|
|
124 arg( '--def_na', type=float, default=None,
|
|
125 help = "Set the default value for missing values [default None which means no replacement]")
|
|
126
|
|
127 def __init__( self, input_file, args ):
|
|
128 self.args = args
|
|
129 self.metadata_rows = []
|
|
130 self.metadata_table = None
|
|
131 toskip = [int(l) for l in self.args.skip_rows.split(",")] if self.args.skip_rows else []
|
|
132 if self.args.metadata_rows:
|
|
133 self.metadata_rows = list([int(a) for a in self.args.metadata_rows.split(",")])
|
|
134 mdr = self.metadata_rows[::]
|
|
135 for t in toskip:
|
|
136 for i,m in enumerate(mdr):
|
|
137 if t <= m:
|
|
138 self.metadata_rows[i] -= 1
|
|
139 if self.metadata_rows:
|
|
140 header = [self.args.fname_row]+self.metadata_rows if self.args.fname_row > -1 else self.metadata_rows
|
|
141 else:
|
|
142 header = self.args.fname_row if self.args.fname_row > -1 else None
|
|
143 self.table = pd.read_table(
|
|
144 input_file, sep = self.args.sep, # skipinitialspace = True,
|
|
145 skiprows = sorted(toskip) if isinstance(toskip, list) else toskip,
|
|
146 header = sorted(header) if isinstance(header, list) else header,
|
|
147 index_col = self.args.sname_row if self.args.sname_row > -1 else None
|
|
148 )
|
|
149
|
|
150 def select( perc, top ):
|
|
151 self.table['perc'] = self.table.apply(lambda x: stats.scoreatpercentile(x,perc),axis=1)
|
|
152 m = sorted(self.table['perc'])[-top]
|
|
153 self.table = self.table[self.table['perc'] >= m ]
|
|
154 del self.table['perc']
|
|
155
|
|
156 if not self.args.def_na is None:
|
|
157 self.table = self.table.fillna( self.args.def_na )
|
|
158
|
|
159 if self.args.ftop:
|
|
160 select( self.args.fperc, self.args.ftop )
|
|
161
|
|
162 if self.args.stop:
|
|
163 self.table = self.table.T
|
|
164 select( self.args.sperc, self.args.stop )
|
|
165 self.table = self.table.T
|
|
166
|
|
167
|
|
168 # add missing values
|
|
169
|
|
170 def get_numpy_matrix( self ):
|
|
171 return np.matrix(self.table)
|
|
172
|
|
173 #def get_metadata_matrix( self ):
|
|
174 # return self.table.columns
|
|
175
|
|
176 def get_snames( self ):
|
|
177 #return list(self.table.index)
|
|
178 return self.table.columns
|
|
179
|
|
180 def get_fnames( self ):
|
|
181 #print self.table.columns.names
|
|
182 #print self.table.columns
|
|
183 return list(self.table.index)
|
|
184
|
|
185 def get_averages(self, by_row = True) :
|
|
186 return self.table.mean(axis = 1 if by_row else 0)
|
|
187
|
|
188 def save_matrix( self, output_file ):
|
|
189 self.table.to_csv( output_file, sep = '\t' )
|
|
190
|
|
191 class DistMatrix:
|
|
192 datatype = 'distance_matrix'
|
|
193
|
|
194 @staticmethod
|
|
195 def input_parameters( parser ):
|
|
196 dm_param = parser.add_argument_group('Distance parameters')
|
|
197 arg = dm_param.add_argument
|
|
198
|
|
199 dist_funcs = [ "euclidean","minkowski","cityblock","seuclidean",
|
|
200 "sqeuclidean","cosine","correlation","hamming",
|
|
201 "jaccard","chebyshev","canberra","braycurtis",
|
|
202 "mahalanobis","yule","matching","dice",
|
|
203 "kulsinski","rogerstanimoto","russellrao","sokalmichener",
|
|
204 "sokalsneath","wminkowski","ward" ]
|
|
205
|
|
206 arg( '--f_dist_f', type=str, default="correlation",
|
|
207 help = "Distance function for features [default correlation]")
|
|
208 arg( '--s_dist_f', type=str, default="euclidean",
|
|
209 help = "Distance function for sample [default euclidean]")
|
|
210 arg( '--load_dist_matrix_f', type=str, default=None,
|
|
211 help = "Load the distance matrix to be used for features [default None].")
|
|
212 arg( '--load_dist_matrix_s', type=str, default=None,
|
|
213 help = "Load the distance matrix to be used for samples [default None].")
|
|
214 arg( '--save_dist_matrix_f', type=str, default=None,
|
|
215 help = "Save the distance matrix for features to file [default None].")
|
|
216 arg( '--save_dist_matrix_s', type=str, default=None,
|
|
217 help = "Save the distance matrix for samples to file [default None].")
|
|
218
|
|
219 def __init__( self, data, args = None ):
|
|
220 self.sdf = args.s_dist_f
|
|
221 self.fdf = args.f_dist_f
|
|
222
|
|
223 self.s_cdist_matrix, self.f_cdist_matrix = None, None
|
|
224
|
|
225 self.numpy_full_matrix = (data if
|
|
226 type(data) == np.matrixlib.defmatrix.matrix else None)
|
|
227
|
|
228 def compute_f_dists( self ):
|
|
229 if args.load_dist_matrix_f:
|
|
230 with open( args.load_dist_matrix_f ) as inp:
|
|
231 self.f_cdist_matrix = pickle.load( inp )
|
|
232
|
|
233 else:
|
|
234 dt = self.numpy_full_matrix
|
|
235
|
|
236 if self.fdf == "spearman":
|
|
237 dt_ranked = np.matrix([stats.rankdata(d) for d in dt])
|
|
238 self.f_cdist_matrix = spd.pdist( dt_ranked, "correlation" )
|
|
239 return
|
|
240
|
|
241 if self.fdf == "pearson":
|
|
242 self.fdf = 'correlation'
|
|
243
|
|
244 self.f_cdist_matrix = spd.pdist( dt, self.fdf )
|
|
245
|
|
246 if args.save_dist_matrix_f:
|
|
247 with open( args.save_dist_matrix_f, "wb" ) as outf:
|
|
248 pickle.dump( self.f_cdist_matrix, outf )
|
|
249
|
|
250 def compute_s_dists( self ):
|
|
251 if args.load_dist_matrix_s:
|
|
252 with open( args.load_dist_matrix_s ) as inp:
|
|
253 self.s_cdist_matrix = pickle.load( inp )
|
|
254 else:
|
|
255 dt = self.numpy_full_matrix.transpose()
|
|
256
|
|
257 if self.sdf == "spearman":
|
|
258 dt_ranked = np.matrix([stats.rankdata(d) for d in dt])
|
|
259 self.s_cdist_matrix = spd.pdist( dt_ranked, "correlation" )
|
|
260 return
|
|
261
|
|
262 if self.sdf == "pearson":
|
|
263 self.sdf = 'correlation'
|
|
264
|
|
265 self.s_cdist_matrix = spd.pdist( dt, self.sdf )
|
|
266
|
|
267 if args.save_dist_matrix_s:
|
|
268 with open( args.save_dist_matrix_s, "wb" ) as outf:
|
|
269 pickle.dump( self.s_cdist_matrix, outf )
|
|
270
|
|
271 def get_s_dm( self ):
|
|
272 return self.s_cdist_matrix
|
|
273
|
|
274 def get_f_dm( self ):
|
|
275 return self.f_cdist_matrix
|
|
276
|
|
277 class HClustering:
|
|
278 datatype = 'hclustering'
|
|
279
|
|
280 @staticmethod
|
|
281 def input_parameters( parser ):
|
|
282 cl_param = parser.add_argument_group('Clustering parameters')
|
|
283 arg = cl_param.add_argument
|
|
284
|
|
285 linkage_method = [ "single","complete","average",
|
|
286 "weighted","centroid","median",
|
|
287 "ward" ]
|
|
288 arg( '--no_fclustering', action='store_true',
|
|
289 help = "avoid clustering features" )
|
|
290 arg( '--no_sclustering', action='store_true',
|
|
291 help = "avoid clustering samples" )
|
|
292 arg( '--flinkage', type=str, default="average",
|
|
293 help = "Linkage method for feature clustering [default average]")
|
|
294 arg( '--slinkage', type=str, default="average",
|
|
295 help = "Linkage method for sample clustering [default average]")
|
|
296
|
|
297 def get_reordered_matrix( self, matrix, sclustering = True, fclustering = True ):
|
|
298 if not sclustering and not fclustering:
|
|
299 return matrix
|
|
300
|
|
301 idx1 = self.sdendrogram['leaves'] if sclustering else None # !!!!!!!!!!!
|
|
302 idx2 = self.fdendrogram['leaves'][::-1] if fclustering else None
|
|
303
|
|
304 if sclustering and fclustering:
|
|
305 return matrix[idx2,:][:,idx1]
|
|
306 if fclustering:
|
|
307 return matrix[idx2,:][:]
|
|
308 if sclustering: # !!!!!!!!!!!!
|
|
309 return matrix[:][:,idx1]
|
|
310
|
|
311 def get_reordered_sample_labels( self, slabels ):
|
|
312 return [slabels[i] for i in self.sdendrogram['leaves']]
|
|
313
|
|
314 def get_reordered_feature_labels( self, flabels ):
|
|
315 return [flabels[i] for i in self.fdendrogram['leaves']]
|
|
316
|
|
317 def __init__( self, s_dm, f_dm, args = None ):
|
|
318 self.s_dm = s_dm
|
|
319 self.f_dm = f_dm
|
|
320 self.args = args
|
|
321 self.sclusters = None
|
|
322 self.fclusters = None
|
|
323 self.sdendrogram = None
|
|
324 self.fdendrogram = None
|
|
325
|
|
326 def shcluster( self, dendrogram = True ):
|
|
327 self.shclusters = sph.linkage( self.s_dm, args.slinkage )
|
|
328 if dendrogram:
|
|
329 self.sdendrogram = sph.dendrogram( self.shclusters, no_plot=True )
|
|
330
|
|
331 def fhcluster( self, dendrogram = True ):
|
|
332 self.fhclusters = sph.linkage( self.f_dm, args.flinkage )
|
|
333 if dendrogram:
|
|
334 self.fdendrogram = sph.dendrogram( self.fhclusters, no_plot=True )
|
|
335
|
|
336 def get_shclusters( self ):
|
|
337 return self.shclusters
|
|
338
|
|
339 def get_fhclusters( self ):
|
|
340 return self.fhclusters
|
|
341
|
|
342 def get_sdendrogram( self ):
|
|
343 return self.sdendrogram
|
|
344
|
|
345 def get_fdendrogram( self ):
|
|
346 return self.fdendrogram
|
|
347
|
|
348
|
|
349 class Heatmap:
|
|
350 datatype = 'heatmap'
|
|
351
|
|
352 bbcyr = {'red': ( (0.0, 0.0, 0.0),
|
|
353 (0.25, 0.0, 0.0),
|
|
354 (0.50, 0.0, 0.0),
|
|
355 (0.75, 1.0, 1.0),
|
|
356 (1.0, 1.0, 1.0)),
|
|
357 'green': ( (0.0, 0.0, 0.0),
|
|
358 (0.25, 0.0, 0.0),
|
|
359 (0.50, 1.0, 1.0),
|
|
360 (0.75, 1.0, 1.0),
|
|
361 (1.0, 0.0, 1.0)),
|
|
362 'blue': ( (0.0, 0.0, 0.0),
|
|
363 (0.25, 1.0, 1.0),
|
|
364 (0.50, 1.0, 1.0),
|
|
365 (0.75, 0.0, 0.0),
|
|
366 (1.0, 0.0, 1.0))}
|
|
367
|
|
368 bbcry = {'red': ( (0.0, 0.0, 0.0),
|
|
369 (0.25, 0.0, 0.0),
|
|
370 (0.50, 0.0, 0.0),
|
|
371 (0.75, 1.0, 1.0),
|
|
372 (1.0, 1.0, 1.0)),
|
|
373 'green': ( (0.0, 0.0, 0.0),
|
|
374 (0.25, 0.0, 0.0),
|
|
375 (0.50, 1.0, 1.0),
|
|
376 (0.75, 0.0, 0.0),
|
|
377 (1.0, 1.0, 1.0)),
|
|
378 'blue': ( (0.0, 0.0, 0.0),
|
|
379 (0.25, 1.0, 1.0),
|
|
380 (0.50, 1.0, 1.0),
|
|
381 (0.75, 0.0, 0.0),
|
|
382 (1.0, 0.0, 1.0))}
|
|
383
|
|
384 bcry = {'red': ( (0.0, 0.0, 0.0),
|
|
385 (0.33, 0.0, 0.0),
|
|
386 (0.66, 1.0, 1.0),
|
|
387 (1.0, 1.0, 1.0)),
|
|
388 'green': ( (0.0, 0.0, 0.0),
|
|
389 (0.33, 1.0, 1.0),
|
|
390 (0.66, 0.0, 0.0),
|
|
391 (1.0, 1.0, 1.0)),
|
|
392 'blue': ( (0.0, 1.0, 1.0),
|
|
393 (0.33, 1.0, 1.0),
|
|
394 (0.66, 0.0, 0.0),
|
|
395 (1.0, 0.0, 1.0))}
|
|
396
|
|
397
|
|
398 my_colormaps = [ ('bbcyr',bbcyr),
|
|
399 ('bbcry',bbcry),
|
|
400 ('bcry',bcry)]
|
|
401
|
|
402 dcols = ['#ca0000','#0087ff','#00ba1d','#cf00ff','#00dbe2','#ffaf00','#0017f4','#006012','#e175ff','#877878','#050505','#b5cf00','#ff8a8a','#aa6400','#50008a','#00ff58']
|
|
403
|
|
404
|
|
405 @staticmethod
|
|
406 def input_parameters( parser ):
|
|
407 hm_param = parser.add_argument_group('Heatmap options')
|
|
408 arg = hm_param.add_argument
|
|
409
|
|
410 arg( '--dpi', type=int, default=150,
|
|
411 help = "Image resolution in dpi [default 150]")
|
|
412 arg( '-l', '--log_scale', action='store_true',
|
|
413 help = "Log scale" )
|
|
414 arg( '-s', '--sqrt_scale', action='store_true',
|
|
415 help = "Square root scale" )
|
|
416 arg( '--no_slabels', action='store_true',
|
|
417 help = "Do not show sample labels" )
|
|
418 arg( '--minv', type=float, default=None,
|
|
419 help = "Minimum value to display in the color map [default None meaning automatic]" )
|
|
420 arg( '--maxv', type=float, default=None,
|
|
421 help = "Maximum value to display in the color map [default None meaning automatic]" )
|
|
422 arg( '--no_flabels', action='store_true',
|
|
423 help = "Do not show feature labels" )
|
|
424 arg( '--max_slabel_len', type=int, default=25,
|
|
425 help = "Max number of chars to report for sample labels [default 15]" )
|
|
426 arg( '--max_flabel_len', type=int, default=25,
|
|
427 help = "Max number of chars to report for feature labels [default 15]" )
|
|
428 arg( '--flabel_size', type=int, default=10,
|
|
429 help = "Feature label font size [default 10]" )
|
|
430 arg( '--slabel_size', type=int, default=10,
|
|
431 help = "Sample label font size [default 10]" )
|
|
432 arg( '--fdend_width', type=float, default=1.0,
|
|
433 help = "Width of the feature dendrogram [default 1 meaning 100%% of default heatmap width]")
|
|
434 arg( '--sdend_height', type=float, default=1.0,
|
|
435 help = "Height of the sample dendrogram [default 1 meaning 100%% of default heatmap height]")
|
|
436 arg( '--metadata_height', type=float, default=.05,
|
|
437 help = "Height of the metadata panel [default 0.05 meaning 5%% of default heatmap height]")
|
|
438 arg( '--metadata_separation', type=float, default=.01,
|
|
439 help = "Distance between the metadata and data panels. [default 0.001 meaning 0.1%% of default heatmap height]")
|
|
440 arg( '--image_size', type=float, default=8,
|
|
441 help = "Size of the largest between width and eight size for the image in inches [default 8]")
|
|
442 arg( '--cell_aspect_ratio', type=float, default=1.0,
|
|
443 help = "Aspect ratio between width and height for the cells of the heatmap [default 1.0]")
|
|
444 col_maps = ['Accent', 'Blues', 'BrBG', 'BuGn', 'BuPu', 'Dark2', 'GnBu',
|
|
445 'Greens', 'Greys', 'OrRd', 'Oranges', 'PRGn', 'Paired',
|
|
446 'Pastel1', 'Pastel2', 'PiYG', 'PuBu', 'PuBuGn', 'PuOr',
|
|
447 'PuRd', 'Purples', 'RdBu', 'RdGy', 'RdPu', 'RdYlBu', 'RdYlGn',
|
|
448 'Reds', 'Set1', 'Set2', 'Set3', 'Spectral', 'YlGn', 'YlGnBu',
|
|
449 'YlOrBr', 'YlOrRd', 'afmhot', 'autumn', 'binary', 'bone',
|
|
450 'brg', 'bwr', 'cool', 'copper', 'flag', 'gist_earth',
|
|
451 'gist_gray', 'gist_heat', 'gist_ncar', 'gist_rainbow',
|
|
452 'gist_stern', 'gist_yarg', 'gnuplot', 'gnuplot2', 'gray',
|
|
453 'hot', 'hsv', 'jet', 'ocean', 'pink', 'prism', 'rainbow',
|
|
454 'seismic', 'spectral', 'spring', 'summer', 'terrain', 'winter'] + [n for n,c in Heatmap.my_colormaps]
|
|
455 for n,c in Heatmap.my_colormaps:
|
|
456 my_cmap = matplotlib.colors.LinearSegmentedColormap(n,c,256)
|
|
457 pylab.register_cmap(name=n,cmap=my_cmap)
|
|
458 arg( '-c','--colormap', type=str, choices = col_maps, default = 'bbcry' )
|
|
459 arg( '--bottom_c', type=str, default = None,
|
|
460 help = "Color to use for cells below the minimum value of the scale [default None meaning bottom color of the scale]")
|
|
461 arg( '--top_c', type=str, default = None,
|
|
462 help = "Color to use for cells below the maximum value of the scale [default None meaning bottom color of the scale]")
|
|
463 arg( '--nan_c', type=str, default = None,
|
|
464 help = "Color to use for nan cells [default None]")
|
|
465
|
|
466
|
|
467
|
|
468 """
|
|
469 arg( '--', type=str, default="average",
|
|
470 help = "Linkage method for feature clustering [default average]")
|
|
471 arg( '--slinkage', type=str, default="average",
|
|
472 help = "Linkage method for sample clustering [default average]")
|
|
473 """
|
|
474
|
|
475 def __init__( self, numpy_matrix, sdendrogram, fdendrogram, snames, fnames, fnames_meta, args = None ):
|
|
476 self.numpy_matrix = numpy_matrix
|
|
477 self.sdendrogram = sdendrogram
|
|
478 self.fdendrogram = fdendrogram
|
|
479 self.snames = snames
|
|
480 self.fnames = fnames
|
|
481 self.fnames_meta = fnames_meta
|
|
482 self.ns,self.nf = self.numpy_matrix.shape
|
|
483 self.args = args
|
|
484
|
|
485 def make_legend( self, dmap, titles, out_fn ):
|
|
486 figlegend = plt.figure(figsize=(1+3*len(titles),2), frameon = False)
|
|
487
|
|
488 gs = gridspec.GridSpec( 1, len(dmap), wspace = 2.0 )
|
|
489
|
|
490 for i,(d,title) in enumerate(zip(dmap,titles)):
|
|
491 legax = plt.subplot(gs[i],frameon = False)
|
|
492 for k,v in sorted(d.items(),key=lambda x:x[1]):
|
|
493 rect = Rectangle( [0.0, 0.0], 0.0, 0.0,
|
|
494 facecolor = self.dcols[v%len(self.dcols)],
|
|
495 label = k,
|
|
496 edgecolor='b', lw = 0.0)
|
|
497
|
|
498 legax.add_patch(rect)
|
|
499 #remove_splines( legax )
|
|
500 legax.set_xticks([])
|
|
501 legax.set_yticks([])
|
|
502 legax.legend( loc = 2, frameon = False, title = title)
|
|
503 """
|
|
504 ncol = legend_ncol, bbox_to_anchor=(1.01, 3.),
|
|
505 borderpad = 0.0, labelspacing = 0.0,
|
|
506 handlelength = 0.5, handletextpad = 0.3,
|
|
507 borderaxespad = 0.0, columnspacing = 0.3,
|
|
508 prop = {'size':fontsize}, frameon = False)
|
|
509 """
|
|
510 if out_fn:
|
|
511 figlegend.savefig(out_fn, bbox_inches='tight')
|
|
512
|
|
513 def draw( self ):
|
|
514
|
|
515 rat = float(self.ns)/self.nf
|
|
516 rat *= self.args.cell_aspect_ratio
|
|
517 x,y = (self.args.image_size,rat*self.args.image_size) if rat < 1 else (self.args.image_size/rat,self.args.image_size)
|
|
518 fig = plt.figure( figsize=(x,y), facecolor = 'w' )
|
|
519
|
|
520 cm = pylab.get_cmap(self.args.colormap)
|
|
521 bottom_col = [ cm._segmentdata['red'][0][1],
|
|
522 cm._segmentdata['green'][0][1],
|
|
523 cm._segmentdata['blue'][0][1] ]
|
|
524 if self.args.bottom_c:
|
|
525 bottom_col = self.args.bottom_c
|
|
526 cm.set_under( bottom_col )
|
|
527 top_col = [ cm._segmentdata['red'][-1][1],
|
|
528 cm._segmentdata['green'][-1][1],
|
|
529 cm._segmentdata['blue'][-1][1] ]
|
|
530 if self.args.top_c:
|
|
531 top_col = self.args.top_c
|
|
532 cm.set_over( top_col )
|
|
533
|
|
534 if self.args.nan_c:
|
|
535 cm.set_bad( self.args.nan_c )
|
|
536
|
|
537 def make_ticklabels_invisible(ax):
|
|
538 for tl in ax.get_xticklabels() + ax.get_yticklabels():
|
|
539 tl.set_visible(False)
|
|
540 ax.set_xticks([])
|
|
541 ax.set_yticks([])
|
|
542
|
|
543 def remove_splines( ax ):
|
|
544 for v in ['right','left','top','bottom']:
|
|
545 ax.spines[v].set_color('none')
|
|
546
|
|
547 def shrink_labels( labels, n ):
|
|
548 shrink = lambda x: x[:n/2]+" [...] "+x[-n/2:]
|
|
549 return [(shrink(str(l)) if len(str(l)) > n else l) for l in labels]
|
|
550
|
|
551
|
|
552 #gs = gridspec.GridSpec( 4, 2,
|
|
553 # width_ratios=[1.0-fr_ns,fr_ns],
|
|
554 # height_ratios=[.03,0.03,1.0-fr_nf,fr_nf],
|
|
555 # wspace = 0.0, hspace = 0.0 )
|
|
556
|
|
557 fr_ns = float(self.ns)/max([self.ns,self.nf])
|
|
558 fr_nf = float(self.nf)/max([self.ns,self.nf])
|
|
559
|
|
560 buf_space = 0.05
|
|
561 minv = min( [buf_space*8, 8*rat*buf_space] )
|
|
562 if minv < 0.05:
|
|
563 buf_space /= minv/0.05
|
|
564 metadata_height = self.args.metadata_height if type(snames[0]) is tuple and len(snames[0]) > 1 else 0.000001
|
|
565 gs = gridspec.GridSpec( 6, 4,
|
|
566 width_ratios=[ buf_space, buf_space*2, .08*self.args.fdend_width,0.9],
|
|
567 height_ratios=[ buf_space, buf_space*2, .08*self.args.sdend_height, metadata_height, self.args.metadata_separation, 0.9],
|
|
568 wspace = 0.0, hspace = 0.0 )
|
|
569
|
|
570 ax_hm = plt.subplot(gs[23], axisbg = bottom_col )
|
|
571 ax_metadata = plt.subplot(gs[15], axisbg = bottom_col )
|
|
572 ax_hm_y2 = ax_hm.twinx()
|
|
573
|
|
574 norm_f = matplotlib.colors.Normalize
|
|
575 if self.args.log_scale:
|
|
576 norm_f = matplotlib.colors.LogNorm
|
|
577 elif self.args.sqrt_scale:
|
|
578 norm_f = SqrtNorm
|
|
579 minv, maxv = 0.0, None
|
|
580
|
|
581 maps, values, ndv = [], [], 0
|
|
582 if type(snames[0]) is tuple and len(snames[0]) > 1:
|
|
583 metadata = zip(*[list(s[1:]) for s in snames])
|
|
584 for m in metadata:
|
|
585 mmap = dict([(v[1],ndv+v[0]) for v in enumerate(list(set(m)))])
|
|
586 values.append([mmap[v] for v in m])
|
|
587 ndv += len(mmap)
|
|
588 maps.append(mmap)
|
|
589 dcols = []
|
|
590 mdmat = np.matrix(values)
|
|
591 while len(dcols) < ndv:
|
|
592 dcols += self.dcols
|
|
593 cmap = matplotlib.colors.ListedColormap(dcols[:ndv])
|
|
594 bounds = [float(f)-0.5 for f in range(ndv+1)]
|
|
595 imm = ax_metadata.imshow( mdmat, #origin='lower',
|
|
596 interpolation = 'nearest',
|
|
597 aspect='auto',
|
|
598 extent = [0, self.nf, 0, self.ns],
|
|
599 cmap=cmap,
|
|
600 vmin=bounds[0],
|
|
601 vmax=bounds[-1],
|
|
602 )
|
|
603 remove_splines( ax_metadata )
|
|
604 ax_metadata_y2 = ax_metadata.twinx()
|
|
605 ax_metadata_y2.set_ylim(0,len(self.fnames_meta))
|
|
606 ax_metadata.set_yticks([])
|
|
607 ax_metadata_y2.set_ylim(0,len(self.fnames_meta))
|
|
608 ax_metadata_y2.tick_params(length=0)
|
|
609 ax_metadata_y2.set_yticks(np.arange(len(self.fnames_meta))+0.5)
|
|
610 ax_metadata_y2.set_yticklabels(self.fnames_meta[::-1], va='center',size=self.args.flabel_size)
|
|
611 else:
|
|
612 ax_metadata.set_yticks([])
|
|
613
|
|
614 ax_metadata.set_xticks([])
|
|
615
|
|
616 im = ax_hm.imshow( self.numpy_matrix, #origin='lower',
|
|
617 interpolation = 'nearest', aspect='auto',
|
|
618 extent = [0, self.nf, 0, self.ns],
|
|
619 cmap=cm,
|
|
620 vmin=self.args.minv,
|
|
621 vmax=self.args.maxv,
|
|
622 norm = norm_f( vmin=minv if minv > 0.0 else None, vmax=maxv)
|
|
623 )
|
|
624
|
|
625 #ax_hm.set_ylim([0,800])
|
|
626 ax_hm.set_xticks(np.arange(len(list(snames)))+0.5)
|
|
627 if not self.args.no_slabels:
|
|
628 snames_short = shrink_labels( list([s[0] for s in snames]) if type(snames[0]) is tuple else snames, self.args.max_slabel_len )
|
|
629 ax_hm.set_xticklabels(snames_short,rotation=90,va='top',ha='center',size=self.args.slabel_size)
|
|
630 else:
|
|
631 ax_hm.set_xticklabels([])
|
|
632 ax_hm_y2.set_ylim([0,self.ns])
|
|
633 ax_hm_y2.set_yticks(np.arange(len(fnames))+0.5)
|
|
634 if not self.args.no_flabels:
|
|
635 fnames_short = shrink_labels( fnames, self.args.max_flabel_len )
|
|
636 ax_hm_y2.set_yticklabels(fnames_short,va='center',size=self.args.flabel_size)
|
|
637 else:
|
|
638 ax_hm_y2.set_yticklabels( [] )
|
|
639 ax_hm.set_yticks([])
|
|
640 remove_splines( ax_hm )
|
|
641 ax_hm.tick_params(length=0)
|
|
642 ax_hm_y2.tick_params(length=0)
|
|
643 #ax_hm.set_xlim([0,self.ns])
|
|
644 ax_cm = plt.subplot(gs[3], axisbg = 'r', frameon = False)
|
|
645 #fig.colorbar(im, ax_cm, orientation = 'horizontal', spacing = 'proportional', format = ticker.LogFormatterMathtext() )
|
|
646 fig.colorbar(im, ax_cm, orientation = 'horizontal', spacing='proportional' if self.args.sqrt_scale else 'uniform' ) # , format = ticker.LogFormatterMathtext() )
|
|
647
|
|
648 if not self.args.no_sclustering:
|
|
649 ax_den_top = plt.subplot(gs[11], axisbg = 'r', frameon = False)
|
|
650 sph._plot_dendrogram( self.sdendrogram['icoord'], self.sdendrogram['dcoord'], self.sdendrogram['ivl'],
|
|
651 self.ns + 1, self.nf + 1, 1, 'top', no_labels=True,
|
|
652 color_list=self.sdendrogram['color_list'] )
|
|
653 ymax = max([max(a) for a in self.sdendrogram['dcoord']])
|
|
654 ax_den_top.set_ylim([0,ymax])
|
|
655 make_ticklabels_invisible( ax_den_top )
|
|
656 if not self.args.no_fclustering:
|
|
657 ax_den_right = plt.subplot(gs[22], axisbg = 'b', frameon = False)
|
|
658 sph._plot_dendrogram( self.fdendrogram['icoord'], self.fdendrogram['dcoord'], self.fdendrogram['ivl'],
|
|
659 self.ns + 1, self.nf + 1, 1, 'right', no_labels=True,
|
|
660 color_list=self.fdendrogram['color_list'] )
|
|
661 xmax = max([max(a) for a in self.fdendrogram['dcoord']])
|
|
662 ax_den_right.set_xlim([xmax,0])
|
|
663 make_ticklabels_invisible( ax_den_right )
|
|
664
|
|
665
|
|
666 if not self.args.out:
|
|
667 plt.show( )
|
|
668 else:
|
|
669 fig.savefig( self.args.out, bbox_inches='tight', dpi = self.args.dpi )
|
|
670 if maps:
|
|
671 self.make_legend( maps, fnames_meta, self.args.legend_file )
|
|
672
|
|
673
|
|
674
|
|
675 class ReadCmd:
|
|
676
|
|
677 def __init__( self ):
|
|
678 import argparse as ap
|
|
679 import textwrap
|
|
680
|
|
681 p = ap.ArgumentParser( description= "TBA" )
|
|
682 arg = p.add_argument
|
|
683
|
|
684 arg( '-i', '--inp', '--in', metavar='INPUT_FILE', type=str, nargs='?', default=sys.stdin,
|
|
685 help= "The input matrix" )
|
|
686 arg( '-o', '--out', metavar='OUTPUT_FILE', type=str, nargs='?', default=None,
|
|
687 help= "The output image file [image on screen of not specified]" )
|
|
688 arg( '--legend_file', metavar='LEGEND_FILE', type=str, nargs='?', default=None,
|
|
689 help= "The output file for the legend of the provided metadata" )
|
|
690
|
|
691 input_types = [DataMatrix.datatype,DistMatrix.datatype]
|
|
692 arg( '-t', '--input_type', metavar='INPUT_TYPE', type=str, choices = input_types,
|
|
693 default='data_matrix',
|
|
694 help= "The input type can be a data matrix or distance matrix [default data_matrix]" )
|
|
695
|
|
696 DataMatrix.input_parameters( p )
|
|
697 DistMatrix.input_parameters( p )
|
|
698 HClustering.input_parameters( p )
|
|
699 Heatmap.input_parameters( p )
|
|
700
|
|
701 self.args = p.parse_args()
|
|
702
|
|
703 def check_consistency( self ):
|
|
704 pass
|
|
705
|
|
706 def get_args( self ):
|
|
707 return self.args
|
|
708
|
|
709 if __name__ == '__main__':
|
|
710
|
|
711 read = ReadCmd( )
|
|
712 read.check_consistency()
|
|
713 args = read.get_args()
|
|
714
|
|
715 if args.input_type == DataMatrix.datatype:
|
|
716 dm = DataMatrix( args.inp, args )
|
|
717 if args.out_table:
|
|
718 dm.save_matrix( args.out_table )
|
|
719
|
|
720 distm = DistMatrix( dm.get_numpy_matrix(), args = args )
|
|
721 if not args.no_sclustering:
|
|
722 distm.compute_s_dists()
|
|
723 if not args.no_fclustering:
|
|
724 distm.compute_f_dists()
|
|
725 elif args.input_type == DataMatrix.datatype:
|
|
726 # distm = read...
|
|
727 pass
|
|
728 else:
|
|
729 pass
|
|
730
|
|
731 cl = HClustering( distm.get_s_dm(), distm.get_f_dm(), args = args )
|
|
732 if not args.no_sclustering:
|
|
733 cl.shcluster()
|
|
734 if not args.no_fclustering:
|
|
735 cl.fhcluster()
|
|
736
|
|
737 hmp = dm.get_numpy_matrix()
|
|
738 fnames = dm.get_fnames()
|
|
739 snames = dm.get_snames()
|
|
740 fnames_meta = snames.names[1:]
|
|
741 #if not args.no_sclustering or not args.no_fclustering ):
|
|
742
|
|
743 hmp = cl.get_reordered_matrix( hmp, sclustering = not args.no_sclustering, fclustering = not args.no_fclustering )
|
|
744 if not args.no_sclustering:
|
|
745 snames = cl.get_reordered_sample_labels( snames )
|
|
746 if not args.no_fclustering:
|
|
747 fnames = cl.get_reordered_feature_labels( fnames )
|
|
748 else:
|
|
749 fnames = fnames[::-1]
|
|
750
|
|
751 hm = Heatmap( hmp, cl.sdendrogram, cl.fdendrogram, snames, fnames, fnames_meta, args = args )
|
|
752 hm.draw()
|
|
753
|
|
754
|
|
755
|
|
756
|
|
757
|
|
758
|