annotate hclust2/hclust2.py @ 60:39126c375dd4 draft default tip

Uploaded
author george-weingart
date Sat, 06 Sep 2014 15:42:27 -0400
parents cac6247cb1d3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
1 #!/usr/bin/env python
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
2
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
3 import sys
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
4 import numpy as np
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
5 import matplotlib.ticker as ticker
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
6 import scipy.spatial.distance as spd
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
7 import scipy.cluster.hierarchy as sph
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
8 from scipy import stats
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
9 import matplotlib
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
10 #matplotlib.use('Agg')
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
11 import pylab
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
12 import pandas as pd
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
13 from matplotlib.patches import Rectangle
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
14 from mpl_toolkits.axes_grid1 import make_axes_locatable
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
15 import matplotlib.pyplot as plt
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
16 import matplotlib.gridspec as gridspec
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
17 import cPickle as pickle
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
18
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
19 sys.setrecursionlimit(10000)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
20
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
21 # samples on rows
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
22
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
class SqrtNorm(matplotlib.colors.Normalize):
    """
    Normalize a given value to the 0-1 range on a square root scale

    Values less than or equal to zero are masked before scaling, and the
    forward mapping is
    ``(sqrt(x) - sqrt(vmin)) / (sqrt(vmax) - sqrt(vmin))``.
    """
    def __call__(self, value, clip=None):
        """
        Return *value* mapped onto the square-root scale.

        *clip* falls back to ``self.clip`` when None; when true the data are
        clipped to ``[vmin, vmax]`` before scaling.  ``vmin``/``vmax`` that
        are still None are taken from the data.

        Raises ValueError if ``vmin > vmax`` or ``vmin <= 0``.
        """
        if clip is None:
            clip = self.clip

        result, is_scalar = self.process_value(value)

        # square roots are only taken of strictly positive data
        result = np.ma.masked_less_equal(result, 0, copy=False)

        self.autoscale_None(result)
        vmin, vmax = self.vmin, self.vmax
        if vmin > vmax:
            raise ValueError("minvalue must be less than or equal to maxvalue")
        elif vmin <= 0:
            raise ValueError("values must all be positive")
        elif vmin == vmax:
            result.fill(0)
        else:
            if clip:
                mask = np.ma.getmask(result)
                result = np.ma.array(np.clip(result.filled(vmax), vmin, vmax),
                                     mask=mask)
            # work in place on the underlying buffer: much faster than
            # building intermediate masked arrays
            resdat = result.data
            mask = result.mask
            if mask is np.ma.nomask:
                mask = (resdat <= 0)
            else:
                mask |= resdat <= 0
            # give masked cells a harmless value so sqrt() stays well defined
            # (np.copyto replaces the private matplotlib.cbook._putmask)
            np.copyto(resdat, 1, where=mask)
            np.sqrt(resdat, out=resdat)
            resdat -= np.sqrt(vmin)
            resdat /= (np.sqrt(vmax) - np.sqrt(vmin))
            result = np.ma.array(resdat, mask=mask, copy=False)
        if is_scalar:
            result = result[0]
        return result

    def inverse(self, value):
        """
        Map normalized *value* (0-1) back to data space.

        This is the exact inverse of ``__call__``:
        ``(value * (sqrt(vmax) - sqrt(vmin)) + sqrt(vmin)) ** 2``.

        Raises ValueError if vmin/vmax have not been set yet.
        """
        if not self.scaled():
            raise ValueError("Not invertible until scaled")
        root_min = np.sqrt(self.vmin)
        root_span = np.sqrt(self.vmax) - root_min

        if np.iterable(value):
            val = np.ma.asarray(value)
            return np.ma.power(val * root_span + root_min, 2)
        else:
            return (value * root_span + root_min) ** 2

    def autoscale(self, A):
        '''
        Set *vmin*, *vmax* to min, max of the strictly positive entries of *A*.
        '''
        A = np.ma.masked_less_equal(A, 0, copy=False)
        self.vmin = np.ma.min(A)
        self.vmax = np.ma.max(A)

    def autoscale_None(self, A):
        ' autoscale only None-valued vmin or vmax'
        if self.vmin is not None and self.vmax is not None:
            return
        # non-positive entries are ignored when deriving the limits
        A = np.ma.masked_less_equal(A, 0, copy=False)
        if self.vmin is None:
            self.vmin = np.ma.min(A)
        if self.vmax is None:
            self.vmax = np.ma.max(A)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
92
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
class DataMatrix:
    """
    Input table read with pandas and optionally reduced to its top rows
    and/or columns.

    ``get_fnames`` exposes the table index and ``get_snames`` the table
    columns.
    """
    datatype = 'data_matrix'

    @staticmethod
    def input_parameters( parser ):
        """Register the input data matrix options on the argparse *parser*."""
        dm_param = parser.add_argument_group('Input data matrix parameters')
        arg = dm_param.add_argument

        arg( '--sep', type=str, default='\t' )
        arg( '--out_table', type=str, default=None,
             help = 'Write processed data matrix to file' )
        arg( '--fname_row', type=int, default=0,
             help = "row number containing the names of the features "
                    "[default 0, specify -1 if no names are present in the matrix]")
        arg( '--sname_row', type=int, default=0,
             help = "column number containing the names of the samples "
                    "[default 0, specify -1 if no names are present in the matrix]")
        arg( '--metadata_rows', type=str, default=None,
             help = "Row numbers to use as metadata "
                    "[default None, meaning no metadata]")
        arg( '--skip_rows', type=str, default=None,
             help = "Row numbers to skip (0-indexed, comma separated) from the input file "
                    "[default None, meaning no rows skipped]")
        arg( '--sperc', type=int, default=90,
             help = "Percentile of sample value distribution for sample selection" )
        arg( '--fperc', type=int, default=90,
             help = "Percentile of feature value distribution for feature selection" )
        arg( '--stop', type=int, default=None,
             help = "Number of top samples to select (ordering based on percentile specified by --sperc)" )
        arg( '--ftop', type=int, default=None,
             help = "Number of top features to select (ordering based on percentile specified by --fperc)" )
        arg( '--def_na', type=float, default=None,
             help = "Set the default value for missing values [default None which means no replacement]")

    def __init__( self, input_file, args ):
        """
        Read *input_file* (path or file-like object) according to *args*.

        *args* must provide the attributes registered by
        ``input_parameters`` (sep, fname_row, sname_row, metadata_rows,
        skip_rows, sperc, fperc, stop, ftop, def_na).  After loading,
        missing values are optionally replaced and the table is reduced to
        the top rows (--ftop) and then the top columns (--stop).
        """
        self.args = args
        self.metadata_rows = []
        self.metadata_table = None

        toskip = ( [int(l) for l in args.skip_rows.split(",")]
                   if args.skip_rows else [] )

        if args.metadata_rows:
            requested = [int(a) for a in args.metadata_rows.split(",")]
            # shift every metadata row index down by the number of skipped
            # rows located at or before it
            self.metadata_rows = [ m - sum(1 for t in toskip if t <= m)
                                   for m in requested ]

        if self.metadata_rows:
            header = ( [args.fname_row] + self.metadata_rows
                       if args.fname_row > -1 else self.metadata_rows )
        else:
            header = args.fname_row if args.fname_row > -1 else None

        self.table = pd.read_table(
            input_file, sep = args.sep,
            skiprows = sorted(toskip),
            header = sorted(header) if isinstance(header, list) else header,
            index_col = args.sname_row if args.sname_row > -1 else None
            )

        if args.def_na is not None:
            self.table = self.table.fillna( args.def_na )

        if args.ftop:
            self._select_top( args.fperc, args.ftop )

        if args.stop:
            # the selection works on rows, so flip the table, select, flip back
            self.table = self.table.T
            self._select_top( args.sperc, args.stop )
            self.table = self.table.T

    def _select_top( self, perc, top ):
        """
        Keep the rows whose *perc*-th percentile is among the *top* highest.

        Rows tied with the threshold are all kept.  *top* is clamped to the
        number of rows, so asking for more rows than exist keeps everything
        instead of raising IndexError.
        """
        n_rows = len(self.table)
        if n_rows == 0:
            return
        # scores live in their own Series so that a real column called
        # 'perc' is never overwritten
        scores = self.table.apply( lambda x: stats.scoreatpercentile(x, perc),
                                   axis = 1 )
        threshold = sorted(scores)[-min(top, n_rows)]
        self.table = self.table[scores >= threshold]

    def get_numpy_matrix( self ):
        """Return the table values as a ``np.matrix``."""
        return np.matrix(self.table)

    def get_snames( self ):
        """Return the table columns (a pandas index object, not a list)."""
        return self.table.columns

    def get_fnames( self ):
        """Return the table index as a list."""
        return list(self.table.index)

    def get_averages(self, by_row = True) :
        """Return per-row means (per-column means if *by_row* is false)."""
        return self.table.mean(axis = 1 if by_row else 0)

    def save_matrix( self, output_file ):
        """Write the processed table to *output_file*, tab separated."""
        self.table.to_csv( output_file, sep = '\t' )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
190
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
class DistMatrix:
    """
    Condensed pairwise distance matrices between the rows (features) and
    between the columns (samples) of a data matrix.

    Metric names are handed to ``scipy.spatial.distance.pdist``; in addition
    "spearman" (correlation distance on per-row ranks) and "pearson" (alias
    of "correlation") are accepted.
    """
    datatype = 'distance_matrix'

    @staticmethod
    def input_parameters( parser ):
        """Register the distance options on the argparse *parser*."""
        dm_param = parser.add_argument_group('Distance parameters')
        arg = dm_param.add_argument

        arg( '--f_dist_f', type=str, default="correlation",
             help = "Distance function for features [default correlation]")
        arg( '--s_dist_f', type=str, default="euclidean",
             help = "Distance function for sample [default euclidean]")
        arg( '--load_dist_matrix_f', type=str, default=None,
             help = "Load the distance matrix to be used for features [default None].")
        arg( '--load_dist_matrix_s', type=str, default=None,
             help = "Load the distance matrix to be used for samples [default None].")
        arg( '--save_dist_matrix_f', type=str, default=None,
             help = "Save the distance matrix for features to file [default None].")
        arg( '--save_dist_matrix_s', type=str, default=None,
             help = "Save the distance matrix for samples to file [default None].")

    def __init__( self, data, args = None ):
        """
        *data* is kept only if it is a ``np.matrix`` (otherwise None is
        stored).  *args* supplies the options registered by
        ``input_parameters``; missing options (or ``args=None``) fall back
        to the command line defaults.
        """
        # keep the options on the instance: the compute methods must not
        # depend on a module-level ``args`` being defined
        self.args = args
        self.sdf = getattr( args, 's_dist_f', "euclidean" )
        self.fdf = getattr( args, 'f_dist_f', "correlation" )

        self.s_cdist_matrix, self.f_cdist_matrix = None, None

        self.numpy_full_matrix = data if isinstance(data, np.matrix) else None

    def _option( self, name ):
        """Return option *name* from the stored args, or None if absent."""
        return getattr( self.args, name, None )

    @staticmethod
    def _load_dm( path ):
        """Unpickle a distance matrix from *path*."""
        # NOTE(review): pickle can run arbitrary code while loading; only
        # point this at files you trust.
        with open( path, "rb" ) as inp:
            return pickle.load( inp )

    @staticmethod
    def _save_dm( dm, path ):
        """Pickle the distance matrix *dm* to *path*."""
        with open( path, "wb" ) as outf:
            pickle.dump( dm, outf )

    @staticmethod
    def _pdist( rows, metric ):
        """Condensed distances between *rows*, with "spearman" support."""
        rows = np.asarray( rows )
        if metric == "spearman":
            # Spearman = Pearson correlation computed on the ranks
            rows = np.array( [stats.rankdata(r) for r in rows] )
            metric = "correlation"
        return spd.pdist( rows, metric )

    def compute_f_dists( self ):
        """
        Fill ``f_cdist_matrix`` with the distances between matrix rows,
        loading it from --load_dist_matrix_f when given, and write it to
        --save_dist_matrix_f when given (for every metric, spearman included).
        """
        load_path = self._option( 'load_dist_matrix_f' )
        if load_path:
            self.f_cdist_matrix = self._load_dm( load_path )
        else:
            if self.fdf == "pearson":
                self.fdf = 'correlation'
            self.f_cdist_matrix = self._pdist( self.numpy_full_matrix, self.fdf )

        save_path = self._option( 'save_dist_matrix_f' )
        if save_path:
            self._save_dm( self.f_cdist_matrix, save_path )

    def compute_s_dists( self ):
        """
        Fill ``s_cdist_matrix`` with the distances between matrix columns,
        loading it from --load_dist_matrix_s when given, and write it to
        --save_dist_matrix_s when given (for every metric, spearman included).
        """
        load_path = self._option( 'load_dist_matrix_s' )
        if load_path:
            self.s_cdist_matrix = self._load_dm( load_path )
        else:
            if self.sdf == "pearson":
                self.sdf = 'correlation'
            self.s_cdist_matrix = self._pdist(
                self.numpy_full_matrix.transpose(), self.sdf )

        save_path = self._option( 'save_dist_matrix_s' )
        if save_path:
            self._save_dm( self.s_cdist_matrix, save_path )

    def get_s_dm( self ):
        """Return the condensed sample distance matrix (None until computed)."""
        return self.s_cdist_matrix

    def get_f_dm( self ):
        """Return the condensed feature distance matrix (None until computed)."""
        return self.f_cdist_matrix
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
276
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
277 class HClustering:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
278 datatype = 'hclustering'
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
279
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
280 @staticmethod
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
281 def input_parameters( parser ):
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
282 cl_param = parser.add_argument_group('Clustering parameters')
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
283 arg = cl_param.add_argument
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
284
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
285 linkage_method = [ "single","complete","average",
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
286 "weighted","centroid","median",
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
287 "ward" ]
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
288 arg( '--no_fclustering', action='store_true',
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
289 help = "avoid clustering features" )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
290 arg( '--no_sclustering', action='store_true',
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
291 help = "avoid clustering samples" )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
292 arg( '--flinkage', type=str, default="average",
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
293 help = "Linkage method for feature clustering [default average]")
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
294 arg( '--slinkage', type=str, default="average",
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
295 help = "Linkage method for sample clustering [default average]")
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
296
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def get_reordered_matrix( self, matrix, sclustering = True, fclustering = True ):
    """Return `matrix` rearranged according to the stored dendrograms.

    Rows are reordered with the reversed leaf order of self.fdendrogram
    when `fclustering` is true; columns are reordered with the leaf order
    of self.sdendrogram when `sclustering` is true.  With both flags false
    the input object itself is returned untouched.
    """
    reordered = matrix

    if fclustering:
        # Feature leaves are applied bottom-up, hence the reversal.
        row_order = self.fdendrogram['leaves'][::-1]
        reordered = reordered[row_order,:]

    if sclustering:
        col_order = self.sdendrogram['leaves']
        reordered = reordered[:,col_order]

    return reordered
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
310
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def get_reordered_sample_labels( self, slabels ):
    """Return the sample labels listed in the leaf order of self.sdendrogram."""
    leaf_order = self.sdendrogram['leaves']
    return [slabels[leaf] for leaf in leaf_order]
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
313
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def get_reordered_feature_labels( self, flabels ):
    """Return the feature labels listed in the leaf order of self.fdendrogram."""
    leaf_order = self.fdendrogram['leaves']
    return [flabels[leaf] for leaf in leaf_order]
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
316
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def __init__( self, s_dm, f_dm, args = None ):
    """Store the distance data and reset every clustering result to None.

    s_dm -- sample distance data, later passed to scipy's linkage()
    f_dm -- feature distance data, later passed to scipy's linkage()
    args -- options object kept as self.args (may be None)
    """
    self.s_dm = s_dm
    self.f_dm = f_dm
    self.args = args

    # Historical attribute names, kept so existing readers keep working.
    self.sclusters = None
    self.fclusters = None

    # Names actually written by shcluster()/fhcluster() and read by
    # get_shclusters()/get_fhclusters(); initialised here so the getters
    # return None instead of raising AttributeError before clustering ran.
    self.shclusters = None
    self.fhclusters = None

    self.sdendrogram = None
    self.fdendrogram = None
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
325
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def shcluster( self, dendrogram = True ):
    """Hierarchically cluster the samples.

    Runs scipy's linkage() on self.s_dm and stores the result in
    self.shclusters.  The linkage method is taken from self.args.slinkage;
    when no options object (or no such attribute) is available it falls
    back to "average", the command-line default of --slinkage.

    dendrogram -- when true, also compute the dendrogram layout (without
                  plotting) and store it in self.sdendrogram
    """
    # Read the option from the instance rather than from a module-level
    # `args`, which only exists when the file is executed as a script.
    method = getattr( self.args, 'slinkage', "average" )
    self.shclusters = sph.linkage( self.s_dm, method )
    if dendrogram:
        self.sdendrogram = sph.dendrogram( self.shclusters, no_plot=True )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
330
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def fhcluster( self, dendrogram = True ):
    """Hierarchically cluster the features.

    Runs scipy's linkage() on self.f_dm and stores the result in
    self.fhclusters.  The linkage method is taken from self.args.flinkage;
    when no options object (or no such attribute) is available it falls
    back to "average", the command-line default of --flinkage.

    dendrogram -- when true, also compute the dendrogram layout (without
                  plotting) and store it in self.fdendrogram
    """
    # Read the option from the instance rather than from a module-level
    # `args`, which only exists when the file is executed as a script.
    method = getattr( self.args, 'flinkage', "average" )
    self.fhclusters = sph.linkage( self.f_dm, method )
    if dendrogram:
        self.fdendrogram = sph.dendrogram( self.fhclusters, no_plot=True )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
335
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def get_shclusters( self ):
    """Return the sample linkage result stored in self.shclusters."""
    sample_linkage = self.shclusters
    return sample_linkage
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
338
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def get_fhclusters( self ):
    """Return the feature linkage result stored in self.fhclusters."""
    feature_linkage = self.fhclusters
    return feature_linkage
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
341
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def get_sdendrogram( self ):
    """Return the sample dendrogram stored in self.sdendrogram."""
    sample_dendrogram = self.sdendrogram
    return sample_dendrogram
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
344
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def get_fdendrogram( self ):
    """Return the feature dendrogram stored in self.fdendrogram."""
    feature_dendrogram = self.fdendrogram
    return feature_dendrogram
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
347
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
348
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
349 class Heatmap:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
350 datatype = 'heatmap'
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
351
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
352 bbcyr = {'red': ( (0.0, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
353 (0.25, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
354 (0.50, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
355 (0.75, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
356 (1.0, 1.0, 1.0)),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
357 'green': ( (0.0, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
358 (0.25, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
359 (0.50, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
360 (0.75, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
361 (1.0, 0.0, 1.0)),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
362 'blue': ( (0.0, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
363 (0.25, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
364 (0.50, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
365 (0.75, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
366 (1.0, 0.0, 1.0))}
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
367
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
368 bbcry = {'red': ( (0.0, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
369 (0.25, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
370 (0.50, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
371 (0.75, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
372 (1.0, 1.0, 1.0)),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
373 'green': ( (0.0, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
374 (0.25, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
375 (0.50, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
376 (0.75, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
377 (1.0, 1.0, 1.0)),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
378 'blue': ( (0.0, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
379 (0.25, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
380 (0.50, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
381 (0.75, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
382 (1.0, 0.0, 1.0))}
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
383
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
384 bcry = {'red': ( (0.0, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
385 (0.33, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
386 (0.66, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
387 (1.0, 1.0, 1.0)),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
388 'green': ( (0.0, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
389 (0.33, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
390 (0.66, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
391 (1.0, 1.0, 1.0)),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
392 'blue': ( (0.0, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
393 (0.33, 1.0, 1.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
394 (0.66, 0.0, 0.0),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
395 (1.0, 0.0, 1.0))}
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
396
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
397
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
398 my_colormaps = [ ('bbcyr',bbcyr),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
399 ('bbcry',bbcry),
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
400 ('bcry',bcry)]
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
401
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
402 dcols = ['#ca0000','#0087ff','#00ba1d','#cf00ff','#00dbe2','#ffaf00','#0017f4','#006012','#e175ff','#877878','#050505','#b5cf00','#ff8a8a','#aa6400','#50008a','#00ff58']
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
403
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
404
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
@staticmethod
def input_parameters( parser ):
    """Register the 'Heatmap options' argument group on `parser`.

    Besides adding the options, this builds a LinearSegmentedColormap for
    every entry of Heatmap.my_colormaps and registers it with pylab so the
    custom maps can be selected through -c/--colormap.

    parser -- an argparse parser (anything exposing add_argument_group)
    """
    hm_param = parser.add_argument_group('Heatmap options')
    arg = hm_param.add_argument

    arg( '--dpi', type=int, default=150,
         help = "Image resolution in dpi [default 150]")
    arg( '-l', '--log_scale', action='store_true',
         help = "Log scale" )
    arg( '-s', '--sqrt_scale', action='store_true',
         help = "Square root scale" )
    arg( '--no_slabels', action='store_true',
         help = "Do not show sample labels" )
    arg( '--minv', type=float, default=None,
         help = "Minimum value to display in the color map [default None meaning automatic]" )
    arg( '--maxv', type=float, default=None,
         help = "Maximum value to display in the color map [default None meaning automatic]" )
    arg( '--no_flabels', action='store_true',
         help = "Do not show feature labels" )
    # Help texts below state the real defaults (25), not the stale "15".
    arg( '--max_slabel_len', type=int, default=25,
         help = "Max number of chars to report for sample labels [default 25]" )
    arg( '--max_flabel_len', type=int, default=25,
         help = "Max number of chars to report for feature labels [default 25]" )
    arg( '--flabel_size', type=int, default=10,
         help = "Feature label font size [default 10]" )
    arg( '--slabel_size', type=int, default=10,
         help = "Sample label font size [default 10]" )
    # '%%' is required because argparse %-formats help strings.
    arg( '--fdend_width', type=float, default=1.0,
         help = "Width of the feature dendrogram [default 1 meaning 100%% of default heatmap width]")
    arg( '--sdend_height', type=float, default=1.0,
         help = "Height of the sample dendrogram [default 1 meaning 100%% of default heatmap height]")
    arg( '--metadata_height', type=float, default=.05,
         help = "Height of the metadata panel [default 0.05 meaning 5%% of default heatmap height]")
    arg( '--metadata_separation', type=float, default=.01,
         help = "Distance between the metadata and data panels. [default 0.01 meaning 1%% of default heatmap height]")
    arg( '--image_size', type=float, default=8,
         help = "Size of the largest between width and height size for the image in inches [default 8]")
    arg( '--cell_aspect_ratio', type=float, default=1.0,
         help = "Aspect ratio between width and height for the cells of the heatmap [default 1.0]")

    # Accepted --colormap values: a fixed list of matplotlib map names
    # plus the custom maps defined on the class.
    col_maps = ['Accent', 'Blues', 'BrBG', 'BuGn', 'BuPu', 'Dark2', 'GnBu',
                'Greens', 'Greys', 'OrRd', 'Oranges', 'PRGn', 'Paired',
                'Pastel1', 'Pastel2', 'PiYG', 'PuBu', 'PuBuGn', 'PuOr',
                'PuRd', 'Purples', 'RdBu', 'RdGy', 'RdPu', 'RdYlBu', 'RdYlGn',
                'Reds', 'Set1', 'Set2', 'Set3', 'Spectral', 'YlGn', 'YlGnBu',
                'YlOrBr', 'YlOrRd', 'afmhot', 'autumn', 'binary', 'bone',
                'brg', 'bwr', 'cool', 'copper', 'flag', 'gist_earth',
                'gist_gray', 'gist_heat', 'gist_ncar', 'gist_rainbow',
                'gist_stern', 'gist_yarg', 'gnuplot', 'gnuplot2', 'gray',
                'hot', 'hsv', 'jet', 'ocean', 'pink', 'prism', 'rainbow',
                'seismic', 'spectral', 'spring', 'summer', 'terrain', 'winter'] + [n for n,c in Heatmap.my_colormaps]
    # Make the custom maps resolvable by name before they are offered as choices.
    for n,c in Heatmap.my_colormaps:
        my_cmap = matplotlib.colors.LinearSegmentedColormap(n,c,256)
        pylab.register_cmap(name=n,cmap=my_cmap)
    arg( '-c','--colormap', type=str, choices = col_maps, default = 'bbcry' )
    arg( '--bottom_c', type=str, default = None,
         help = "Color to use for cells below the minimum value of the scale [default None meaning bottom color of the scale]")
    arg( '--top_c', type=str, default = None,
         help = "Color to use for cells above the maximum value of the scale [default None meaning top color of the scale]")
    arg( '--nan_c', type=str, default = None,
         help = "Color to use for nan cells [default None]")

    # Disabled leftovers, kept for reference only (never executed):
    #   arg( '--', type=str, default="average",
    #        help = "Linkage method for feature clustering [default average]")
    #   arg( '--slinkage', type=str, default="average",
    #        help = "Linkage method for sample clustering [default average]")
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
474
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
475 def __init__( self, numpy_matrix, sdendrogram, fdendrogram, snames, fnames, fnames_meta, args = None ):
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
476 self.numpy_matrix = numpy_matrix
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
477 self.sdendrogram = sdendrogram
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
478 self.fdendrogram = fdendrogram
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
479 self.snames = snames
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
480 self.fnames = fnames
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
481 self.fnames_meta = fnames_meta
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
482 self.ns,self.nf = self.numpy_matrix.shape
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
483 self.args = args
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
484
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def make_legend( self, dmap, titles, out_fn ):
    """Draw one legend panel per (mapping, title) pair and optionally save it.

    dmap   -- sequence of dicts mapping a label to an integer colour index
    titles -- legend titles, paired positionally with `dmap`
    out_fn -- output file name; nothing is saved when it is falsy

    Colours come from self.dcols, cycling when an index exceeds its length.
    """
    fig_width = 1+3*len(titles)
    figlegend = plt.figure(figsize=(fig_width,2), frameon = False)

    grid = gridspec.GridSpec( 1, len(dmap), wspace = 2.0 )
    palette_size = len(self.dcols)

    for panel_idx,(mapping,panel_title) in enumerate(zip(dmap,titles)):
        legax = plt.subplot(grid[panel_idx],frameon = False)

        # Entries are listed by ascending colour index; zero-sized patches
        # exist only to feed the legend with a handle and a label.
        entries = sorted(mapping.items(),key=lambda item:item[1])
        for label,colour_idx in entries:
            swatch = Rectangle( [0.0, 0.0], 0.0, 0.0,
                                facecolor = self.dcols[colour_idx%palette_size],
                                label = label,
                                edgecolor='b', lw = 0.0)
            legax.add_patch(swatch)

        #remove_splines( legax )
        legax.set_xticks([])
        legax.set_yticks([])
        legax.legend( loc = 2, frameon = False, title = panel_title)
        # Unused legend options kept for reference:
        #   ncol = legend_ncol, bbox_to_anchor=(1.01, 3.),
        #   borderpad = 0.0, labelspacing = 0.0,
        #   handlelength = 0.5, handletextpad = 0.3,
        #   borderaxespad = 0.0, columnspacing = 0.3,
        #   prop = {'size':fontsize}, frameon = False)

    if out_fn:
        figlegend.savefig(out_fn, bbox_inches='tight')
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
512
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
513 def draw( self ):
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
514
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
515 rat = float(self.ns)/self.nf
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
516 rat *= self.args.cell_aspect_ratio
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
517 x,y = (self.args.image_size,rat*self.args.image_size) if rat < 1 else (self.args.image_size/rat,self.args.image_size)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
518 fig = plt.figure( figsize=(x,y), facecolor = 'w' )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
519
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
520 cm = pylab.get_cmap(self.args.colormap)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
521 bottom_col = [ cm._segmentdata['red'][0][1],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
522 cm._segmentdata['green'][0][1],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
523 cm._segmentdata['blue'][0][1] ]
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
524 if self.args.bottom_c:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
525 bottom_col = self.args.bottom_c
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
526 cm.set_under( bottom_col )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
527 top_col = [ cm._segmentdata['red'][-1][1],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
528 cm._segmentdata['green'][-1][1],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
529 cm._segmentdata['blue'][-1][1] ]
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
530 if self.args.top_c:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
531 top_col = self.args.top_c
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
532 cm.set_over( top_col )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
533
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
534 if self.args.nan_c:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
535 cm.set_bad( self.args.nan_c )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
536
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
537 def make_ticklabels_invisible(ax):
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
538 for tl in ax.get_xticklabels() + ax.get_yticklabels():
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
539 tl.set_visible(False)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
540 ax.set_xticks([])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
541 ax.set_yticks([])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
542
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
543 def remove_splines( ax ):
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
544 for v in ['right','left','top','bottom']:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
545 ax.spines[v].set_color('none')
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
546
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
547 def shrink_labels( labels, n ):
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
548 shrink = lambda x: x[:n/2]+" [...] "+x[-n/2:]
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
549 return [(shrink(str(l)) if len(str(l)) > n else l) for l in labels]
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
550
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
551
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
552 #gs = gridspec.GridSpec( 4, 2,
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
553 # width_ratios=[1.0-fr_ns,fr_ns],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
554 # height_ratios=[.03,0.03,1.0-fr_nf,fr_nf],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
555 # wspace = 0.0, hspace = 0.0 )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
556
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
557 fr_ns = float(self.ns)/max([self.ns,self.nf])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
558 fr_nf = float(self.nf)/max([self.ns,self.nf])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
559
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
560 buf_space = 0.05
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
561 minv = min( [buf_space*8, 8*rat*buf_space] )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
562 if minv < 0.05:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
563 buf_space /= minv/0.05
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
564 metadata_height = self.args.metadata_height if type(snames[0]) is tuple and len(snames[0]) > 1 else 0.000001
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
565 gs = gridspec.GridSpec( 6, 4,
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
566 width_ratios=[ buf_space, buf_space*2, .08*self.args.fdend_width,0.9],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
567 height_ratios=[ buf_space, buf_space*2, .08*self.args.sdend_height, metadata_height, self.args.metadata_separation, 0.9],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
568 wspace = 0.0, hspace = 0.0 )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
569
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
570 ax_hm = plt.subplot(gs[23], axisbg = bottom_col )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
571 ax_metadata = plt.subplot(gs[15], axisbg = bottom_col )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
572 ax_hm_y2 = ax_hm.twinx()
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
573
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
574 norm_f = matplotlib.colors.Normalize
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
575 if self.args.log_scale:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
576 norm_f = matplotlib.colors.LogNorm
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
577 elif self.args.sqrt_scale:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
578 norm_f = SqrtNorm
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
579 minv, maxv = 0.0, None
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
580
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
581 maps, values, ndv = [], [], 0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
582 if type(snames[0]) is tuple and len(snames[0]) > 1:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
583 metadata = zip(*[list(s[1:]) for s in snames])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
584 for m in metadata:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
585 mmap = dict([(v[1],ndv+v[0]) for v in enumerate(list(set(m)))])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
586 values.append([mmap[v] for v in m])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
587 ndv += len(mmap)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
588 maps.append(mmap)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
589 dcols = []
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
590 mdmat = np.matrix(values)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
591 while len(dcols) < ndv:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
592 dcols += self.dcols
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
593 cmap = matplotlib.colors.ListedColormap(dcols[:ndv])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
594 bounds = [float(f)-0.5 for f in range(ndv+1)]
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
595 imm = ax_metadata.imshow( mdmat, #origin='lower',
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
596 interpolation = 'nearest',
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
597 aspect='auto',
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
598 extent = [0, self.nf, 0, self.ns],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
599 cmap=cmap,
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
600 vmin=bounds[0],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
601 vmax=bounds[-1],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
602 )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
603 remove_splines( ax_metadata )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
604 ax_metadata_y2 = ax_metadata.twinx()
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
605 ax_metadata_y2.set_ylim(0,len(self.fnames_meta))
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
606 ax_metadata.set_yticks([])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
607 ax_metadata_y2.set_ylim(0,len(self.fnames_meta))
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
608 ax_metadata_y2.tick_params(length=0)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
609 ax_metadata_y2.set_yticks(np.arange(len(self.fnames_meta))+0.5)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
610 ax_metadata_y2.set_yticklabels(self.fnames_meta[::-1], va='center',size=self.args.flabel_size)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
611 else:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
612 ax_metadata.set_yticks([])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
613
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
614 ax_metadata.set_xticks([])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
615
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
616 im = ax_hm.imshow( self.numpy_matrix, #origin='lower',
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
617 interpolation = 'nearest', aspect='auto',
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
618 extent = [0, self.nf, 0, self.ns],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
619 cmap=cm,
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
620 vmin=self.args.minv,
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
621 vmax=self.args.maxv,
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
622 norm = norm_f( vmin=minv if minv > 0.0 else None, vmax=maxv)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
623 )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
624
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
625 #ax_hm.set_ylim([0,800])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
626 ax_hm.set_xticks(np.arange(len(list(snames)))+0.5)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
627 if not self.args.no_slabels:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
628 snames_short = shrink_labels( list([s[0] for s in snames]) if type(snames[0]) is tuple else snames, self.args.max_slabel_len )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
629 ax_hm.set_xticklabels(snames_short,rotation=90,va='top',ha='center',size=self.args.slabel_size)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
630 else:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
631 ax_hm.set_xticklabels([])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
632 ax_hm_y2.set_ylim([0,self.ns])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
633 ax_hm_y2.set_yticks(np.arange(len(fnames))+0.5)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
634 if not self.args.no_flabels:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
635 fnames_short = shrink_labels( fnames, self.args.max_flabel_len )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
636 ax_hm_y2.set_yticklabels(fnames_short,va='center',size=self.args.flabel_size)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
637 else:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
638 ax_hm_y2.set_yticklabels( [] )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
639 ax_hm.set_yticks([])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
640 remove_splines( ax_hm )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
641 ax_hm.tick_params(length=0)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
642 ax_hm_y2.tick_params(length=0)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
643 #ax_hm.set_xlim([0,self.ns])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
644 ax_cm = plt.subplot(gs[3], axisbg = 'r', frameon = False)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
645 #fig.colorbar(im, ax_cm, orientation = 'horizontal', spacing = 'proportional', format = ticker.LogFormatterMathtext() )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
646 fig.colorbar(im, ax_cm, orientation = 'horizontal', spacing='proportional' if self.args.sqrt_scale else 'uniform' ) # , format = ticker.LogFormatterMathtext() )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
647
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
648 if not self.args.no_sclustering:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
649 ax_den_top = plt.subplot(gs[11], axisbg = 'r', frameon = False)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
650 sph._plot_dendrogram( self.sdendrogram['icoord'], self.sdendrogram['dcoord'], self.sdendrogram['ivl'],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
651 self.ns + 1, self.nf + 1, 1, 'top', no_labels=True,
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
652 color_list=self.sdendrogram['color_list'] )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
653 ymax = max([max(a) for a in self.sdendrogram['dcoord']])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
654 ax_den_top.set_ylim([0,ymax])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
655 make_ticklabels_invisible( ax_den_top )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
656 if not self.args.no_fclustering:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
657 ax_den_right = plt.subplot(gs[22], axisbg = 'b', frameon = False)
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
658 sph._plot_dendrogram( self.fdendrogram['icoord'], self.fdendrogram['dcoord'], self.fdendrogram['ivl'],
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
659 self.ns + 1, self.nf + 1, 1, 'right', no_labels=True,
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
660 color_list=self.fdendrogram['color_list'] )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
661 xmax = max([max(a) for a in self.fdendrogram['dcoord']])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
662 ax_den_right.set_xlim([xmax,0])
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
663 make_ticklabels_invisible( ax_den_right )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
664
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
665
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
666 if not self.args.out:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
667 plt.show( )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
668 else:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
669 fig.savefig( self.args.out, bbox_inches='tight', dpi = self.args.dpi )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
670 if maps:
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
671 self.make_legend( maps, fnames_meta, self.args.legend_file )
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
672
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
673
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
674
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
class ReadCmd:
    """Build the command-line parser for the script and hold the parsed options.

    The constructor registers the general I/O options itself and then lets
    DataMatrix, DistMatrix, HClustering and Heatmap add their own options to
    the same parser through their ``input_parameters`` hooks.
    """

    def __init__( self ):
        """Declare all options and parse ``sys.argv`` into ``self.args``."""
        import argparse as ap

        p = ap.ArgumentParser( description= "TBA" )
        arg = p.add_argument

        # General input/output options.
        arg( '-i', '--inp', '--in', metavar='INPUT_FILE', type=str, nargs='?', default=sys.stdin,
             help= "The input matrix" )
        arg( '-o', '--out', metavar='OUTPUT_FILE', type=str, nargs='?', default=None,
             help= "The output image file [image on screen if not specified]" )
        arg( '--legend_file', metavar='LEGEND_FILE', type=str, nargs='?', default=None,
             help= "The output file for the legend of the provided metadata" )

        # The accepted input types are whatever the two matrix classes declare.
        input_types = [DataMatrix.datatype, DistMatrix.datatype]
        arg( '-t', '--input_type', metavar='INPUT_TYPE', type=str, choices = input_types,
             default='data_matrix',
             help= "The input type can be a data matrix or distance matrix [default data_matrix]" )

        # Each processing stage contributes its own options to the shared parser.
        DataMatrix.input_parameters( p )
        DistMatrix.input_parameters( p )
        HClustering.input_parameters( p )
        Heatmap.input_parameters( p )

        self.args = p.parse_args()

    def check_consistency( self ):
        """Placeholder for cross-option validation; currently performs no checks."""
        pass

    def get_args( self ):
        """Return the namespace produced by ``parse_args`` in the constructor."""
        return self.args
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
708
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: parse options, load the data matrix, compute the
    # distances, cluster samples/features as requested and draw the heatmap.

    read = ReadCmd( )
    read.check_consistency()
    args = read.get_args()

    # Only data-matrix input is implemented. The original code had an
    # `elif` that re-tested DataMatrix.datatype (so it could never run) and
    # then fell through to a NameError on `distm`/`dm`; fail explicitly with
    # a readable message instead.
    if args.input_type != DataMatrix.datatype:
        sys.exit( "Input type '%s' is not supported yet: only '%s' input can be processed"
                  % ( args.input_type, DataMatrix.datatype ) )

    dm = DataMatrix( args.inp, args )
    if args.out_table:
        dm.save_matrix( args.out_table )

    # Distances are only computed along the axes that will be clustered.
    distm = DistMatrix( dm.get_numpy_matrix(), args = args )
    if not args.no_sclustering:
        distm.compute_s_dists()
    if not args.no_fclustering:
        distm.compute_f_dists()

    cl = HClustering( distm.get_s_dm(), distm.get_f_dm(), args = args )
    if not args.no_sclustering:
        cl.shcluster()
    if not args.no_fclustering:
        cl.fhcluster()

    hmp = dm.get_numpy_matrix()
    fnames = dm.get_fnames()
    snames = dm.get_snames()
    # Every sample-name level after the first is passed on as a metadata row label.
    fnames_meta = snames.names[1:]

    # Reorder the matrix and the labels to follow the clustering results.
    hmp = cl.get_reordered_matrix( hmp, sclustering = not args.no_sclustering, fclustering = not args.no_fclustering )
    if not args.no_sclustering:
        snames = cl.get_reordered_sample_labels( snames )
    if not args.no_fclustering:
        fnames = cl.get_reordered_feature_labels( fnames )
    else:
        # Without feature clustering the labels are simply reversed.
        fnames = fnames[::-1]

    hm = Heatmap( hmp, cl.sdendrogram, cl.fdendrogram, snames, fnames, fnames_meta, args = args )
    hm.draw()
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
753
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
754
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
755
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
756
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
757
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
758