| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 import sys | 
|  | 4 import numpy as np | 
|  | 5 import matplotlib | 
|  | 6 matplotlib.use('Agg') | 
|  | 7 import scipy | 
|  | 8 import pylab | 
|  | 9 import scipy.cluster.hierarchy as sch | 
|  | 10 import scipy.spatial.distance as dis | 
|  | 11 from scipy import stats | 
|  | 12 | 
|  | 13 # User defined color maps (in addition to matplotlib ones) | 
|  | 14 bbcyr = {'red':  (  (0.0, 0.0, 0.0), | 
|  | 15                     (0.25, 0.0, 0.0), | 
|  | 16                     (0.50, 0.0, 0.0), | 
|  | 17                     (0.75, 1.0, 1.0), | 
|  | 18                     (1.0, 1.0, 1.0)), | 
|  | 19          'green': ( (0.0, 0.0, 0.0), | 
|  | 20                     (0.25, 0.0, 0.0), | 
|  | 21                     (0.50, 1.0, 1.0), | 
|  | 22                     (0.75, 1.0, 1.0), | 
|  | 23                     (1.0, 0.0, 1.0)), | 
|  | 24          'blue': (  (0.0, 0.0, 0.0), | 
|  | 25                     (0.25, 1.0, 1.0), | 
|  | 26                     (0.50, 1.0, 1.0), | 
|  | 27                     (0.75, 0.0, 0.0), | 
|  | 28                     (1.0, 0.0, 1.0))} | 
|  | 29 | 
|  | 30 bbcry = {'red':  (  (0.0, 0.0, 0.0), | 
|  | 31                     (0.25, 0.0, 0.0), | 
|  | 32                     (0.50, 0.0, 0.0), | 
|  | 33                     (0.75, 1.0, 1.0), | 
|  | 34                     (1.0, 1.0, 1.0)), | 
|  | 35          'green': ( (0.0, 0.0, 0.0), | 
|  | 36                     (0.25, 0.0, 0.0), | 
|  | 37                     (0.50, 1.0, 1.0), | 
|  | 38                     (0.75, 0.0, 0.0), | 
|  | 39                     (1.0, 1.0, 1.0)), | 
|  | 40          'blue': (  (0.0, 0.0, 0.0), | 
|  | 41                     (0.25, 1.0, 1.0), | 
|  | 42                     (0.50, 1.0, 1.0), | 
|  | 43                     (0.75, 0.0, 0.0), | 
|  | 44                     (1.0, 0.0, 1.0))} | 
|  | 45 my_colormaps = [    ('bbcyr',bbcyr), | 
|  | 46                     ('bbcry',bbcry)] | 
|  | 47 | 
|  | 48 | 
|  | 49 | 
|  | 50 def read_params(args): | 
|  | 51     import argparse as ap | 
|  | 52     import textwrap | 
|  | 53 | 
|  | 54     p = ap.ArgumentParser( description= "TBA" ) | 
|  | 55 | 
|  | 56     p.add_argument( '--in', '--inp', metavar='INPUT_FILE', type=str, | 
|  | 57                     nargs='?', default=sys.stdin, | 
|  | 58                     help= "the input archive " ) | 
|  | 59 | 
|  | 60     p.add_argument( '--out', metavar='OUTPUT_FILE', type=str, | 
|  | 61                     nargs = '?', default=None, | 
|  | 62                     help= " the output file, image on screen" | 
|  | 63                           " if not specified. " ) | 
|  | 64 | 
|  | 65     p.add_argument( '-m', metavar='method', type=str, | 
|  | 66                     choices=[   "single","complete","average", | 
|  | 67                                 "weighted","centroid","median", | 
|  | 68                                 "ward" ], | 
|  | 69                     default="average" ) | 
|  | 70 | 
|  | 71     dist_funcs = [  "euclidean","minkowski","cityblock","seuclidean", | 
|  | 72                     "sqeuclidean","cosine","correlation","hamming", | 
|  | 73                     "jaccard","chebyshev","canberra","braycurtis", | 
|  | 74                     "mahalanobis","yule","matching","dice", | 
|  | 75                     "kulsinski","rogerstanimoto","russellrao","sokalmichener", | 
|  | 76                     "sokalsneath","wminkowski","ward"] | 
|  | 77     p.add_argument( '-d', metavar='distance function', type=str, | 
|  | 78                     choices=dist_funcs, | 
|  | 79                     default="euclidean" ) | 
|  | 80     p.add_argument( '-f', metavar='distance function for features', type=str, | 
|  | 81                     choices=dist_funcs, | 
|  | 82                     default="d" ) | 
|  | 83 | 
|  | 84     p.add_argument( '--dmf', metavar='distance matrix for features', type=str, | 
|  | 85                     default = None ) | 
|  | 86     p.add_argument( '--dms', metavar='distance matrix for samples', type=str, | 
|  | 87                     default = None ) | 
|  | 88 | 
|  | 89 | 
|  | 90     p.add_argument( '-l', metavar='sample label', type=str, | 
|  | 91                     default = None ) | 
|  | 92 | 
|  | 93     p.add_argument( '-s', metavar='scale norm', type=str, | 
|  | 94                     default = 'lin', choices = ['log','lin']) | 
|  | 95 | 
|  | 96     p.add_argument( '-x', metavar='x cell width', type=float, | 
|  | 97                     default = 0.1) | 
|  | 98     p.add_argument( '-y', metavar='y cell width', type=float, | 
|  | 99                     default = 0.1 ) | 
|  | 100 | 
|  | 101     p.add_argument( '--minv', metavar='min value', type=float, | 
|  | 102                     default = 0.0 ) | 
|  | 103     p.add_argument( '--maxv', metavar='max value', type=float, | 
|  | 104                     default = None ) | 
|  | 105 | 
|  | 106     p.add_argument( '--xstart', metavar='x coordinate of the top left cell ' | 
|  | 107                                         'of the values', | 
|  | 108                     type=int, default=1 ) | 
|  | 109     p.add_argument( '--ystart', metavar='y coordinate of the top left cell ' | 
|  | 110                                         'of the values', | 
|  | 111                     type=int, default=1 ) | 
|  | 112     p.add_argument( '--xstop', metavar='x coordinate of the bottom right cell ' | 
|  | 113                                         'of the values (default None = last row)', | 
|  | 114                     type=int, default=None ) | 
|  | 115     p.add_argument( '--ystop', metavar='y coordinate of the bottom right cell ' | 
|  | 116                                         'of the values (default None = last column)', | 
|  | 117                     type=int, default=None ) | 
|  | 118 | 
|  | 119     p.add_argument( '--perc', metavar='percentile for ordering and rows selection', type=int, default=None ) | 
|  | 120     p.add_argument( '--top', metavar='selection of the top N rows', type=int, default=None ) | 
|  | 121     p.add_argument( '--norm', metavar='whether to normalize columns (default 0)', type=int, default=0 ) | 
|  | 122 | 
|  | 123     p.add_argument( '--sdend_h', metavar='height of the sample dendrogram', type=float, | 
|  | 124                     default = 0.1 ) | 
|  | 125     p.add_argument( '--fdend_w', metavar='width of the feature dendrogram', type=float, | 
|  | 126                     default = 0.1 ) | 
|  | 127     p.add_argument( '--cm_h', metavar='height of the colormap', type=float, | 
|  | 128                     default = 0.03 ) | 
|  | 129     p.add_argument( '--cm_ticks', metavar='label for ticks of the colormap', type=str, | 
|  | 130                     default = None ) | 
|  | 131 | 
|  | 132     p.add_argument( '--font_size', metavar='label_font_size', type=int, | 
|  | 133                     default = 7 ) | 
|  | 134     p.add_argument( '--feat_dend_col_th', metavar='Color threshold for feature dendrogram', type=float, | 
|  | 135                     default = None ) | 
|  | 136     p.add_argument( '--sample_dend_col_th', metavar='Color threshold for sample dendrogram', type=float, | 
|  | 137                     default = None ) | 
|  | 138     p.add_argument( '--clust_ncols', metavar='Number of colors for clusters', type=int, | 
|  | 139                     default = 7 ) | 
|  | 140     p.add_argument( '--clust_line_w', metavar='Cluster line width', type=float, | 
|  | 141                     default = 1.0 ) | 
|  | 142     p.add_argument( '--label_cols', metavar='Label colors', type=str, | 
|  | 143                     default = None ) | 
|  | 144     p.add_argument( '--label2cols', metavar='Label to colors mapping file', type=str, | 
|  | 145                     default = None ) | 
|  | 146     p.add_argument( '--sdend_out', metavar='File for storing the samples dendrogram in PhyloXML format', type=str, | 
|  | 147                     default = None ) | 
|  | 148     p.add_argument( '--fdend_out', metavar='File for storing the features dendrogram in PhyloXML format', type=str, | 
|  | 149                     default = None ) | 
|  | 150 | 
|  | 151 | 
|  | 152     p.add_argument( '--pad_inches', metavar='Proportion of figure to be left blank around the plot', type=float, | 
|  | 153                     default = 0.1 ) | 
|  | 154 | 
|  | 155 | 
|  | 156     p.add_argument( '--flabel', metavar='Whether to show the labels for the features', type=int, | 
|  | 157                     default = 0 ) | 
|  | 158     p.add_argument( '--slabel', metavar='Whether to show the labels for the samples', type=int, | 
|  | 159                     default = 0 ) | 
|  | 160 | 
|  | 161     p.add_argument( '--legend', metavar='Whether to show the samples to label legend', type=int, | 
|  | 162                     default = 0 ) | 
|  | 163     p.add_argument( '--legend_font_size', metavar='Legend font size', type=int, | 
|  | 164                     default = 7 ) | 
|  | 165     p.add_argument( '--legend_ncol', metavar='Number of columns for the legend', type=int, | 
|  | 166                     default = 3 ) | 
|  | 167     p.add_argument( '--grid', metavar='Whether to show the grid (only black for now)', type=int, | 
|  | 168                     default = 0 ) | 
|  | 169 | 
|  | 170     col_maps = ['Accent', 'Blues', 'BrBG', 'BuGn', 'BuPu', 'Dark2', 'GnBu', | 
|  | 171                 'Greens', 'Greys', 'OrRd', 'Oranges', 'PRGn', 'Paired', | 
|  | 172                 'Pastel1', 'Pastel2', 'PiYG', 'PuBu', 'PuBuGn', 'PuOr', | 
|  | 173                 'PuRd', 'Purples', 'RdBu', 'RdGy', 'RdPu', 'RdYlBu', 'RdYlGn', | 
|  | 174                 'Reds', 'Set1', 'Set2', 'Set3', 'Spectral', 'YlGn', 'YlGnBu', | 
|  | 175                 'YlOrBr', 'YlOrRd', 'afmhot', 'autumn', 'binary', 'bone', | 
|  | 176                 'brg', 'bwr', 'cool', 'copper', 'flag', 'gist_earth', | 
|  | 177                 'gist_gray', 'gist_heat', 'gist_ncar', 'gist_rainbow', | 
|  | 178                 'gist_stern', 'gist_yarg', 'gnuplot', 'gnuplot2', 'gray', | 
|  | 179                 'hot', 'hsv', 'jet', 'ocean', 'pink', 'prism', 'rainbow', | 
|  | 180                 'seismic', 'spectral', 'spring', 'summer', 'terrain', 'winter'] + [n for n,c in my_colormaps] | 
|  | 181     p.add_argument( '-c', metavar='colormap', type=str, | 
|  | 182                     choices = col_maps, default = 'jet' ) | 
|  | 183 | 
|  | 184     return vars(p.parse_args()) | 
|  | 185 | 
|  | 186 # Predefined colors for dendrograms brances and class labels | 
|  | 187 colors = [  "#B22222","#006400","#0000CD","#9400D3","#696969","#8B4513", | 
|  | 188             "#FF1493","#FF8C00","#3CB371","#00Bfff","#CDC9C9","#FFD700", | 
|  | 189             "#2F4F4F","#FF0000","#ADFF2F","#B03060" ] | 
|  | 190 | 
|  | 191 def samples2classes_panel(fig, samples, s2l, idx1, idx2, height, xsize, cols, legendon, fontsize, label2cols, legend_ncol ): | 
|  | 192     from matplotlib.patches import Rectangle | 
|  | 193     samples2labels = dict([(l[0],l[1]) | 
|  | 194                             for l in [ll.strip().split('\t') | 
|  | 195                                 for ll in open(s2l)] if len(l) > 1]) | 
|  | 196 | 
|  | 197     if label2cols: | 
|  | 198         labels2colors = dict([(l[0],l[1]) for l in [ll.strip().split('\t') for ll in open(label2cols)]]) | 
|  | 199     else: | 
|  | 200         cs = cols if cols else colors | 
|  | 201         labels2colors = dict([(l,cs[i%len(cs)]) for i,l in enumerate(set(samples2labels.values()))]) | 
|  | 202     ax1 = fig.add_axes([0.,1.0,1.0,height],frameon=False) | 
|  | 203     ax1.set_xticks([]) | 
|  | 204     ax1.set_yticks([]) | 
|  | 205     ax1.set_ylim( [0.0, height] ) | 
|  | 206     ax1.set_xlim( [0.0, xsize] ) | 
|  | 207     step = xsize / float(len(samples)) | 
|  | 208     labels = set() | 
|  | 209     added_labels = set() | 
|  | 210     for i,ind in enumerate(idx2): | 
|  | 211         if  not samples[ind] in samples2labels or \ | 
|  | 212             not samples2labels[samples[ind]] in labels2colors: | 
|  | 213             fc, ll = "k", None | 
|  | 214         else: | 
|  | 215             ll = samples2labels[samples[ind]] | 
|  | 216             ll = None if ll in added_labels else ll | 
|  | 217             added_labels.add( ll ) | 
|  | 218             fc = labels2colors[samples2labels[samples[ind]]] | 
|  | 219 | 
|  | 220         rect = Rectangle( [float(i)*step, 0.0], step, height, | 
|  | 221                             facecolor = fc, | 
|  | 222                             label = ll, | 
|  | 223                             edgecolor='b', lw = 0.0) | 
|  | 224         labels.add( ll ) | 
|  | 225         ax1.add_patch(rect) | 
|  | 226     ax1.autoscale_view() | 
|  | 227 | 
|  | 228     if legendon: | 
|  | 229         ax1.legend( loc = 2, ncol = legend_ncol, bbox_to_anchor=(1.01, 3.), | 
|  | 230                     borderpad = 0.0, labelspacing = 0.0, | 
|  | 231                     handlelength = 0.5, handletextpad = 0.3, | 
|  | 232                     borderaxespad = 0.0, columnspacing = 0.3, | 
|  | 233                     prop = {'size':fontsize}, frameon = False) | 
|  | 234 | 
|  | 235 def samples_dend_panel( fig, Z, Z2, ystart, ylen, lw ): | 
|  | 236     ax2 = fig.add_axes([0.0,1.0+ystart,1.0,ylen], frameon=False) | 
|  | 237     Z2['color_list'] = [c.replace('b','k') for c in Z2['color_list']] | 
|  | 238     mh = max(Z[:,2]) | 
|  | 239     sch._plot_dendrogram(   Z2['icoord'], Z2['dcoord'], Z2['ivl'], | 
|  | 240                             Z.shape[0] + 1, Z.shape[0] + 1, | 
|  | 241                             mh, 'top', no_labels=True, | 
|  | 242                             color_list=Z2['color_list'] ) | 
|  | 243     for coll in ax2.collections: | 
|  | 244         coll._linewidths = (lw,) | 
|  | 245     ax2.set_xticks([]) | 
|  | 246     ax2.set_yticks([]) | 
|  | 247     ax2.set_xticklabels([]) | 
|  | 248 | 
|  | 249 def features_dend_panel( fig, Z, Z2, width, lw ): | 
|  | 250     ax1 = fig.add_axes([-width,0.0,width,1.0], frameon=False) | 
|  | 251     Z2['color_list'] = [c.replace('b','k').replace('x','b') for c in Z2['color_list']] | 
|  | 252     mh = max(Z[:,2]) | 
|  | 253     sch._plot_dendrogram(Z2['icoord'], Z2['dcoord'], Z2['ivl'], Z.shape[0] + 1, Z.shape[0] + 1, mh, 'right', no_labels=True, color_list=Z2['color_list']) | 
|  | 254     for coll in ax1.collections: | 
|  | 255         coll._linewidths = (lw,) | 
|  | 256     ax1.set_xticks([]) | 
|  | 257     ax1.set_yticks([]) | 
|  | 258     ax1.set_xticklabels([]) | 
|  | 259 | 
|  | 260 | 
|  | 261 def add_cmap( cmapdict, name ): | 
|  | 262     my_cmap = matplotlib.colors.LinearSegmentedColormap(name,cmapdict,256) | 
|  | 263     pylab.register_cmap(name=name,cmap=my_cmap) | 
|  | 264 | 
|  | 265 def init_fig(xsize,ysize,ncol): | 
|  | 266     fig = pylab.figure(figsize=(xsize,ysize)) | 
|  | 267     sch._link_line_colors = colors[:ncol] | 
|  | 268     return fig | 
|  | 269 | 
|  | 270 def heatmap_panel( fig, D, minv, maxv, idx1, idx2, cm_name, scale, cols, rows, label_font_size, cb_offset, cb_l, flabelson, slabelson, cm_ticks, gridon, bar_offset ): | 
|  | 271     cm = pylab.get_cmap(cm_name) | 
|  | 272     bottom_col = [    cm._segmentdata['red'][0][1], | 
|  | 273                       cm._segmentdata['green'][0][1], | 
|  | 274                       cm._segmentdata['blue'][0][1]   ] | 
|  | 275     axmatrix = fig.add_axes(    [0.0,0.0,1.0,1.0], | 
|  | 276                                 axisbg=bottom_col) | 
|  | 277     if any([c < 0.95 for c in bottom_col]): | 
|  | 278         axmatrix.spines['right'].set_color('none') | 
|  | 279         axmatrix.spines['left'].set_color('none') | 
|  | 280         axmatrix.spines['top'].set_color('none') | 
|  | 281         axmatrix.spines['bottom'].set_color('none') | 
|  | 282     norm_f = matplotlib.colors.LogNorm if scale == 'log' else matplotlib.colors.Normalize | 
|  | 283     im = axmatrix.matshow(  D, norm = norm_f(   vmin=minv if minv > 0.0 else None, | 
|  | 284                                                 vmax=maxv), | 
|  | 285                             aspect='auto', origin='lower', cmap=cm, vmax=maxv) | 
|  | 286 | 
|  | 287     axmatrix2 = axmatrix.twinx() | 
|  | 288     axmatrix3 = axmatrix.twiny() | 
|  | 289 | 
|  | 290     axmatrix.set_xticks([]) | 
|  | 291     axmatrix2.set_xticks([]) | 
|  | 292     axmatrix3.set_xticks([]) | 
|  | 293     axmatrix.set_yticks([]) | 
|  | 294     axmatrix2.set_yticks([]) | 
|  | 295     axmatrix3.set_yticks([]) | 
|  | 296 | 
|  | 297     axmatrix.set_xticklabels([]) | 
|  | 298     axmatrix2.set_xticklabels([]) | 
|  | 299     axmatrix3.set_xticklabels([]) | 
|  | 300     axmatrix.set_yticklabels([]) | 
|  | 301     axmatrix2.set_yticklabels([]) | 
|  | 302     axmatrix3.set_yticklabels([]) | 
|  | 303 | 
|  | 304     if any([c < 0.95 for c in bottom_col]): | 
|  | 305         axmatrix2.spines['right'].set_color('none') | 
|  | 306         axmatrix2.spines['left'].set_color('none') | 
|  | 307         axmatrix2.spines['top'].set_color('none') | 
|  | 308         axmatrix2.spines['bottom'].set_color('none') | 
|  | 309     if any([c < 0.95 for c in bottom_col]): | 
|  | 310         axmatrix3.spines['right'].set_color('none') | 
|  | 311         axmatrix3.spines['left'].set_color('none') | 
|  | 312         axmatrix3.spines['top'].set_color('none') | 
|  | 313         axmatrix3.spines['bottom'].set_color('none') | 
|  | 314     if flabelson: | 
|  | 315         axmatrix2.set_yticks(np.arange(len(rows))+0.5) | 
|  | 316         axmatrix2.set_yticklabels([rows[r] for r in idx1],size=label_font_size,va='center') | 
|  | 317     if slabelson: | 
|  | 318         axmatrix.set_xticks(np.arange(len(cols))) | 
|  | 319         axmatrix.set_xticklabels([cols[r] for r in idx2],size=label_font_size,rotation=90,va='top',ha='center') | 
|  | 320     axmatrix.tick_params(length=0) | 
|  | 321     axmatrix2.tick_params(length=0) | 
|  | 322     axmatrix3.tick_params(length=0) | 
|  | 323     axmatrix2.set_ylim(0,len(rows)) | 
|  | 324 | 
|  | 325     if gridon: | 
|  | 326         axmatrix.set_yticks(np.arange(len(idx1)-1)+0.5) | 
|  | 327         axmatrix.set_xticks(np.arange(len(idx2))+0.5) | 
|  | 328         axmatrix.grid( True ) | 
|  | 329         ticklines = axmatrix.get_xticklines() | 
|  | 330         ticklines.extend( axmatrix.get_yticklines() ) | 
|  | 331         #gridlines = axmatrix.get_xgridlines() | 
|  | 332         #gridlines.extend( axmatrix.get_ygridlines() ) | 
|  | 333 | 
|  | 334         for line in ticklines: | 
|  | 335             line.set_linewidth(3) | 
|  | 336 | 
|  | 337     if cb_l > 0.0: | 
|  | 338         axcolor = fig.add_axes([0.0,1.0+bar_offset*1.25,1.0,cb_l]) | 
|  | 339         cbar = fig.colorbar(im, cax=axcolor, orientation='horizontal') | 
|  | 340         cbar.ax.tick_params(labelsize=label_font_size) | 
|  | 341         if cm_ticks: | 
|  | 342             cbar.ax.set_xticklabels( cm_ticks.split(":") ) | 
|  | 343 | 
|  | 344 | 
|  | 345 def read_table( fin, xstart,xstop,ystart,ystop, percentile = None, top = None, norm = False ): | 
|  | 346     mat = [l.rstrip().split('\t') for l in open( fin )] | 
|  | 347 | 
|  | 348     if fin.endswith(".biom"): | 
|  | 349         sample_labels =  mat[1][1:-1] | 
|  | 350         m = [(mm[-1]+"; OTU"+mm[0],np.array([float(f) for f in mm[1:-1]])) for mm in mat[2:]] | 
|  | 351         #feat_labels = [m[-1].replace(";","_").replace(" ","")+m[0] for m in mat[2:]] | 
|  | 352         #print len(feat_labels) | 
|  | 353         #print len(sample_labels) | 
|  | 354     else: | 
|  | 355         sample_labels = mat[0][xstart:xstop] | 
|  | 356         m = [(mm[xstart-1],np.array([float(f) for f in mm[xstart:xstop]])) for mm in mat[ystart:ystop]] | 
|  | 357 | 
|  | 358     if norm: | 
|  | 359         msums = [0.0 for l in m[0][1]] | 
|  | 360         for mm in m: | 
|  | 361             for i,v in enumerate(mm[1]): | 
|  | 362                 msums[i] += v | 
|  | 363 | 
|  | 364     if top and not percentile: | 
|  | 365         percentile = 90 | 
|  | 366 | 
|  | 367     if percentile: | 
|  | 368         m = sorted(m,key=lambda x:-stats.scoreatpercentile(x[1],percentile)) | 
|  | 369     if top: | 
|  | 370         if fin.endswith(".biom"): | 
|  | 371             #feat_labels = [mm[-1].replace(";","_").replace(" ","")+mm[0] for mm in m[:top]] | 
|  | 372             feat_labels = [mm[0] for mm in m[:top]] | 
|  | 373         else: | 
|  | 374             feat_labels = [mm[0] for mm in m[:top]] | 
|  | 375         if norm: | 
|  | 376             m = [np.array([n/v for n,v in zip(mm[1],msums)]) for mm in m[:top]] | 
|  | 377         else: | 
|  | 378             m = [mm[1] for mm in m[:top]] | 
|  | 379     else: | 
|  | 380         if fin.endswith(".biom"): | 
|  | 381             feat_labels = [mm[0] for mm in m] | 
|  | 382         else: | 
|  | 383             feat_labels = [mm[0] for mm in m] | 
|  | 384         if norm: | 
|  | 385             m = [np.array([n/v for n,v in zip(mm[1],msums)]) for mm in m] | 
|  | 386         else: | 
|  | 387             m = [mm[1] for mm in m] | 
|  | 388         #m = [mm[1] for mm in m] | 
|  | 389 | 
|  | 390     D = np.matrix(  np.array( m ) ) | 
|  | 391 | 
|  | 392     return D, feat_labels, sample_labels | 
|  | 393 | 
|  | 394 def read_dm( fin, n ): | 
|  | 395     mat = [[float(f) for f in l.strip().split('\t')] for l in open( fin )] | 
|  | 396     nc = sum([len(r) for r in mat]) | 
|  | 397 | 
|  | 398     if nc == n*n: | 
|  | 399         dm = [] | 
|  | 400         for i in range(n): | 
|  | 401             dm += mat[i][i+1:] | 
|  | 402         return np.array(dm) | 
|  | 403     if nc == (n*n-n)/2: | 
|  | 404         dm = [] | 
|  | 405         for i in range(n): | 
|  | 406             dm += mat[i] | 
|  | 407         return np.array(dm) | 
|  | 408     sys.stderr.write( "Error in reading the distance matrix\n" ) | 
|  | 409     sys.exit() | 
|  | 410 | 
|  | 411 | 
|  | 412 def exp_newick( inp, labels, outfile, tree_format = 'phyloxml' ): | 
|  | 413     n_leaves = int(inp[-1][-1]) | 
|  | 414     from Bio import Phylo | 
|  | 415     import collections | 
|  | 416     from Bio.Phylo.BaseTree import Tree as BTree | 
|  | 417     from Bio.Phylo.BaseTree import Clade as BClade | 
|  | 418     tree = BTree() | 
|  | 419     tree.root = BClade() | 
|  | 420 | 
|  | 421     subclades = {} | 
|  | 422     sb_cbl = {} | 
|  | 423 | 
|  | 424     for i,(fr,to,bl,nsub) in enumerate( inp ): | 
|  | 425         if fr < n_leaves: | 
|  | 426             fr_c = BClade(branch_length=-1.0,name=labels[int(fr)]) | 
|  | 427             subclades[fr] = fr_c | 
|  | 428             sb_cbl[fr] = bl | 
|  | 429         if to < n_leaves: | 
|  | 430             to_c = BClade(branch_length=-1.0,name=labels[int(to)]) | 
|  | 431             subclades[to] = to_c | 
|  | 432             sb_cbl[to] = bl | 
|  | 433     for i,(fr,to,bl,nsub) in enumerate( inp ): | 
|  | 434         fr_c = subclades[fr] | 
|  | 435         to_c = subclades[to] | 
|  | 436         cur_c = BClade(branch_length=bl) | 
|  | 437         cur_c.clades.append( fr_c ) | 
|  | 438         cur_c.clades.append( to_c ) | 
|  | 439         subclades[i+n_leaves] = cur_c | 
|  | 440 | 
|  | 441     def reset_rec( clade, fath_bl ): | 
|  | 442         if clade.branch_length < 0: | 
|  | 443             clade.branch_length = fath_bl | 
|  | 444             return | 
|  | 445         for c in clade.clades: | 
|  | 446             reset_rec( c, clade.branch_length ) | 
|  | 447         clade.branch_length = fath_bl-clade.branch_length | 
|  | 448 | 
|  | 449     tree.root = cur_c | 
|  | 450     reset_rec( tree.root, 0.0 ) | 
|  | 451     tree.root.branch_length = 0.0 | 
|  | 452     Phylo.write(tree, outfile, tree_format ) | 
|  | 453 | 
|  | 454 def hclust(  fin, fout, | 
|  | 455              method = "average", | 
|  | 456              dist_func = "euclidean", | 
|  | 457              feat_dist_func = "d", | 
|  | 458              xcw = 0.1, | 
|  | 459              ycw = 0.1, | 
|  | 460              scale = 'lin', | 
|  | 461              minv = 0.0, | 
|  | 462              maxv = None, | 
|  | 463              xstart = 1, | 
|  | 464              ystart = 1, | 
|  | 465              xstop = None, | 
|  | 466              ystop = None, | 
|  | 467              percentile = None, | 
|  | 468              top = None, | 
|  | 469              norm = False, | 
|  | 470              cm_name = 'jet', | 
|  | 471              s2l = None, | 
|  | 472              label_font_size = 7, | 
|  | 473              feat_dend_col_th = None, | 
|  | 474              sample_dend_col_th = None, | 
|  | 475              clust_ncols = 7, | 
|  | 476              clust_line_w = 1.0, | 
|  | 477              label_cols = None, | 
|  | 478              sdend_h = 0.1, | 
|  | 479              fdend_w = 0.1, | 
|  | 480              cm_h = 0.03, | 
|  | 481              dmf = None, | 
|  | 482              dms = None, | 
|  | 483              legendon = False, | 
|  | 484              label2cols = None, | 
|  | 485              flabelon = True, | 
|  | 486              slabelon = True, | 
|  | 487              cm_ticks = None, | 
|  | 488              legend_ncol = 3, | 
|  | 489              pad_inches = None, | 
|  | 490              legend_font_size = 7, | 
|  | 491              gridon = 0, | 
|  | 492              sdend_out = None, | 
|  | 493              fdend_out= None): | 
|  | 494 | 
|  | 495     if label_cols and label_cols.count("-"): | 
|  | 496         label_cols = label_cols.split("-") | 
|  | 497 | 
|  | 498     for n,c in my_colormaps: | 
|  | 499         add_cmap( c, n ) | 
|  | 500 | 
|  | 501     if feat_dist_func == 'd': | 
|  | 502         feat_dist_func = dist_func | 
|  | 503 | 
|  | 504     D, feat_labels, sample_labels = read_table(fin,xstart,xstop,ystart,ystop,percentile,top,norm) | 
|  | 505 | 
|  | 506     ylen,xlen = D[:].shape | 
|  | 507     Dt = D.transpose() | 
|  | 508 | 
|  | 509     size_cx, size_cy = xcw, ycw | 
|  | 510 | 
|  | 511     xsize, ysize = max(xlen*size_cx,2.0), max(ylen*size_cy,2.0) | 
|  | 512     ydend_offset = 0.025*8.0/ysize if s2l else 0.0 | 
|  | 513 | 
|  | 514     fig = init_fig(xsize,ysize,clust_ncols) | 
|  | 515 | 
|  | 516     nfeats, nsamples = len(D), len(Dt) | 
|  | 517 | 
|  | 518     if dmf: | 
|  | 519         p1 = read_dm( dmf, nfeats ) | 
|  | 520         Y1 = sch.linkage(   p1, method=method ) | 
|  | 521     else: | 
|  | 522         p1 = dis.pdist( D, feat_dist_func ) | 
|  | 523         Y1 = sch.linkage( p1, method=method ) # , metric=feat_dist_func ) | 
|  | 524         #Y1 = sch.linkage( D, method=method, metric=feat_dist_func ) | 
|  | 525     Z1 = sch.dendrogram(Y1, no_plot=True, color_threshold=feat_dend_col_th) | 
|  | 526 | 
|  | 527     if fdend_out: | 
|  | 528         exp_newick( Y1, feat_labels, fdend_out ) | 
|  | 529 | 
|  | 530     if dms: | 
|  | 531         p2 = read_dm( dms, nsamples ) | 
|  | 532         Y2 = sch.linkage(   p2, method=method ) | 
|  | 533     else: | 
|  | 534         p2 = dis.pdist( Dt, dist_func ) | 
|  | 535         Y2 = sch.linkage(   p2, method=method ) # , metric=dist_func ) | 
|  | 536         #Y2 = sch.linkage(   Dt, method=method, metric=dist_func ) | 
|  | 537     Z2 = sch.dendrogram(Y2, no_plot=True, color_threshold=sample_dend_col_th) | 
|  | 538 | 
|  | 539     if sdend_out: | 
|  | 540         exp_newick( Y2, sample_labels, sdend_out ) | 
|  | 541 | 
|  | 542     if fdend_w > 0.0: | 
|  | 543         features_dend_panel(fig, Y1, Z1, fdend_w*8.0/xsize, clust_line_w ) | 
|  | 544     if sdend_h > 0.0: | 
|  | 545         samples_dend_panel(fig, Y2, Z2, ydend_offset, sdend_h*8.0/ysize, clust_line_w) | 
|  | 546 | 
|  | 547     idx1, idx2 = Z1['leaves'], Z2['leaves'] | 
|  | 548     D = D[idx1,:][:,idx2] | 
|  | 549 | 
|  | 550     if s2l: | 
|  | 551         samples2classes_panel( fig, sample_labels, s2l, idx1, idx2, 0.025*8.0/ysize, xsize, label_cols, legendon, legend_font_size, label2cols, legend_ncol ) | 
|  | 552     heatmap_panel( fig, D, minv, maxv, idx1, idx2, cm_name, scale, sample_labels, feat_labels, label_font_size, -cm_h*8.0/ysize, cm_h*0.8*8.0/ysize, flabelon, slabelon, cm_ticks, gridon, ydend_offset+sdend_h*8.0/ysize ) | 
|  | 553 | 
|  | 554     fig.savefig(    fout, bbox_inches='tight', | 
|  | 555                     pad_inches = pad_inches, | 
|  | 556                     dpi=300) if fout else pylab.show() | 
|  | 557 | 
|  | 558 if __name__ == '__main__': | 
|  | 559     pars = read_params( sys.argv ) | 
|  | 560 | 
|  | 561     hclust(   fin  = pars['in'], | 
|  | 562               fout = pars['out'], | 
|  | 563               method = pars['m'], | 
|  | 564               dist_func = pars['d'], | 
|  | 565               feat_dist_func = pars['f'], | 
|  | 566               xcw = pars['x'], | 
|  | 567               ycw = pars['y'], | 
|  | 568               scale = pars['s'], | 
|  | 569               minv = pars['minv'], | 
|  | 570               maxv = pars['maxv'], | 
|  | 571               xstart = pars['xstart'], | 
|  | 572               ystart = pars['ystart'], | 
|  | 573               xstop = pars['xstop'], | 
|  | 574               ystop = pars['ystop'], | 
|  | 575               percentile = pars['perc'], | 
|  | 576               top = pars['top'], | 
|  | 577               norm = pars['norm'], | 
|  | 578               cm_name = pars['c'], | 
|  | 579               s2l = pars['l'], | 
|  | 580               label_font_size = pars['font_size'], | 
|  | 581               feat_dend_col_th = pars['feat_dend_col_th'], | 
|  | 582               sample_dend_col_th = pars['sample_dend_col_th'], | 
|  | 583               clust_ncols = pars['clust_ncols'], | 
|  | 584               clust_line_w = pars['clust_line_w'], | 
|  | 585               label_cols = pars['label_cols'], | 
|  | 586               sdend_h = pars['sdend_h'], | 
|  | 587               fdend_w = pars['fdend_w'], | 
|  | 588               cm_h = pars['cm_h'], | 
|  | 589               dmf = pars['dmf'], | 
|  | 590               dms = pars['dms'], | 
|  | 591               legendon = pars['legend'], | 
|  | 592               label2cols = pars['label2cols'], | 
|  | 593               flabelon = pars['flabel'], | 
|  | 594               slabelon = pars['slabel'], | 
|  | 595               cm_ticks = pars['cm_ticks'], | 
|  | 596               legend_ncol = pars['legend_ncol'], | 
|  | 597               pad_inches = pars['pad_inches'], | 
|  | 598               legend_font_size = pars['legend_font_size'], | 
|  | 599               gridon = pars['grid'], | 
|  | 600               sdend_out = pars['sdend_out'], | 
|  | 601               fdend_out = pars['fdend_out'], | 
|  | 602               ) | 
|  | 603 |