# Mercurial > repos > george-weingart > graphlan_import

#!/usr/bin/env python

import sys
import numpy as np
import matplotlib.ticker as ticker
import scipy.spatial.distance as spd
import scipy.cluster.hierarchy as sph
from scipy import stats
import matplotlib
#matplotlib.use('Agg')
import pylab
import pandas as pd
from matplotlib.patches import Rectangle
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cPickle as pickle

# Raise the interpreter recursion limit to 10000 frames.
# NOTE(review): presumably required so that dendrograms of large matrices can
# be built without hitting the default limit -- confirm before lowering it.
sys.setrecursionlimit(10000)

# samples on rows
# NOTE(review): DataMatrix.get_snames() returns the table *columns* and
# get_fnames() returns the row index, so the note above may be stale.

class SqrtNorm(matplotlib.colors.Normalize):
    """
    Normalize a given value to the 0-1 range on a square root scale.

    Values less than or equal to zero are masked. The forward mapping is
    ``(sqrt(x) - sqrt(vmin)) / (sqrt(vmax) - sqrt(vmin))``.
    """
    def __call__(self, value, clip=None):
        """
        Map *value* (scalar or array) onto the 0-1 range.

        *clip* defaults to ``self.clip``; when true, data are clipped to
        [vmin, vmax] before scaling. Raises ValueError when vmin > vmax or
        when vmin is not strictly positive.
        """
        if clip is None:
            clip = self.clip

        result, is_scalar = self.process_value(value)

        # Non-positive entries cannot be placed on this scale: mask them.
        result = np.ma.masked_less_equal(result, 0, copy=False)

        self.autoscale_None(result)
        vmin, vmax = self.vmin, self.vmax
        if vmin > vmax:
            raise ValueError("minvalue must be less than or equal to maxvalue")
        elif vmin <= 0:
            raise ValueError("values must all be positive")
        elif vmin == vmax:
            result.fill(0)
        else:
            if clip:
                mask = np.ma.getmask(result)
                result = np.ma.array(np.clip(result.filled(vmax), vmin, vmax),
                                  mask=mask)
            # Work in place on the underlying buffer.
            resdat = result.data
            mask = result.mask
            if mask is np.ma.nomask:
                mask = (resdat <= 0)
            else:
                mask |= resdat <= 0
            # Overwrite masked cells with a harmless positive value so the
            # square root below is well defined (public numpy call instead
            # of the private matplotlib.cbook._putmask helper).
            np.putmask(resdat, mask, 1)
            np.sqrt(resdat, resdat)
            resdat -= np.sqrt(vmin)
            resdat /= (np.sqrt(vmax) - np.sqrt(vmin))
            result = np.ma.array(resdat, mask=mask, copy=False)
        if is_scalar:
            result = result[0]
        return result

    def inverse(self, value):
        """
        Map normalized *value* (0-1) back to data space.

        This inverts the square-root mapping used by ``__call__``:
        ``x = (value * (sqrt(vmax) - sqrt(vmin)) + sqrt(vmin)) ** 2``.
        Raises ValueError if vmin/vmax have not been set yet.
        """
        if not self.scaled():
            raise ValueError("Not invertible until scaled")
        root_min, root_max = np.sqrt(self.vmin), np.sqrt(self.vmax)
        span = root_max - root_min

        if np.iterable(value):
            val = np.ma.asarray(value)
            return np.ma.power(val * span + root_min, 2)
        else:
            return (value * span + root_min) ** 2

    def autoscale(self, A):
        '''
        Set *vmin*, *vmax* to min, max of the strictly positive part of *A*.
        '''
        A = np.ma.masked_less_equal(A, 0, copy=False)
        self.vmin = np.ma.min(A)
        self.vmax = np.ma.max(A)

    def autoscale_None(self, A):
        ' autoscale only None-valued vmin or vmax'
        if self.vmin is not None and self.vmax is not None:
            return
        A = np.ma.masked_less_equal(A, 0, copy=False)
        if self.vmin is None:
            self.vmin = np.ma.min(A)
        if self.vmax is None:
            self.vmax = np.ma.max(A)

class DataMatrix:
    """
    Load a delimited text matrix into a pandas table and optionally keep
    only the top rows/columns ranked by a percentile of their values.

    The table's column labels are exposed by get_snames() and its row index
    by get_fnames().
    NOTE(review): the --fname_row/--sname_row help texts describe the
    opposite orientation from what these getters return -- confirm which
    naming is intended.
    """
    datatype = 'data_matrix'

    @staticmethod
    def input_parameters( parser ):
        """Register the data-matrix command line options on *parser*."""
        dm_param = parser.add_argument_group('Input data matrix parameters')
        arg = dm_param.add_argument

        arg( '--sep', type=str, default='\t' )
        arg( '--out_table', type=str, default=None,
             help = 'Write processed data matrix to file' )
        arg( '--fname_row', type=int, default=0,
             help = "row number containing the names of the features "
                    "[default 0, specify -1 if no names are present in the matrix")
        arg( '--sname_row', type=int, default=0,
             help = "column number containing the names of the samples "
                    "[default 0, specify -1 if no names are present in the matrix")
        arg( '--metadata_rows', type=str, default=None,
             help = "Row numbers to use as metadata "
                    "[default None, meaning no metadata")
        arg( '--skip_rows', type=str, default=None,
             help = "Row numbers to skip (0-indexed, comma separated) from the input file "
                    "[default None, meaning no rows skipped")
        arg( '--sperc', type=int, default=90,
             help = "Percentile of sample value distribution for sample selection" )
        arg( '--fperc', type=int, default=90,
             help = "Percentile of feature value distribution for feature selection" )
        arg( '--stop', type=int, default=None,
             help = "Number of top samples to select (ordering based on percentile specified by --sperc)" )
        arg( '--ftop', type=int, default=None,
             help = "Number of top features to select (ordering based on percentile specified by --fperc)" )
        arg( '--def_na', type=float, default=None,
             help = "Set the default value for missing values [default None which means no replacement]")

    def __init__( self, input_file, args ):
        """
        Read *input_file* (path or file-like object) according to *args*.

        *args* must provide the attributes registered by input_parameters().
        After loading, missing values are replaced when --def_na is given,
        then rows are filtered with --ftop/--fperc and columns with
        --stop/--sperc.
        """
        self.args = args
        self.metadata_rows =  []
        self.metadata_table = None
        toskip = [int(l) for l in self.args.skip_rows.split(",")]  if self.args.skip_rows else []
        if self.args.metadata_rows:
            self.metadata_rows = [int(a) for a in self.args.metadata_rows.split(",")]
            # Shift each metadata row up by one for every skipped row that
            # sits at or before it; comparisons use the unshifted copy.
            mdr = self.metadata_rows[::]
            for t in toskip:
                for i,m in enumerate(mdr):
                    if t <= m:
                        self.metadata_rows[i] -= 1
        if self.metadata_rows:
            header = [self.args.fname_row]+self.metadata_rows if self.args.fname_row > -1 else self.metadata_rows
        else:
            header = self.args.fname_row if self.args.fname_row > -1 else None
        self.table = pd.read_table(
                input_file, sep = self.args.sep,
                skiprows = sorted(toskip),
                header = sorted(header) if isinstance(header, list) else header,
                index_col = self.args.sname_row if self.args.sname_row > -1 else None
                )

        def select( perc, top ):
            # Keep the rows whose perc-th percentile is among the `top`
            # largest (ties at the threshold are all kept). The scores live
            # in a separate Series so no temporary column can collide with a
            # real column, and `top` is capped at the number of rows so an
            # oversized request keeps everything instead of raising.
            scores = self.table.apply(lambda x: stats.scoreatpercentile(x,perc),axis=1)
            if len(scores) == 0:
                return
            top = min( top, len(scores) )
            threshold = sorted(scores)[-top]
            self.table = self.table[scores >= threshold]

        if self.args.def_na is not None:
            self.table = self.table.fillna( self.args.def_na )

        if self.args.ftop:
            select( self.args.fperc, self.args.ftop )

        if self.args.stop:
            # Column selection reuses the row-wise routine on the transpose.
            self.table = self.table.T
            select( self.args.sperc, self.args.stop )
            self.table = self.table.T

    def get_numpy_matrix( self ):
        """Return the table values as a numpy matrix."""
        return np.matrix(self.table)

    def get_snames( self ):
        """Return the column labels of the table (a pandas index object)."""
        return self.table.columns

    def get_fnames( self ):
        """Return the row index labels of the table as a list."""
        return list(self.table.index)

    def get_averages(self, by_row = True) :
        """Return per-row means (default) or per-column means of the table."""
        return self.table.mean(axis = 1 if by_row else 0)

    def save_matrix( self, output_file ):
        """Write the processed table to *output_file* as tab-separated text."""
        self.table.to_csv( output_file, sep = '\t' )

class DistMatrix:
    """
    Compute (or load from disk) the condensed pairwise distance vectors for
    the rows ("f" distances) and the columns ("s" distances) of a matrix.
    """
    datatype = 'distance_matrix'

    @staticmethod
    def input_parameters( parser ):
        """Register the distance-related command line options on *parser*."""
        dm_param = parser.add_argument_group('Distance parameters')
        arg = dm_param.add_argument

        # Informational only: the names are not enforced as argparse choices
        # (the compute methods additionally accept "spearman" and "pearson").
        dist_funcs = [  "euclidean","minkowski","cityblock","seuclidean",
                        "sqeuclidean","cosine","correlation","hamming",
                        "jaccard","chebyshev","canberra","braycurtis",
                        "mahalanobis","yule","matching","dice",
                        "kulsinski","rogerstanimoto","russellrao","sokalmichener",
                        "sokalsneath","wminkowski","ward" ]

        arg( '--f_dist_f', type=str, default="correlation",
             help = "Distance function for features [default correlation]")
        arg( '--s_dist_f', type=str, default="euclidean",
             help = "Distance function for sample [default euclidean]")
        arg( '--load_dist_matrix_f', type=str, default=None,
             help = "Load the distance matrix to be used for features [default None].")
        arg( '--load_dist_matrix_s', type=str, default=None,
             help = "Load the distance matrix to be used for samples [default None].")
        arg( '--save_dist_matrix_f', type=str, default=None,
             help = "Save the distance matrix for features to file [default None].")
        arg( '--save_dist_matrix_s', type=str, default=None,
             help = "Save the distance matrix for samples to file [default None].")

    def __init__( self, data, args = None ):
        """
        *data* is kept only when it is a numpy matrix (otherwise None is
        stored). *args* must provide the attributes registered by
        input_parameters(); it is stored on the instance so the compute
        methods do not depend on a module-level ``args`` variable.
        """
        self.args = args
        self.sdf = args.s_dist_f
        self.fdf = args.f_dist_f

        self.s_cdist_matrix, self.f_cdist_matrix = None, None

        self.numpy_full_matrix = data if isinstance(data, np.matrix) else None

    @staticmethod
    def _load( path ):
        # Pickles are binary data, so the file is opened in binary mode.
        # SECURITY NOTE: unpickling executes arbitrary code; only load
        # distance files that come from a trusted source.
        with open( path, "rb" ) as inp:
            return pickle.load( inp )

    @staticmethod
    def _save( dists, path ):
        # Serialise a condensed distance vector to *path*.
        with open( path, "wb" ) as outf:
            pickle.dump( dists, outf )

    @staticmethod
    def _pdist( data, metric ):
        # Pairwise row distances; "spearman" is computed as the correlation
        # distance between the per-row ranks.
        if metric == "spearman":
            ranked = np.matrix([stats.rankdata(d) for d in data])
            return spd.pdist( ranked, "correlation" )
        return spd.pdist( data, metric )

    def compute_f_dists( self ):
        """
        Fill the row-wise distances, loading them from
        --load_dist_matrix_f when given, and write them to
        --save_dist_matrix_f when requested (for every metric, including
        spearman).
        """
        if self.args.load_dist_matrix_f:
            self.f_cdist_matrix = self._load( self.args.load_dist_matrix_f )
        else:
            if self.fdf == "pearson":
                self.fdf = 'correlation'
            self.f_cdist_matrix = self._pdist( self.numpy_full_matrix, self.fdf )

        if self.args.save_dist_matrix_f:
            self._save( self.f_cdist_matrix, self.args.save_dist_matrix_f )

    def compute_s_dists( self ):
        """
        Fill the column-wise distances (computed on the transposed matrix),
        loading them from --load_dist_matrix_s when given, and write them to
        --save_dist_matrix_s when requested.
        """
        if self.args.load_dist_matrix_s:
            self.s_cdist_matrix = self._load( self.args.load_dist_matrix_s )
        else:
            if self.sdf == "pearson":
                self.sdf = 'correlation'
            self.s_cdist_matrix = self._pdist( self.numpy_full_matrix.transpose(), self.sdf )

        if self.args.save_dist_matrix_s:
            self._save( self.s_cdist_matrix, self.args.save_dist_matrix_s )

    def get_s_dm( self ):
        """Return the column-wise distances (None until computed)."""
        return self.s_cdist_matrix

    def get_f_dm( self ):
        """Return the row-wise distances (None until computed)."""
        return self.f_cdist_matrix

class HClustering:
    """
    Hierarchical clustering of samples and features from condensed distance
    vectors, plus helpers to reorder a matrix and its labels by the
    resulting dendrogram leaf order.
    """
    datatype = 'hclustering'

    @staticmethod
    def input_parameters( parser ):
        """Register the clustering command line options on *parser*."""
        cl_param = parser.add_argument_group('Clustering parameters')
        arg = cl_param.add_argument

        # Informational only: not enforced as argparse choices.
        linkage_method = [ "single","complete","average",
                           "weighted","centroid","median",
                           "ward" ]
        arg( '--no_fclustering', action='store_true',
             help = "avoid clustering features" )
        arg( '--no_sclustering', action='store_true',
             help = "avoid clustering samples" )
        arg( '--flinkage', type=str, default="average",
             help = "Linkage method for feature clustering [default average]")
        arg( '--slinkage', type=str, default="average",
             help = "Linkage method for sample clustering [default average]")

    def get_reordered_matrix( self, matrix, sclustering = True, fclustering = True ):
        """
        Return *matrix* with columns permuted by the sample dendrogram leaf
        order and rows permuted by the reversed feature dendrogram leaf
        order; each permutation is applied only when its flag is true.
        The corresponding dendrogram must have been computed beforehand.
        """
        if not sclustering and not fclustering:
            return matrix

        idx1 = self.sdendrogram['leaves'] if sclustering else None
        idx2 = self.fdendrogram['leaves'][::-1] if fclustering else None

        if sclustering and fclustering:
            return matrix[idx2,:][:,idx1]
        if fclustering:
            return matrix[idx2,:][:]
        return matrix[:][:,idx1]

    def get_reordered_sample_labels( self, slabels ):
        """Return *slabels* in sample dendrogram leaf order."""
        return [slabels[i] for i in self.sdendrogram['leaves']]

    def get_reordered_feature_labels( self, flabels ):
        """Return *flabels* in feature dendrogram leaf order (not reversed)."""
        return [flabels[i] for i in self.fdendrogram['leaves']]

    def __init__( self, s_dm, f_dm, args = None ):
        """
        *s_dm* / *f_dm* are the condensed distance vectors for samples and
        features; *args* must provide ``slinkage`` and ``flinkage``.
        """
        self.s_dm = s_dm
        self.f_dm = f_dm
        self.args = args
        self.sclusters = None
        self.fclusters = None
        # Linkage results; initialised here so the getters return None
        # instead of raising when clustering has not been run.
        self.shclusters = None
        self.fhclusters = None
        self.sdendrogram = None
        self.fdendrogram = None

    def shcluster( self, dendrogram = True ):
        """Cluster the samples; also build the dendrogram unless disabled."""
        # Uses the args stored on the instance, not a module-level variable.
        self.shclusters = sph.linkage( self.s_dm, self.args.slinkage )
        if dendrogram:
            self.sdendrogram = sph.dendrogram( self.shclusters, no_plot=True )

    def fhcluster( self, dendrogram = True ):
        """Cluster the features; also build the dendrogram unless disabled."""
        self.fhclusters = sph.linkage( self.f_dm, self.args.flinkage )
        if dendrogram:
            self.fdendrogram = sph.dendrogram( self.fhclusters, no_plot=True )

    def get_shclusters( self ):
        """Return the sample linkage (None until shcluster() is called)."""
        return self.shclusters

    def get_fhclusters( self ):
        """Return the feature linkage (None until fhcluster() is called)."""
        return self.fhclusters

    def get_sdendrogram( self ):
        """Return the sample dendrogram dictionary (None until computed)."""
        return self.sdendrogram

    def get_fdendrogram( self ):
        """Return the feature dendrogram dictionary (None until computed)."""
        return self.fdendrogram


class Heatmap:
    """
    Draw a heatmap of a matrix with optional dendrograms, a metadata strip
    and a colour bar, and optionally save a legend for the metadata.

    NOTE(review): ``self.ns`` holds the number of matrix rows and
    ``self.nf`` the number of columns; the image is drawn with ``nf`` along
    x and ``ns`` along y.
    """
    datatype = 'heatmap'

    # Custom colour maps, given as matplotlib segment data.
    bbcyr = {'red':  (  (0.0, 0.0, 0.0),
                        (0.25, 0.0, 0.0),
                        (0.50, 0.0, 0.0),
                        (0.75, 1.0, 1.0),
                        (1.0, 1.0, 1.0)),
             'green': ( (0.0, 0.0, 0.0),
                        (0.25, 0.0, 0.0),
                        (0.50, 1.0, 1.0),
                        (0.75, 1.0, 1.0),
                        (1.0, 0.0, 1.0)),
             'blue': (  (0.0, 0.0, 0.0),
                        (0.25, 1.0, 1.0),
                        (0.50, 1.0, 1.0),
                        (0.75, 0.0, 0.0),
                        (1.0, 0.0, 1.0))}

    bbcry = {'red':  (  (0.0, 0.0, 0.0),
                        (0.25, 0.0, 0.0),
                        (0.50, 0.0, 0.0),
                        (0.75, 1.0, 1.0),
                        (1.0, 1.0, 1.0)),
             'green': ( (0.0, 0.0, 0.0),
                        (0.25, 0.0, 0.0),
                        (0.50, 1.0, 1.0),
                        (0.75, 0.0, 0.0),
                        (1.0, 1.0, 1.0)),
             'blue': (  (0.0, 0.0, 0.0),
                        (0.25, 1.0, 1.0),
                        (0.50, 1.0, 1.0),
                        (0.75, 0.0, 0.0),
                        (1.0, 0.0, 1.0))}

    bcry = {'red':  (   (0.0, 0.0, 0.0),
                        (0.33, 0.0, 0.0),
                        (0.66, 1.0, 1.0),
                        (1.0, 1.0, 1.0)),
             'green': ( (0.0, 0.0, 0.0),
                        (0.33, 1.0, 1.0),
                        (0.66, 0.0, 0.0),
                        (1.0, 1.0, 1.0)),
             'blue': (  (0.0, 1.0, 1.0),
                        (0.33, 1.0, 1.0),
                        (0.66, 0.0, 0.0),
                        (1.0, 0.0, 1.0))}


    my_colormaps = [    ('bbcyr',bbcyr),
                        ('bbcry',bbcry),
                        ('bcry',bcry)]

    # Colour cycle used for the discrete metadata values.
    dcols = ['#ca0000','#0087ff','#00ba1d','#cf00ff','#00dbe2','#ffaf00','#0017f4','#006012','#e175ff','#877878','#050505','#b5cf00','#ff8a8a','#aa6400','#50008a','#00ff58']


    @staticmethod
    def input_parameters( parser ):
        """
        Register the heatmap command line options on *parser* and register
        the custom colour maps with pylab.
        """
        hm_param = parser.add_argument_group('Heatmap options')
        arg = hm_param.add_argument

        arg( '--dpi', type=int, default=150,
             help = "Image resolution in dpi [default 150]")
        arg( '-l', '--log_scale', action='store_true',
             help = "Log scale" )
        arg( '-s', '--sqrt_scale', action='store_true',
             help = "Square root scale" )
        arg( '--no_slabels', action='store_true',
             help = "Do not show sample labels" )
        arg( '--minv', type=float, default=None,
             help = "Minimum value to display in the color map [default None meaning automatic]" )
        arg( '--maxv', type=float, default=None,
             help = "Maximum value to display in the color map [default None meaning automatic]" )
        arg( '--no_flabels', action='store_true',
             help = "Do not show feature labels" )
        arg( '--max_slabel_len', type=int, default=25,
             help = "Max number of chars to report for sample labels [default 25]" )
        arg( '--max_flabel_len', type=int, default=25,
             help = "Max number of chars to report for feature labels [default 25]" )
        arg( '--flabel_size', type=int, default=10,
             help = "Feature label font size [default 10]" )
        arg( '--slabel_size', type=int, default=10,
             help = "Sample label font size [default 10]" )
        arg( '--fdend_width', type=float, default=1.0,
             help = "Width of the feature dendrogram [default 1 meaning 100%% of default heatmap width]")
        arg( '--sdend_height', type=float, default=1.0,
             help = "Height of the sample dendrogram [default 1 meaning 100%% of default heatmap height]")
        arg( '--metadata_height', type=float, default=.05,
             help = "Height of the metadata panel [default 0.05 meaning 5%% of default heatmap height]")
        arg( '--metadata_separation', type=float, default=.01,
             help = "Distance between the metadata and data panels. [default 0.01 meaning 1%% of default heatmap height]")
        arg( '--image_size', type=float, default=8,
             help = "Size of the largest between width and height size for the image in inches [default 8]")
        arg( '--cell_aspect_ratio', type=float, default=1.0,
             help = "Aspect ratio between width and height for the cells of the heatmap [default 1.0]")
        col_maps = ['Accent', 'Blues', 'BrBG', 'BuGn', 'BuPu', 'Dark2', 'GnBu',
                    'Greens', 'Greys', 'OrRd', 'Oranges', 'PRGn', 'Paired',
                    'Pastel1', 'Pastel2', 'PiYG', 'PuBu', 'PuBuGn', 'PuOr',
                    'PuRd', 'Purples', 'RdBu', 'RdGy', 'RdPu', 'RdYlBu', 'RdYlGn',
                    'Reds', 'Set1', 'Set2', 'Set3', 'Spectral', 'YlGn', 'YlGnBu',
                    'YlOrBr', 'YlOrRd', 'afmhot', 'autumn', 'binary', 'bone',
                    'brg', 'bwr', 'cool', 'copper', 'flag', 'gist_earth',
                    'gist_gray', 'gist_heat', 'gist_ncar', 'gist_rainbow',
                    'gist_stern', 'gist_yarg', 'gnuplot', 'gnuplot2', 'gray',
                    'hot', 'hsv', 'jet', 'ocean', 'pink', 'prism', 'rainbow',
                    'seismic', 'spectral', 'spring', 'summer', 'terrain', 'winter'] + [n for n,c in Heatmap.my_colormaps]
        for n,c in Heatmap.my_colormaps:
            my_cmap = matplotlib.colors.LinearSegmentedColormap(n,c,256)
            pylab.register_cmap(name=n,cmap=my_cmap)
        arg( '-c','--colormap', type=str, choices = col_maps, default = 'bbcry' )
        arg( '--bottom_c', type=str, default = None,
             help = "Color to use for cells below the minimum value of the scale [default None meaning bottom color of the scale]")
        arg( '--top_c', type=str, default = None,
             help = "Color to use for cells above the maximum value of the scale [default None meaning top color of the scale]")
        arg( '--nan_c', type=str, default = None,
             help = "Color to use for nan cells  [default None]")

    def __init__( self, numpy_matrix, sdendrogram, fdendrogram, snames, fnames, fnames_meta, args = None ):
        """
        Store the matrix, the two dendrogram dictionaries, the labels for
        the x axis (*snames*) and y axis (*fnames*), the metadata row names
        (*fnames_meta*) and the parsed command line *args*.
        """
        self.numpy_matrix = numpy_matrix
        self.sdendrogram = sdendrogram
        self.fdendrogram = fdendrogram
        self.snames = snames
        self.fnames = fnames
        self.fnames_meta = fnames_meta
        self.ns,self.nf = self.numpy_matrix.shape
        self.args = args

    @staticmethod
    def _shrink_labels( labels, n ):
        # Abbreviate labels longer than n characters to
        # "<first n//2 chars> [...] <last n//2 chars>"; shorter labels are
        # returned untouched. Floor division keeps the slice bounds integral.
        half = n // 2
        shrink = lambda x: x[:half]+" [...] "+x[-half:]
        return [(shrink(str(l)) if len(str(l)) > n else l) for l in labels]

    @staticmethod
    def _edge_color( cm, idx ):
        # RGB colour at the bottom (idx 0) or top (idx -1) of colour map cm.
        # Segment data are read directly when they are plain tables; colour
        # maps defined through callables or without segment data fall back
        # to sampling the map at its end point.
        try:
            seg = cm._segmentdata
            return [ seg[c][idx][1] for c in ('red','green','blue') ]
        except (AttributeError, KeyError, IndexError, TypeError):
            return list( cm( 0.0 if idx == 0 else 1.0 ) )[:3]

    def make_legend( self, dmap, titles, out_fn ):
        """
        Build one legend panel per metadata row. *dmap* is a list of
        {value: colour index} dictionaries, *titles* the panel titles; the
        figure is saved to *out_fn* when that is set.
        """
        figlegend = plt.figure(figsize=(1+3*len(titles),2), frameon = False)

        gs = gridspec.GridSpec( 1, len(dmap), wspace = 2.0  )

        for i,(d,title) in enumerate(zip(dmap,titles)):
            legax = plt.subplot(gs[i],frameon = False)
            for k,v in sorted(d.items(),key=lambda x:x[1]):
                # Zero-sized patch: only its legend entry is of interest.
                rect = Rectangle( [0.0, 0.0], 0.0, 0.0,
                                  facecolor = self.dcols[v%len(self.dcols)],
                                  label = k,
                                  edgecolor='b', lw = 0.0)

                legax.add_patch(rect)
            legax.set_xticks([])
            legax.set_yticks([])
            legax.legend( loc = 2, frameon = False, title = title)
        if out_fn:
            figlegend.savefig(out_fn, bbox_inches='tight')

    def draw( self ):
        """
        Render the figure and either show it on screen (no --out) or save
        it to --out; when saving and metadata are present the legend is
        produced as well.

        All data are taken from the instance, not from module-level
        variables.
        """
        args = self.args
        snames, fnames = self.snames, self.fnames
        # Sample names are tuples when metadata rows were loaded: the first
        # item is the label, the remaining items are metadata values.
        tuple_names = len(snames) > 0 and isinstance(snames[0], tuple)
        has_metadata = tuple_names and len(snames[0]) > 1

        rat = float(self.ns)/self.nf
        rat *= args.cell_aspect_ratio
        if rat < 1:
            x,y = args.image_size, rat*args.image_size
        else:
            x,y = args.image_size/rat, args.image_size
        fig = plt.figure( figsize=(x,y), facecolor = 'w'  )

        cm = pylab.get_cmap(args.colormap)
        bottom_col = self._edge_color( cm, 0 )
        if args.bottom_c:
            bottom_col = args.bottom_c
        cm.set_under( bottom_col )
        top_col = self._edge_color( cm, -1 )
        if args.top_c:
            top_col = args.top_c
        cm.set_over( top_col )

        if args.nan_c:
            cm.set_bad( args.nan_c  )

        def make_ticklabels_invisible(ax):
            for tl in ax.get_xticklabels() + ax.get_yticklabels():
                 tl.set_visible(False)
            ax.set_xticks([])
            ax.set_yticks([])

        def remove_splines( ax ):
            for v in ['right','left','top','bottom']:
                ax.spines[v].set_color('none')

        # Enlarge the buffer strips when the figure is very flat or narrow.
        buf_space = 0.05
        min_buf = min( [buf_space*8, 8*rat*buf_space] )
        if min_buf < 0.05:
            buf_space /= min_buf/0.05
        metadata_height = args.metadata_height if has_metadata else 0.000001
        gs = gridspec.GridSpec( 6, 4,
                                width_ratios=[ buf_space, buf_space*2, .08*args.fdend_width,0.9],
                                height_ratios=[ buf_space, buf_space*2, .08*args.sdend_height, metadata_height, args.metadata_separation, 0.9],
                                wspace = 0.0, hspace = 0.0 )

        # Flat GridSpec indices on the 6x4 grid: 23 = heatmap, 15 = metadata,
        # 3 = colour bar, 11 = top dendrogram, 22 = side dendrogram.
        # NOTE(review): the `axisbg` keyword was replaced by `facecolor` in
        # later matplotlib releases -- confirm the targeted version.
        ax_hm = plt.subplot(gs[23], axisbg = bottom_col  )
        ax_metadata = plt.subplot(gs[15], axisbg = bottom_col  )
        ax_hm_y2 = ax_hm.twinx()

        norm_f = matplotlib.colors.Normalize
        if args.log_scale:
            norm_f = matplotlib.colors.LogNorm
        elif args.sqrt_scale:
            norm_f = SqrtNorm

        maps, values, ndv = [], [], 0
        if has_metadata:
            metadata = zip(*[list(s[1:]) for s in snames])
            for m in metadata:
                # Each distinct value gets a colour index unique across all
                # metadata rows (ndv is the running offset).
                mmap = dict([(v[1],ndv+v[0]) for v in enumerate(list(set(m)))])
                values.append([mmap[v] for v in m])
                ndv += len(mmap)
                maps.append(mmap)
            dcols = []
            mdmat = np.matrix(values)
            while len(dcols) < ndv:
                dcols += self.dcols
            cmap = matplotlib.colors.ListedColormap(dcols[:ndv])
            ax_metadata.imshow( mdmat,
                                interpolation = 'nearest',
                                aspect='auto',
                                extent = [0, self.nf, 0, self.ns],
                                cmap=cmap,
                                vmin=-0.5,
                                vmax=ndv-0.5,
                                )
            remove_splines( ax_metadata )
            ax_metadata_y2 = ax_metadata.twinx()
            ax_metadata.set_yticks([])
            ax_metadata_y2.set_ylim(0,len(self.fnames_meta))
            ax_metadata_y2.tick_params(length=0)
            ax_metadata_y2.set_yticks(np.arange(len(self.fnames_meta))+0.5)
            ax_metadata_y2.set_yticklabels(self.fnames_meta[::-1], va='center',size=args.flabel_size)
        else:
            ax_metadata.set_yticks([])

        ax_metadata.set_xticks([])

        # The colour limits are carried by the norm alone; --minv/--maxv left
        # at None are autoscaled from the data.
        im = ax_hm.imshow( self.numpy_matrix,
                           interpolation = 'nearest',  aspect='auto',
                           extent = [0, self.nf, 0, self.ns],
                           cmap=cm,
                           norm = norm_f( vmin=args.minv, vmax=args.maxv )
                           )

        ax_hm.set_xticks(np.arange(len(list(snames)))+0.5)
        if not args.no_slabels:
            plain_names = [s[0] for s in snames] if tuple_names else snames
            snames_short = self._shrink_labels( plain_names, args.max_slabel_len )
            ax_hm.set_xticklabels(snames_short,rotation=90,va='top',ha='center',size=args.slabel_size)
        else:
            ax_hm.set_xticklabels([])
        ax_hm_y2.set_ylim([0,self.ns])
        ax_hm_y2.set_yticks(np.arange(len(fnames))+0.5)
        if not args.no_flabels:
            fnames_short = self._shrink_labels( fnames, args.max_flabel_len )
            ax_hm_y2.set_yticklabels(fnames_short,va='center',size=args.flabel_size)
        else:
            ax_hm_y2.set_yticklabels( [] )
        ax_hm.set_yticks([])
        remove_splines( ax_hm )
        ax_hm.tick_params(length=0)
        ax_hm_y2.tick_params(length=0)
        ax_cm = plt.subplot(gs[3], axisbg = 'r', frameon = False)
        fig.colorbar(im, ax_cm, orientation = 'horizontal', spacing='proportional' if args.sqrt_scale else 'uniform' )

        # NOTE(review): sph._plot_dendrogram is a private scipy helper called
        # with a positional signature; verify it against the installed scipy.
        if not args.no_sclustering:
            ax_den_top = plt.subplot(gs[11], axisbg = 'r', frameon = False)
            sph._plot_dendrogram( self.sdendrogram['icoord'], self.sdendrogram['dcoord'], self.sdendrogram['ivl'],
                                  self.ns + 1, self.nf + 1, 1, 'top', no_labels=True,
                                  color_list=self.sdendrogram['color_list'] )
            ymax = max([max(a) for a in self.sdendrogram['dcoord']])
            ax_den_top.set_ylim([0,ymax])
            make_ticklabels_invisible( ax_den_top )
        if not args.no_fclustering:
            ax_den_right = plt.subplot(gs[22], axisbg = 'b', frameon = False)
            sph._plot_dendrogram(   self.fdendrogram['icoord'], self.fdendrogram['dcoord'], self.fdendrogram['ivl'],
                                    self.ns + 1, self.nf + 1, 1, 'right', no_labels=True,
                                    color_list=self.fdendrogram['color_list'] )
            xmax = max([max(a) for a in self.fdendrogram['dcoord']])
            ax_den_right.set_xlim([xmax,0])
            make_ticklabels_invisible( ax_den_right )


        if not args.out:
            plt.show( )
        else:
            fig.savefig( args.out, bbox_inches='tight', dpi = args.dpi )
            if maps:
                self.make_legend( maps, self.fnames_meta, args.legend_file )


class ReadCmd:
    """Build the command line parser from all components and parse it."""

    def __init__( self, argv = None ):
        """
        Parse the command line.

        *argv* is an optional list of argument strings; when left at None
        argparse reads the arguments of the running process, as before.
        """
        import argparse as ap

        p = ap.ArgumentParser( description= "TBA" )
        arg = p.add_argument

        arg( '-i', '--inp', '--in', metavar='INPUT_FILE', type=str, nargs='?', default=sys.stdin,
             help= "The input matrix" )
        arg( '-o', '--out', metavar='OUTPUT_FILE', type=str, nargs='?', default=None,
             help= "The output image file [image on screen if not specified]" )
        arg( '--legend_file', metavar='LEGEND_FILE', type=str, nargs='?', default=None,
             help= "The output file for the legend of the provided metadata" )

        input_types = [DataMatrix.datatype,DistMatrix.datatype]
        arg( '-t', '--input_type', metavar='INPUT_TYPE', type=str, choices = input_types,
             default='data_matrix',
             help= "The input type can be a data matrix or distance matrix [default data_matrix]" )

        # Each component contributes its own argument group.
        DataMatrix.input_parameters( p )
        DistMatrix.input_parameters( p )
        HClustering.input_parameters( p )
        Heatmap.input_parameters( p )

        self.args  = p.parse_args( argv )

    def check_consistency( self ):
        """Placeholder for cross-option validation; currently does nothing."""
        pass

    def get_args( self ):
        """Return the parsed argument namespace."""
        return self.args

if __name__ == '__main__':

    # The names below are deliberately kept at module level: other code in
    # this file may read `args`, `snames`, `fnames` and `fnames_meta` as
    # globals.
    read = ReadCmd( )
    read.check_consistency()
    args = read.get_args()

    if args.input_type == DataMatrix.datatype:
        dm = DataMatrix( args.inp, args )
        if args.out_table:
            dm.save_matrix( args.out_table )

        distm = DistMatrix( dm.get_numpy_matrix(), args = args )
        if not args.no_sclustering:
            distm.compute_s_dists()
        if not args.no_fclustering:
            distm.compute_f_dists()
    elif args.input_type == DistMatrix.datatype:
        # Reading a precomputed distance matrix as the main input is not
        # implemented; stop with a clear message instead of failing later
        # on undefined variables.
        sys.exit( "Input type '%s' is not supported yet" % args.input_type )
    else:
        sys.exit( "Unknown input type '%s'" % args.input_type )

    cl = HClustering( distm.get_s_dm(), distm.get_f_dm(), args = args )
    if not args.no_sclustering:
        cl.shcluster()
    if not args.no_fclustering:
        cl.fhcluster()

    hmp = dm.get_numpy_matrix()
    fnames = dm.get_fnames()
    snames = dm.get_snames()
    # Column index level names after the first one are the metadata names.
    fnames_meta = snames.names[1:]

    hmp = cl.get_reordered_matrix( hmp, sclustering = not args.no_sclustering, fclustering = not args.no_fclustering  )
    if not args.no_sclustering:
        snames = cl.get_reordered_sample_labels( snames )
    if not args.no_fclustering:
        fnames = cl.get_reordered_feature_labels( fnames )
    else:
        # Without clustering the labels are reversed so that they line up
        # with the bottom-up tick positions used when drawing.
        fnames = fnames[::-1]

    hm = Heatmap( hmp, cl.sdendrogram, cl.fdendrogram, snames, fnames, fnames_meta, args = args )
    hm.draw()
# author	george-weingart
# date	Sat, 06 Sep 2014 15:42:27 -0400
# parents	cac6247cb1d3
# children