Mercurial > repos > sauria > create_correlation_matrix
view correlation_matrix.py @ 3:89009e9b7eb0 draft
Uploaded
author | sauria |
---|---|
date | Thu, 27 Apr 2017 17:28:27 -0400 |
parents | f0c8cdd78e28 |
children | 4a76c97c3dd0 |
line wrap: on
line source
#!/usr/bin/env python import argparse import numpy import scipy.stats def main(): parser = generate_parser() args = parser.parse_args() data, rnames = load_data(args) corr = find_correlations(data, args) save_data(corr, names, args) def load_data(args): infile = open(args.input) names = [] cnames = None data = [] if args.column: cnames = infile.readline().split() if args.rows: cnames = cnames[1:] temp = infile.readline() if args.int: dtype = int else: dtype = float while temp: temp = temp.split() if args.row: names.append(temp[0]) temp = temp[1:] data.append([]) for i in range(len(temp)): data[-1].append(dtype(temp[i])) temp = infile.readline() if len(names) == 0: names = None data = numpy.array(data) if args.features: data = data.T names = cnames return data, names, cnames def find_correlations(data, args): corr = numpy.ones((data.shape[0], data.shape[0]), dtype=numpy.float32) if args.test == 'pearson': findcorr = scipy.stats.pearsonr elif args.test == 'spearman': findcorr = scipy.stats.spearmanr else: findcorr = scipy.stats.kendalltau for i in range(data.shape[0] - 1): for j in range(i + 1, data.shape[0]): corr[i, j] = findcorr(data[i, :], data[j, :])[0] corr[j, i] = corr[i, j] return corr def save_data(data, names, args): output = open(args.output, 'w') if names is not None: output.write("%s\n" % '\t'.join(['sample'] + names)) for i in range(data.shape[0]): if names is not None: temp = [names[i]] else: temp = [] for j in range(data.shape[1]): temp.append("%0.6f" % data[i, j]) output.write("%s\n" % '\t'.join(temp)) output.close() def generate_parser(): """Generate an argument parser.""" description = "%(prog)s -- Create a raw file of paired aligned reads for a HiC experiment from bam files" parser = argparse.ArgumentParser(description=description) parser.add_argument('-f', dest="features", action='store_true', help="Rows represent features.") parser.add_argument('-i', dest='int', action='store_true', help="Data is of type int.") parser.add_argument('-t', dest='test', action='store', default='pearson', choices=['spearman', 'pearson', 'kendall'], help="Type of correlation to perform.") parser.add_argument('-r', dest='row', action='store_true', help="Row names present.") parser.add_argument('-c', dest='column', action='store_true', help="Column names present.") parser.add_argument(dest="input", type=str, action='store', help="Text files conatining table to be correlated.") parser.add_argument(dest="output", type=str, action='store', help="Output destination.") return parser if __name__ == "__main__": main()