Mercurial > repos > sauria > create_correlation_matrix
diff correlation_matrix.py @ 2:f0c8cdd78e28 draft
Uploaded
author | sauria |
---|---|
date | Thu, 27 Apr 2017 12:37:59 -0400 |
parents | |
children | 89009e9b7eb0 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/correlation_matrix.py Thu Apr 27 12:37:59 2017 -0400 @@ -0,0 +1,85 @@ +#!/usr/bin/env python + +import argparse + +import numpy +import scipy.stats + +def main(): + parser = generate_parser() + args = parser.parse_args() + data, names = load_data(args) + corr = find_correlations(data, args) + save_data(corr, names, args) + +def load_data(args): + infile = open(args.input) + names = [] + data = [] + if args.column: + temp = infile.readline() + temp = infile.readline() + if args.int: + dtype = int + else: + dtype = float + while temp: + temp = temp.split() + if args.row: + names.append(temp[0]) + temp = temp[1:] + data.append([]) + for i in range(len(temp)): + data[-1].append(dtype(temp[i])) + temp = infile.readline() + if len(names) == 0: + names = None + data = numpy.array(data) + if args.features: + data = data.T + return data, names + +def find_correlations(data, args): + corr = numpy.ones((data.shape[0], data.shape[0]), dtype=numpy.float32) + if args.test == 'pearson': + findcorr = scipy.stats.pearsonr + elif args.test == 'spearman': + findcorr = scipy.stats.spearmanr + else: + findcorr = scipy.stats.kendalltau + for i in range(data.shape[0] - 1): + for j in range(i + 1, data.shape[0]): + corr[i, j] = findcorr(data[i, :], data[j, :])[0] + corr[j, i] = corr[i, j] + return corr + +def save_data(data, names, args): + output = open(args.output, 'w') + if names is not None: + output.write("%s\n" % '\t'.join(['sample'] + names)) + for i in range(data.shape[0]): + if names is not None: + temp = [names[i]] + else: + temp = [] + for j in range(data.shape[1]): + temp.append("%0.6f" % data[i, j]) + output.write("%s\n" % '\t'.join(temp)) + output.close() + +def generate_parser(): + """Generate an argument parser.""" + description = "%(prog)s -- Create a raw file of paired aligned reads for a HiC experiment from bam files" + parser = argparse.ArgumentParser(description=description) + parser.add_argument('-f', dest="features", action='store_true', help="Rows represent features.") + parser.add_argument('-i', dest='int', action='store_true', help="Data is of type int.") + parser.add_argument('-t', dest='test', action='store', default='pearson', + choices=['spearman', 'pearson', 'kendall'], help="Type of correlation to perform.") + parser.add_argument('-r', dest='row', action='store_true', help="Row names present.") + parser.add_argument('-c', dest='column', action='store_true', help="Column names present.") + parser.add_argument(dest="input", type=str, action='store', help="Text files conatining table to be correlated.") + parser.add_argument(dest="output", type=str, action='store', help="Output destination.") + return parser + +if __name__ == "__main__": + main() \ No newline at end of file