diff correlation_matrix.py @ 2:f0c8cdd78e28 draft

Uploaded
author sauria
date Thu, 27 Apr 2017 12:37:59 -0400
parents
children 89009e9b7eb0
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/correlation_matrix.py	Thu Apr 27 12:37:59 2017 -0400
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+
+import argparse
+
+import numpy
+import scipy.stats
+
+def main():
+    parser = generate_parser()
+    args = parser.parse_args()
+    data, names = load_data(args)
+    corr = find_correlations(data, args)
+    save_data(corr, names, args)
+
+def load_data(args):
+    infile = open(args.input)
+    names = []
+    data = []
+    if args.column:
+        temp = infile.readline()
+    temp = infile.readline()
+    if args.int:
+        dtype = int
+    else:
+        dtype = float
+    while temp:
+        temp = temp.split()
+        if args.row:
+            names.append(temp[0])
+            temp = temp[1:]
+        data.append([])
+        for i in range(len(temp)):
+            data[-1].append(dtype(temp[i]))
+        temp = infile.readline()
+    if len(names) == 0:
+        names = None
+    data = numpy.array(data)
+    if args.features:
+        data = data.T
+    return data, names
+
+def find_correlations(data, args):
+    corr = numpy.ones((data.shape[0], data.shape[0]), dtype=numpy.float32)
+    if args.test == 'pearson':
+        findcorr = scipy.stats.pearsonr
+    elif args.test == 'spearman':
+        findcorr = scipy.stats.spearmanr
+    else:
+        findcorr = scipy.stats.kendalltau
+    for i in range(data.shape[0] - 1):
+        for j in range(i + 1, data.shape[0]):
+            corr[i, j] = findcorr(data[i, :], data[j, :])[0]
+            corr[j, i] = corr[i, j]
+    return corr
+
+def save_data(data, names, args):
+    output = open(args.output, 'w')
+    if names is not None:
+        output.write("%s\n" % '\t'.join(['sample'] + names))
+    for i in range(data.shape[0]):
+        if names is not None:
+            temp = [names[i]]
+        else:
+            temp = []
+        for j in range(data.shape[1]):
+            temp.append("%0.6f" % data[i, j])
+        output.write("%s\n" % '\t'.join(temp))
+    output.close()
+
+def generate_parser():
+    """Generate an argument parser."""
+    description = "%(prog)s -- Create a raw file of paired aligned reads for a HiC experiment from bam files"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument('-f', dest="features",  action='store_true', help="Rows represent features.")
+    parser.add_argument('-i', dest='int', action='store_true', help="Data is of type int.")
+    parser.add_argument('-t', dest='test', action='store', default='pearson',
+        choices=['spearman', 'pearson', 'kendall'], help="Type of correlation to perform.")
+    parser.add_argument('-r', dest='row', action='store_true', help="Row names present.")
+    parser.add_argument('-c', dest='column', action='store_true', help="Column names present.")
+    parser.add_argument(dest="input", type=str, action='store', help="Text files conatining table to be correlated.")
+    parser.add_argument(dest="output", type=str, action='store', help="Output destination.")
+    return parser
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file