Mercurial > repos > sauria > create_correlation_matrix
comparison correlation_matrix.py @ 2:f0c8cdd78e28 draft
Uploaded
author | sauria |
---|---|
date | Thu, 27 Apr 2017 12:37:59 -0400 |
parents | |
children | 89009e9b7eb0 |
comparison
equal
deleted
inserted
replaced
1:9aeb70cf7a41 | 2:f0c8cdd78e28 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 | |
5 import numpy | |
6 import scipy.stats | |
7 | |
8 def main(): | |
9 parser = generate_parser() | |
10 args = parser.parse_args() | |
11 data, names = load_data(args) | |
12 corr = find_correlations(data, args) | |
13 save_data(corr, names, args) | |
14 | |
15 def load_data(args): | |
16 infile = open(args.input) | |
17 names = [] | |
18 data = [] | |
19 if args.column: | |
20 temp = infile.readline() | |
21 temp = infile.readline() | |
22 if args.int: | |
23 dtype = int | |
24 else: | |
25 dtype = float | |
26 while temp: | |
27 temp = temp.split() | |
28 if args.row: | |
29 names.append(temp[0]) | |
30 temp = temp[1:] | |
31 data.append([]) | |
32 for i in range(len(temp)): | |
33 data[-1].append(dtype(temp[i])) | |
34 temp = infile.readline() | |
35 if len(names) == 0: | |
36 names = None | |
37 data = numpy.array(data) | |
38 if args.features: | |
39 data = data.T | |
40 return data, names | |
41 | |
42 def find_correlations(data, args): | |
43 corr = numpy.ones((data.shape[0], data.shape[0]), dtype=numpy.float32) | |
44 if args.test == 'pearson': | |
45 findcorr = scipy.stats.pearsonr | |
46 elif args.test == 'spearman': | |
47 findcorr = scipy.stats.spearmanr | |
48 else: | |
49 findcorr = scipy.stats.kendalltau | |
50 for i in range(data.shape[0] - 1): | |
51 for j in range(i + 1, data.shape[0]): | |
52 corr[i, j] = findcorr(data[i, :], data[j, :])[0] | |
53 corr[j, i] = corr[i, j] | |
54 return corr | |
55 | |
56 def save_data(data, names, args): | |
57 output = open(args.output, 'w') | |
58 if names is not None: | |
59 output.write("%s\n" % '\t'.join(['sample'] + names)) | |
60 for i in range(data.shape[0]): | |
61 if names is not None: | |
62 temp = [names[i]] | |
63 else: | |
64 temp = [] | |
65 for j in range(data.shape[1]): | |
66 temp.append("%0.6f" % data[i, j]) | |
67 output.write("%s\n" % '\t'.join(temp)) | |
68 output.close() | |
69 | |
70 def generate_parser(): | |
71 """Generate an argument parser.""" | |
72 description = "%(prog)s -- Create a raw file of paired aligned reads for a HiC experiment from bam files" | |
73 parser = argparse.ArgumentParser(description=description) | |
74 parser.add_argument('-f', dest="features", action='store_true', help="Rows represent features.") | |
75 parser.add_argument('-i', dest='int', action='store_true', help="Data is of type int.") | |
76 parser.add_argument('-t', dest='test', action='store', default='pearson', | |
77 choices=['spearman', 'pearson', 'kendall'], help="Type of correlation to perform.") | |
78 parser.add_argument('-r', dest='row', action='store_true', help="Row names present.") | |
79 parser.add_argument('-c', dest='column', action='store_true', help="Column names present.") | |
80 parser.add_argument(dest="input", type=str, action='store', help="Text files conatining table to be correlated.") | |
81 parser.add_argument(dest="output", type=str, action='store', help="Output destination.") | |
82 return parser | |
83 | |
84 if __name__ == "__main__": | |
85 main() |