annotate correlation_matrix.py @ 4:4a76c97c3dd0 draft default tip

Uploaded
author sauria
date Thu, 27 Apr 2017 17:32:19 -0400
parents 89009e9b7eb0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
1 #!/usr/bin/env python
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
2
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
3 import argparse
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
4
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
5 import numpy
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
6 import scipy.stats
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
7
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
def main():
    """Run the full pipeline: parse arguments, load, correlate, save."""
    args = generate_parser().parse_args()
    # load_data returns the numeric table plus optional labels for its rows.
    table, labels = load_data(args)
    correlations = find_correlations(table, args)
    save_data(correlations, labels, args)
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
14
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
def load_data(args):
    """Read a whitespace-delimited numeric table from ``args.input``.

    Attributes read from ``args``:
        input: path of the text file to read.
        column: if true, the first line holds column names.
        row: if true, the first field of each data line is a row name
            (and the first field of the header line is dropped).
        int: if true, values are parsed with ``int``; otherwise ``float``.
        features: if true, the table is transposed and the column names
            (``None`` when no header was read) replace the row names.

    Returns:
        A ``(data, names)`` tuple: ``data`` is a numpy array of the parsed
        values and ``names`` is a list of labels for its rows, or ``None``
        when no labels are available.

    Raises:
        ValueError: if a field cannot be converted to the requested type.
    """
    dtype = int if args.int else float
    names = []
    cnames = None
    data = []
    # The context manager guarantees the file is closed even if parsing fails.
    with open(args.input) as infile:
        if args.column:
            cnames = infile.readline().split()
            if args.row:
                # Drop the corner label sitting above the row-name column.
                cnames = cnames[1:]
        for line in infile:
            fields = line.split()
            if not fields:
                # Skip blank lines; they would otherwise raise IndexError
                # (with row names) or yield a ragged array.
                continue
            if args.row:
                names.append(fields[0])
                fields = fields[1:]
            data.append([dtype(value) for value in fields])
    if not names:
        names = None
    data = numpy.array(data)
    if args.features:
        # Correlate columns instead of rows, so label them by column name.
        data = data.T
        names = cnames
    return data, names
2
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
45
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
def find_correlations(data, args):
    """Compute the pairwise correlation between the rows of ``data``.

    Args:
        data: 2-D array-like of numeric values; each row is one series.
        args: object whose ``test`` attribute selects the statistic:
            'pearson', 'spearman', or anything else for Kendall's tau.

    Returns:
        A square ``numpy.float32`` array with one row/column per input row.
        The diagonal is 1 and entry ``[i, j]`` is the correlation
        coefficient of rows ``i`` and ``j``.
    """
    # Accept lists of lists as well as arrays.
    data = numpy.asarray(data)
    methods = {
        'pearson': scipy.stats.pearsonr,
        'spearman': scipy.stats.spearmanr,
    }
    findcorr = methods.get(args.test, scipy.stats.kendalltau)
    count = data.shape[0]
    corr = numpy.ones((count, count), dtype=numpy.float32)
    # Only the upper triangle is computed; the result is mirrored below it.
    for i in range(count - 1):
        for j in range(i + 1, count):
            # Each scipy function returns (coefficient, p-value).
            corr[i, j] = findcorr(data[i, :], data[j, :])[0]
            corr[j, i] = corr[i, j]
    return corr
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
59
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
def save_data(data, names, args):
    """Write a matrix to ``args.output`` as tab-delimited text.

    Args:
        data: 2-D array of numbers; each row becomes one output line with
            values formatted to six decimal places.
        names: sequence of row labels, or ``None``. When given, a header
            line ('sample' followed by the labels) is written first and
            each row is prefixed with its label.
        args: object whose ``output`` attribute is the destination path.
    """
    # The context manager closes the file even if formatting or writing fails.
    with open(args.output, 'w') as output:
        if names is not None:
            output.write("%s\n" % '\t'.join(['sample'] + list(names)))
        for index, row in enumerate(data):
            fields = [names[index]] if names is not None else []
            fields.extend("%0.6f" % value for value in row)
            output.write("%s\n" % '\t'.join(fields))
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
73
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
def generate_parser():
    """Generate an argument parser.

    Returns:
        An ``argparse.ArgumentParser`` with the flags ``-f`` (features),
        ``-i`` (int), ``-t`` (test; 'pearson' by default), ``-r`` (row) and
        ``-c`` (column), plus the positional arguments ``input`` and
        ``output``.
    """
    # The previous description was copied from an unrelated HiC/bam tool.
    description = ("%(prog)s -- Create a matrix of pairwise correlations "
                   "from a table of numeric data")
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-f', dest="features", action='store_true', help="Rows represent features.")
    parser.add_argument('-i', dest='int', action='store_true', help="Data is of type int.")
    parser.add_argument('-t', dest='test', action='store', default='pearson',
                        choices=['spearman', 'pearson', 'kendall'], help="Type of correlation to perform.")
    parser.add_argument('-r', dest='row', action='store_true', help="Row names present.")
    parser.add_argument('-c', dest='column', action='store_true', help="Column names present.")
    parser.add_argument(dest="input", type=str, action='store', help="Text file containing table to be correlated.")
    parser.add_argument(dest="output", type=str, action='store', help="Output destination.")
    return parser
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
87
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
# Script entry point: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()