2
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import argparse
|
|
4
|
|
5 import numpy
|
|
6 import scipy.stats
|
|
7
|
|
8 def main():
|
|
9 parser = generate_parser()
|
|
10 args = parser.parse_args()
|
|
11 data, names = load_data(args)
|
|
12 corr = find_correlations(data, args)
|
|
13 save_data(corr, names, args)
|
|
14
|
|
15 def load_data(args):
|
|
16 infile = open(args.input)
|
|
17 names = []
|
|
18 data = []
|
|
19 if args.column:
|
|
20 temp = infile.readline()
|
|
21 temp = infile.readline()
|
|
22 if args.int:
|
|
23 dtype = int
|
|
24 else:
|
|
25 dtype = float
|
|
26 while temp:
|
|
27 temp = temp.split()
|
|
28 if args.row:
|
|
29 names.append(temp[0])
|
|
30 temp = temp[1:]
|
|
31 data.append([])
|
|
32 for i in range(len(temp)):
|
|
33 data[-1].append(dtype(temp[i]))
|
|
34 temp = infile.readline()
|
|
35 if len(names) == 0:
|
|
36 names = None
|
|
37 data = numpy.array(data)
|
|
38 if args.features:
|
|
39 data = data.T
|
|
40 return data, names
|
|
41
|
|
42 def find_correlations(data, args):
|
|
43 corr = numpy.ones((data.shape[0], data.shape[0]), dtype=numpy.float32)
|
|
44 if args.test == 'pearson':
|
|
45 findcorr = scipy.stats.pearsonr
|
|
46 elif args.test == 'spearman':
|
|
47 findcorr = scipy.stats.spearmanr
|
|
48 else:
|
|
49 findcorr = scipy.stats.kendalltau
|
|
50 for i in range(data.shape[0] - 1):
|
|
51 for j in range(i + 1, data.shape[0]):
|
|
52 corr[i, j] = findcorr(data[i, :], data[j, :])[0]
|
|
53 corr[j, i] = corr[i, j]
|
|
54 return corr
|
|
55
|
|
56 def save_data(data, names, args):
|
|
57 output = open(args.output, 'w')
|
|
58 if names is not None:
|
|
59 output.write("%s\n" % '\t'.join(['sample'] + names))
|
|
60 for i in range(data.shape[0]):
|
|
61 if names is not None:
|
|
62 temp = [names[i]]
|
|
63 else:
|
|
64 temp = []
|
|
65 for j in range(data.shape[1]):
|
|
66 temp.append("%0.6f" % data[i, j])
|
|
67 output.write("%s\n" % '\t'.join(temp))
|
|
68 output.close()
|
|
69
|
|
70 def generate_parser():
|
|
71 """Generate an argument parser."""
|
|
72 description = "%(prog)s -- Create a raw file of paired aligned reads for a HiC experiment from bam files"
|
|
73 parser = argparse.ArgumentParser(description=description)
|
|
74 parser.add_argument('-f', dest="features", action='store_true', help="Rows represent features.")
|
|
75 parser.add_argument('-i', dest='int', action='store_true', help="Data is of type int.")
|
|
76 parser.add_argument('-t', dest='test', action='store', default='pearson',
|
|
77 choices=['spearman', 'pearson', 'kendall'], help="Type of correlation to perform.")
|
|
78 parser.add_argument('-r', dest='row', action='store_true', help="Row names present.")
|
|
79 parser.add_argument('-c', dest='column', action='store_true', help="Column names present.")
|
|
80 parser.add_argument(dest="input", type=str, action='store', help="Text files conatining table to be correlated.")
|
|
81 parser.add_argument(dest="output", type=str, action='store', help="Output destination.")
|
|
82 return parser
|
|
83
|
|
84 if __name__ == "__main__":
|
|
85 main() |