view correlation_matrix.py @ 3:89009e9b7eb0 draft

Uploaded
author sauria
date Thu, 27 Apr 2017 17:28:27 -0400
parents f0c8cdd78e28
children 4a76c97c3dd0
line wrap: on
line source

#!/usr/bin/env python

import argparse

import numpy
import scipy.stats

def main():
    parser = generate_parser()
    args = parser.parse_args()
    data, rnames = load_data(args)
    corr = find_correlations(data, args)
    save_data(corr, names, args)

def load_data(args):
    infile = open(args.input)
    names = []
    cnames = None
    data = []
    if args.column:
        cnames = infile.readline().split()
        if args.rows:
            cnames = cnames[1:]
    temp = infile.readline()
    if args.int:
        dtype = int
    else:
        dtype = float
    while temp:
        temp = temp.split()
        if args.row:
            names.append(temp[0])
            temp = temp[1:]
        data.append([])
        for i in range(len(temp)):
            data[-1].append(dtype(temp[i]))
        temp = infile.readline()
    if len(names) == 0:
        names = None
    data = numpy.array(data)
    if args.features:
        data = data.T
        names = cnames
    return data, names, cnames

def find_correlations(data, args):
    corr = numpy.ones((data.shape[0], data.shape[0]), dtype=numpy.float32)
    if args.test == 'pearson':
        findcorr = scipy.stats.pearsonr
    elif args.test == 'spearman':
        findcorr = scipy.stats.spearmanr
    else:
        findcorr = scipy.stats.kendalltau
    for i in range(data.shape[0] - 1):
        for j in range(i + 1, data.shape[0]):
            corr[i, j] = findcorr(data[i, :], data[j, :])[0]
            corr[j, i] = corr[i, j]
    return corr

def save_data(data, names, args):
    output = open(args.output, 'w')
    if names is not None:
        output.write("%s\n" % '\t'.join(['sample'] + names))
    for i in range(data.shape[0]):
        if names is not None:
            temp = [names[i]]
        else:
            temp = []
        for j in range(data.shape[1]):
            temp.append("%0.6f" % data[i, j])
        output.write("%s\n" % '\t'.join(temp))
    output.close()

def generate_parser():
    """Generate an argument parser."""
    description = "%(prog)s -- Create a raw file of paired aligned reads for a HiC experiment from bam files"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-f', dest="features",  action='store_true', help="Rows represent features.")
    parser.add_argument('-i', dest='int', action='store_true', help="Data is of type int.")
    parser.add_argument('-t', dest='test', action='store', default='pearson',
        choices=['spearman', 'pearson', 'kendall'], help="Type of correlation to perform.")
    parser.add_argument('-r', dest='row', action='store_true', help="Row names present.")
    parser.add_argument('-c', dest='column', action='store_true', help="Column names present.")
    parser.add_argument(dest="input", type=str, action='store', help="Text files conatining table to be correlated.")
    parser.add_argument(dest="output", type=str, action='store', help="Output destination.")
    return parser

if __name__ == "__main__":
    main()