#!/usr/bin/env python
"""matrix_cat.py: 

Usage:
  matrix_cat.py [options] file1 file2 [file3 ...]

Options:
  -h            input files have no header line (none is written)
  -i            output only features in common with all files
  -f            use 'float' mode to save memory
  -o <outfile>  Output file
  -m <missing>  Missing value string         
  -q            run quietly
"""
import os, os.path, sys, getopt, re
import array

# Field separator used to split input lines and to join output fields.
delim = "\t"
# When True, log() writes its messages to stderr; main() sets this to False for -q.
verbose = True

def usage(code = 0):
    """Write the module docstring (the usage text) to stderr.

    code: exit status handed to sys.exit() after printing. Pass None to
    print the text and return to the caller instead of exiting.
    """
    sys.stderr.write( __doc__ )
    # Identity test: only the None singleton means "do not exit"; 0 still exits.
    if code is not None:
        sys.exit(code)

def log(msg, die = False):
    """Emit msg on stderr if the module-level verbose flag is set.

    msg: text written as-is (no newline is appended).
    die: when true, terminate the process with exit status 1 afterwards,
         regardless of whether the message was actually printed.
    """
    if verbose:
        sys.stderr.write(msg)
    if not die:
        return
    sys.exit(1)

def readFile(inFile, header = True, use_float = False):
    """Load one delimited matrix file into memory.

    Each non-blank line is split on the module-level delimiter; the first
    field is the row name and the remaining fields are the row's values.

    inFile:    path of the file to read.
    header:    when true, the first line is taken as the header row
               (its first field labels the row-name column).
    use_float: when true, values are stored in a compact array of C
               floats; fields that do not parse as numbers become NaN.
               Otherwise values are kept as the original strings.

    Returns (dataMap, dataHeader, dataWidth):
      dataMap    - dict mapping row name to its values; a repeated row
                   name keeps the last occurrence.
      dataHeader - list of header fields, or None when header is false.
      dataWidth  - number of value columns, or None when header is false
                   and the file holds no data rows.

    Raises ValueError when a data row's column count differs from the
    header's (or, without a header, from the first data row's). A
    whitespace-only header line is reported through log() and exits.
    """
    dataWidth = None
    dataHeader = None
    dataMap = {}
    # 'with' closes the handle even if a malformed row raises or log() exits.
    with open(inFile, "r") as f:
        firstDataLine = 1
        if header:
            line = f.readline()
            if line.isspace():
                log("ERROR: missing header\n", die = True)
            # The delimiter is a literal string, so plain split is sufficient.
            dataHeader = line.rstrip("\n\r").split(delim)
            dataWidth = len(dataHeader) - 1
            firstDataLine = 2
        for lineNum, line in enumerate(f, firstDataLine):
            if line.isspace():
                continue
            fields = line.rstrip("\n\r").split(delim)
            values = fields[1:]
            if dataWidth is None:
                # No header: the first data row fixes the expected width.
                dataWidth = len(values)
            if len(values) != dataWidth:
                # Explicit raise rather than assert, so the check survives -O.
                raise ValueError(
                    "%s line %d: expected %d value columns, found %d"
                    % (inFile, lineNum, dataWidth, len(values)))
            if use_float:
                row = array.array("f")
                for value in values:
                    try:
                        row.append(float(value))
                    except ValueError:
                        # Non-numeric (e.g. empty) cells are stored as NaN.
                        row.append(float('nan'))
            else:
                row = values
            dataMap[fields[0]] = row
    return (dataMap, dataHeader, dataWidth)

def main(args):
    """Concatenate the rows of several matrix files, aligning their columns.

    args: command-line arguments without the program name. Recognised
    options: -h (inputs have no header), -i (keep only columns present in
    every file), -f (store values as floats), -o <outfile>, -m <missing>
    (text written where a file lacks a column), -q (quiet). Remaining
    arguments are input paths; when none are given, paths are read one
    per line from stdin.

    With headers, the output columns are the sorted union (or, with -i,
    the intersection) of the column names of all inputs. Without headers,
    columns are matched by position and no header line is written. The
    rows of each file are written in turn; cells for columns a file does
    not have are filled with the missing-value string.

    An unrecognised option or an empty file list prints the usage text
    and exits through usage().
    """
    global verbose

    ## parse arguments
    try:
        opts, args = getopt.getopt(args, "hiqfo:m:")
    except getopt.GetoptError as err:
        sys.stderr.write( str(err) + "\n" )
        usage(2)

    if len(args) > 0:
        files = args
    else:
        files = [line.rstrip("\n\r") for line in sys.stdin]

    if len(files) < 1:
        sys.stderr.write("incorrect number of arguments\n")
        usage(1)

    header = True
    useIntersection = False
    output = None
    use_float = False
    missing = ""
    for o, a in opts:
        if o == "-h":
            header = False
        elif o == "-i":
            useIntersection = True
        elif o == "-q":
            verbose = False
        elif o == "-o":
            output = a
        elif o == "-f":
            use_float = True
        elif o == "-m":
            missing = a

    ## read files
    fileData = {}
    columnIndex = {}   # path -> {column name: position within a row's values}
    for path in files:
        (data, fileHeader, width) = readFile(path, header = header, use_float = use_float)
        fileData[path] = data
        if fileHeader is not None:
            # Drop the first cell: it labels the row-name column, not a value column.
            names = fileHeader[1:]
        else:
            # No header line: identify columns by their position.
            names = range(width or 0)
        index = {}
        for pos, name in enumerate(names):
            # For a repeated column name the first occurrence wins.
            index.setdefault(name, pos)
        columnIndex[path] = index

    ## choose output columns
    headerSet = set()
    for path in files:
        headerSet.update(columnIndex[path])
    if useIntersection:
        for path in files:
            headerSet.intersection_update(columnIndex[path])
    headers = sorted(headerSet)

    if output is not None:
        ohandle = open(output, "w")
    else:
        ohandle = sys.stdout

    ## output
    try:
        if header:
            ohandle.write("#\t%s\n" % (delim.join(headers)))
        for path in files:
            index = columnIndex[path]
            # Resolve each output column once per file instead of once per row.
            positions = [index.get(h) for h in headers]
            for feature, row in fileData[path].items():
                lineElements = [missing if pos is None else row[pos] for pos in positions]
                ohandle.write("%s\n" % (feature + delim + delim.join( (str(c) for c in lineElements)) ))
    finally:
        # Close a file we opened ourselves; leave stdout alone.
        if ohandle is not sys.stdout:
            ohandle.close()

# Script entry point: hand the command-line arguments (minus the program name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])
