view abundanceTable_to_biom/abundanceTable_to_biom.py @ 2:613e054cad7f draft

Uploaded
author czouaoui
date Tue, 20 Mar 2018 04:26:35 -0400
parents 5ac3be5b1d12
children
line wrap: on
line source

import datetime
import argparse
import csv
import json

def __main__():
    """Convert the abundance table given on the command line to a BIOM file.

    Parses the command-line options, reads the CSV abundance table, builds
    the BIOM dictionary and dumps it as JSON into ``<out_name>.biom``, or
    into ``result.biom`` when no output name was supplied.
    """
    # print(...) with a single argument behaves identically on Python 2 and
    # Python 3, whereas the print statement is a SyntaxError on Python 3.
    print("Generating biom file ...")
    options = argsParser()
    families, samples, data = abundanceTableParser(options.file)
    biom_file = constructBiom(families, samples, data)
    file_name = options.out_name + '.biom' if options.out_name else 'result.biom'
    with open(file_name, 'w') as output:
        json.dump(biom_file, output)
    print("Your biom file (" + file_name + ") is now created")

def argsParser():
    """Parse the command-line options of the script.

    Returns:
        An ``argparse.Namespace`` with two attributes: ``file``, the
        abundance table already opened for reading by argparse, and
        ``out_name``, the output base name (``None`` when ``-o`` is absent).

    Exits through argparse's usual error path (``SystemExit``) when the
    mandatory ``-f/--file`` option is missing or the file cannot be opened.
    """
    parser = argparse.ArgumentParser()
    # The input table is mandatory. Without required=True argparse would
    # silently leave ``file`` as None and the failure would only surface
    # later, far from its cause.
    parser.add_argument(
        "-f", "--file",
        type=argparse.FileType('r'),
        required=True,
        help="Abundance table in csv format",
    )
    parser.add_argument("-o", "--out_name", help="BIOM (v1.0) file name")

    return parser.parse_args()

def abundanceTableParser(file):
    """Read a CSV abundance table and return the pieces of a BIOM table.

    The first CSV row is the header: its first cell is ignored and every
    following cell is a sample name. Each later row describes one family:
    the first cell is the family name, the remaining cells are the counts
    for the samples, in header order.

    The table is read in a single pass, so ``file`` does not need to be
    seekable (standard input works). ``file`` is used as a context manager
    and is therefore closed when parsing ends.

    Args:
        file: An open, iterable text file (or file-like object) holding
            the CSV data.

    Returns:
        A ``(families, samples, data)`` tuple where
        ``families`` is a list of row descriptors (``id`` "OTU_<n>" numbered
        from 1, plus a taxonomy list carrying the family name),
        ``samples`` is a list of column descriptors, and
        ``data`` is a list of ``[row_index, column_index, count]`` triples
        with zero-based indexes. Cells equal to ``'NA'`` are left out.
        All three lists are empty when the input has no rows at all.

    Raises:
        ValueError: If a count cell is neither ``'NA'`` nor an integer.
    """
    families = []
    samples = []
    data = []

    with file:
        reader = csv.reader(file)

        header = next(reader, None)
        if header is None:
            # Completely empty input: nothing to describe.
            return families, samples, data

        samples = [
            {"id": name, "metadata": {"phinchID": name}}
            for name in header[1:]
        ]

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no family.
                continue

            # The row index is derived from the families collected so far so
            # that matrix coordinates always line up with the ``families``
            # list, even when blank lines were skipped.
            row_index = len(families)
            families.append({
                "id": "OTU_" + str(row_index + 1),
                "metadata": {
                    "taxonomy": ["k__", "p__", "c__", "o__",
                                 "f__" + row[0], "g__", "s__"],
                },
            })

            for column_index, value in enumerate(row[1:]):
                if value != 'NA':
                    data.append([row_index, column_index, int(value)])

    return families, samples, data

def constructBiom(families, samples, data):
    """Assemble the BIOM table dictionary that gets serialised to JSON.

    Args:
        families: Row descriptors, one per observation.
        samples: Column descriptors, one per sample.
        data: Sparse matrix entries as ``[row, column, value]`` triples.

    Returns:
        A dict holding the fixed header fields, today's date in ISO format,
        the matrix shape ``[number of rows, number of columns]`` and the
        three arguments under ``rows``, ``columns`` and ``data``.
    """
    today = datetime.date.today().isoformat()
    row_count = len(families)
    column_count = len(samples)

    return {
        "id": "null",
        "format": "Biological Observation Matrix 0.9.1-dev",
        "format_url": "http://biom-format.org/documentation/format_versions/biom-1.0.html",
        "type": "OTU table",
        "generated_by": "abundanceTable_to_biom script",
        "date": today,
        "matrix_type": "sparse",
        "matrix_element_type": "int",
        "shape": [row_count, column_count],
        "rows": families,
        "columns": samples,
        "data": data,
    }
    

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __main__()