Mercurial > repos > czouaoui > abundance_table_to_biom1
diff abundanceTable_to_biom/abundanceTable_to_biom.py @ 0:5ac3be5b1d12 draft
Uploaded
author | czouaoui |
---|---|
date | Tue, 20 Mar 2018 04:21:57 -0400 |
parents | |
children |
line wrap: on
line diff
"""Convert a CSV abundance table into a sparse BIOM (v1.0) JSON file.

Script path in the repository:
    abundanceTable_to_biom/abundanceTable_to_biom.py

Input layout read by this script: the first CSV row is a header whose first
cell is ignored and whose remaining cells are sample names; every following
row starts with a family name followed by one integer abundance per sample.
Cells that are empty or contain ``NA`` are treated as missing and left out of
the sparse matrix.
"""
import argparse
import csv
import datetime
import json

# Cell contents (after stripping surrounding whitespace) meaning "no value".
_MISSING_VALUES = ("", "NA")


def __main__():
    """Command-line entry point: parse arguments, convert, write the file.

    The output is written to ``<out_name>.biom`` when ``--out_name`` is
    given, otherwise to ``result.biom`` in the current directory.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Generating biom file ...")
    options = argsParser()
    families, samples, data = abundanceTableParser(options.file)
    biom_file = constructBiom(families, samples, data)
    file_name = options.out_name + '.biom' if options.out_name else 'result.biom'
    with open(file_name, 'w') as output:
        json.dump(biom_file, output)
    print("Your biom file (" + file_name + ") is now created")


def argsParser(argv=None):
    """Parse the command-line options.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, as the original script did.

    Returns:
        The argparse namespace with ``file`` (an open, readable file object)
        and ``out_name`` (a string or ``None``).
    """
    parser = argparse.ArgumentParser()
    # The input table is mandatory: without it there is nothing to convert,
    # so let argparse report the problem instead of crashing later on None.
    parser.add_argument("-f", "--file", type=argparse.FileType('r'),
                        required=True, help="Abundance table in csv format")
    parser.add_argument("-o", "--out_name", help="BIOM (v1.0) file name")

    return parser.parse_args(argv)


def _family_entry(number, family_name):
    """Build one BIOM row entry; only the family rank of the taxonomy is set."""
    return {"id": "OTU_" + str(number),
            "metadata": {"taxonomy": ["k__", "p__", "c__", "o__",
                                      "f__" + family_name, "g__", "s__"]}}


def abundanceTableParser(file):
    """Read the abundance table and return the pieces of a BIOM document.

    The table is consumed in a single pass, so non-seekable inputs (for
    example standard input) work. Completely blank CSV lines are skipped.
    The file object is closed before returning.

    Args:
        file: An open file-like object yielding CSV text; it must support
            the context-manager protocol.

    Returns:
        A tuple ``(families, samples, data)`` where ``families`` is the list
        of BIOM row entries, ``samples`` the list of BIOM column entries and
        ``data`` a list of ``[row_index, column_index, value]`` triples
        (zero-based indices). All three are empty lists for an empty input.

    Raises:
        ValueError: If a non-missing abundance cell is not an integer.
    """
    with file:
        # csv.reader yields [] for blank lines; drop them up front so they
        # can neither be mistaken for the header nor break row[0] below.
        rows = (row for row in csv.reader(file) if row)

        header = next(rows, None)
        if header is None:
            return [], [], []

        samples = [{"id": name, "metadata": {"phinchID": name}}
                   for name in header[1:]]

        families = []
        data = []
        for row in rows:
            # Number rows by how many families were kept so that indices
            # stay contiguous even when blank lines were skipped.
            row_index = len(families)
            families.append(_family_entry(row_index + 1, row[0]))
            for col_index, cell in enumerate(row[1:]):
                cell = cell.strip()
                if cell in _MISSING_VALUES:
                    continue
                try:
                    value = int(cell)
                except ValueError:
                    raise ValueError(
                        "Non-integer abundance %r for family %r in sample "
                        "column %d" % (cell, row[0], col_index + 1))
                data.append([row_index, col_index, value])

    return families, samples, data


def constructBiom(families, samples, data):
    """Assemble the BIOM document as a JSON-serialisable dictionary.

    Args:
        families: List of row entries (one per family).
        samples: List of column entries (one per sample).
        data: List of ``[row_index, column_index, value]`` triples.

    Returns:
        A dict describing a sparse integer "OTU table" whose ``shape`` is
        ``[len(families), len(samples)]`` and whose ``date`` is today's date
        in ISO format.
    """
    # NOTE(review): "id" is the literal string "null", not a JSON null; it is
    # kept as-is because downstream consumers may rely on it -- confirm.
    biom = {"id": "null",
            "format": "Biological Observation Matrix 0.9.1-dev",
            "format_url": "http://biom-format.org/documentation/format_versions/biom-1.0.html",
            "type": "OTU table",
            "generated_by": "abundanceTable_to_biom script",
            "date": datetime.date.today().isoformat(),
            "matrix_type": "sparse",
            "matrix_element_type": "int",
            "shape": [len(families), len(samples)],
            "rows": families,
            "columns": samples,
            "data": data}
    return biom


if __name__ == "__main__":
    __main__()