diff abundanceTable_to_biom/abundanceTable_to_biom.py @ 0:5ac3be5b1d12 draft

Uploaded
author czouaoui
date Tue, 20 Mar 2018 04:21:57 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abundanceTable_to_biom/abundanceTable_to_biom.py	Tue Mar 20 04:21:57 2018 -0400
@@ -0,0 +1,66 @@
+import datetime
+import argparse
+import csv
+import json
+
+def __main__():
+    """Convert a CSV abundance table into a BIOM JSON file.
+
+    Parses the command line, reads the abundance table given with ``-f``,
+    builds the BIOM dictionary and dumps it as JSON.  The output file is
+    named ``<out_name>.biom`` when ``-o`` is given, ``result.biom`` otherwise.
+    """
+    # print() with a single argument behaves the same on Python 2 and 3,
+    # whereas the bare print statement is a syntax error on Python 3.
+    print("Generating biom file ...")
+    options = argsParser()
+    families, samples, data = abundanceTableParser(options.file)
+    biom_file = constructBiom(families, samples, data)
+    file_name = options.out_name + '.biom' if options.out_name else 'result.biom'
+    with open(file_name, 'w') as output:
+        json.dump(biom_file, output)
+    print("Your biom file (" + file_name + ") is now created")
+
+def argsParser():
+    """Parse the command-line options of the script.
+
+    Returns:
+        The ``argparse.Namespace`` with two attributes:
+        ``file`` -- the abundance table, already opened for reading by
+        ``argparse.FileType('r')``;
+        ``out_name`` -- the output base name, or ``None`` when ``-o`` is
+        not supplied.
+
+    Exits with a usage error (``SystemExit``) when ``-f`` is missing or the
+    file cannot be opened.
+    """
+    parser = argparse.ArgumentParser()
+    # The input table is mandatory: without it ``file`` would be None and
+    # the parser downstream would fail with an obscure AttributeError.
+    parser.add_argument("-f", "--file", type=argparse.FileType('r'), required=True,
+                        help="Abundance table in csv format")
+    parser.add_argument("-o", "--out_name", help="BIOM (v1.0) file name")
+
+    return parser.parse_args()
+
+def abundanceTableParser(file):
+    """Read a CSV abundance table and return the pieces of a sparse BIOM table.
+
+    The first CSV row is the header: its first cell is ignored and every
+    following cell is a sample name.  Each later row describes one family:
+    the first cell is the family name, the remaining cells are integer
+    counts, or the literal ``NA`` for a missing value.
+
+    Args:
+        file: An open, iterable text file object usable as a context
+            manager.  It is closed before the function returns.  It is read
+            in a single pass, so non-seekable streams are accepted.
+
+    Returns:
+        A ``(families, samples, data)`` tuple:
+        ``families`` -- one dict per data row, with id ``OTU_<n>`` (``n``
+        starting at 1) and a taxonomy list whose family slot is
+        ``f__<name>``;
+        ``samples`` -- one dict per sample column, using the column name as
+        both ``id`` and ``phinchID``;
+        ``data`` -- ``[row_index, column_index, count]`` triples (0-based)
+        for every cell that is not ``NA``.
+        All three lists are empty when the input has no rows at all.
+
+    Raises:
+        ValueError: if a cell is neither ``NA`` nor parseable by ``int()``.
+    """
+    families = []
+    samples = []
+    data = []
+
+    with file:
+        reader = csv.reader(file)
+
+        header = next(reader, None)
+        if header is None:
+            # Completely empty input: nothing to convert.
+            return families, samples, data
+
+        samples = [{"id": name, "metadata": {"phinchID": name}} for name in header[1:]]
+
+        row_index = 0
+        for row in reader:
+            if not row:
+                # csv.reader yields [] for blank lines; they carry no family.
+                continue
+            families.append({"id": "OTU_" + str(row_index + 1),
+                             "metadata": {"taxonomy": ["k__", "p__", "c__", "o__",
+                                                       "f__" + row[0], "g__", "s__"]}})
+            for col_index, value in enumerate(row[1:]):
+                if value != 'NA':
+                    data.append([row_index, col_index, int(value)])
+            row_index += 1
+
+    return families, samples, data
+
+def constructBiom(families, samples, data):
+    """Assemble the top-level BIOM dictionary for a sparse integer OTU table.
+
+    Args:
+        families: Row descriptions, stored under ``rows``.
+        samples: Column descriptions, stored under ``columns``.
+        data: Sparse matrix entries, stored under ``data``.
+
+    Returns:
+        A dict ready for ``json.dump``.  ``shape`` is
+        ``[len(families), len(samples)]``, ``date`` is today's date in ISO
+        format, and ``id`` is the literal string ``"null"``.
+    """
+    today = datetime.date.today().isoformat()
+    n_rows = len(families)
+    n_columns = len(samples)
+
+    # Keys are inserted in the same order as the header fields are listed
+    # here: identification first, then matrix description, then content.
+    biom = {}
+    biom["id"] = "null"
+    biom["format"] = "Biological Observation Matrix 0.9.1-dev"
+    biom["format_url"] = "http://biom-format.org/documentation/format_versions/biom-1.0.html"
+    biom["type"] = "OTU table"
+    biom["generated_by"] = "abundanceTable_to_biom script"
+    biom["date"] = today
+    biom["matrix_type"] = "sparse"
+    biom["matrix_element_type"] = "int"
+    biom["shape"] = [n_rows, n_columns]
+    biom["rows"] = families
+    biom["columns"] = samples
+    biom["data"] = data
+    return biom
+    
+
+# Run the conversion only when this file is executed as a script, so that
+# importing the module does not trigger argument parsing or file output.
+if __name__ == "__main__":
+    __main__()