0
|
1 import datetime
|
|
2 import argparse
|
|
3 import csv
|
|
4 import json
|
|
5
|
|
def __main__():
    """Command-line entry point: convert a CSV abundance table to a BIOM file.

    Parses the command-line options, reads the abundance table, builds the
    BIOM structure and writes it as JSON to ``<out_name>.biom``, or to
    ``result.biom`` when no output name was given.
    """
    # A single-argument print(...) produces the same output whether it is
    # interpreted as a Python 2 print statement or the Python 3 function.
    print("Generating biom file ...")

    options = argsParser()
    families, samples, data = abundanceTableParser(options.file)
    biom_file = constructBiom(families, samples, data)

    file_name = options.out_name + '.biom' if options.out_name else 'result.biom'
    with open(file_name, 'w') as output:
        json.dump(biom_file, output)

    print("Your biom file (" + file_name + ") is now created")
|
|
15
|
|
def argsParser():
    """Parse the command-line arguments of the script.

    Returns:
        argparse.Namespace with two attributes:
        ``file``     -- the abundance table, already opened for reading;
        ``out_name`` -- base name of the output file, or None when omitted.

    Raises:
        SystemExit: raised by argparse (after printing a usage message) when
            the mandatory ``--file`` option is missing or cannot be opened.
    """
    parser = argparse.ArgumentParser()
    # The input table is mandatory: without it ``file`` would be None and the
    # failure would only show up later, when the table is read.
    parser.add_argument("-f", "--file", type=argparse.FileType('r'),
                        required=True, help="Abundance table in csv format")
    parser.add_argument("-o", "--out_name", help="BIOM (v1.0) file name")

    return parser.parse_args()
|
|
22
|
|
def abundanceTableParser(file):
    """Parse a CSV abundance table into BIOM rows, columns and sparse data.

    The first CSV row is the header: its first cell is ignored and every
    other cell is used as a sample name.  Each following row holds a family
    name in its first cell followed by one count per sample.  Cells equal to
    'NA' are skipped.  Completely blank lines are ignored.

    Args:
        file: an open, iterable file-like object that also works as a
            context manager; it is closed when the function returns.

    Returns:
        A ``(families, samples, data)`` tuple:
        ``families`` -- one dict per data row, with id ``OTU_<n>`` (n starts
                        at 1) and the family name in the taxonomy metadata;
        ``samples``  -- one dict per sample column;
        ``data``     -- ``[row_index, column_index, count]`` triples with
                        zero-based indices.
        All three lists are empty when the input contains no rows at all.

    Raises:
        ValueError: if a count cell is neither 'NA' nor an integer literal.
    """
    families = []
    samples = []
    data = []

    with file:
        reader = csv.reader(file)

        # Everything is collected in a single pass so the input never has to
        # be rewound; this keeps non-seekable streams (e.g. a pipe) usable.
        header = next(reader, None)
        if header is None:
            return families, samples, data

        samples = [{"id": name, "metadata": {"phinchID": name}}
                   for name in header[1:]]

        row_index = 0
        for row in reader:
            if not row:
                # csv yields [] for a blank line (typically a trailing one).
                continue

            families.append({"id": "OTU_" + str(row_index + 1),
                             "metadata": {"taxonomy": ["k__", "p__", "c__", "o__",
                                                       "f__" + row[0], "g__", "s__"]}})
            for col_index, cell in enumerate(row[1:]):
                if cell != 'NA':
                    data.append([row_index, col_index, int(cell)])
            row_index += 1

    return families, samples, data
|
|
48
|
|
def constructBiom(families, samples, data):
    """Assemble the BIOM table as a plain dict ready to be dumped as JSON.

    Args:
        families: list of row (observation) descriptors.
        samples: list of column (sample) descriptors.
        data: sparse matrix entries.

    Returns:
        dict holding the fixed header fields, today's date in ISO format,
        the matrix shape ``[len(families), len(samples)]`` and the three
        arguments under the ``rows``, ``columns`` and ``data`` keys.
    """
    generated_on = datetime.date.today().isoformat()
    dimensions = [len(families), len(samples)]

    # Keys are inserted in the same order as the serialised header fields.
    table = {}
    table["id"] = "null"
    table["format"] = "Biological Observation Matrix 0.9.1-dev"
    table["format_url"] = "http://biom-format.org/documentation/format_versions/biom-1.0.html"
    table["type"] = "OTU table"
    table["generated_by"] = "abundanceTable_to_biom script"
    table["date"] = generated_on
    table["matrix_type"] = "sparse"
    table["matrix_element_type"] = "int"
    table["shape"] = dimensions
    table["rows"] = families
    table["columns"] = samples
    table["data"] = data
    return table
|
|
63
|
|
64
|
|
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    __main__()
|