annotate abundanceTable_to_biom/abundanceTable_to_biom.py @ 0:5ac3be5b1d12 draft

Uploaded
author czouaoui
date Tue, 20 Mar 2018 04:21:57 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
1 import datetime
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
2 import argparse
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
3 import csv
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
4 import json
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
5
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
def __main__():
    """Convert a CSV abundance table into a BIOM (v1.0) JSON file.

    Reads the command-line options, parses the abundance table given with
    ``-f/--file``, assembles the BIOM dictionary and dumps it as JSON.
    The output is written to ``<out_name>.biom`` when ``-o/--out_name`` is
    given, otherwise to ``result.biom`` in the current directory.
    """
    # Single-argument print(...) calls behave identically on Python 2 and 3,
    # unlike the print statement, which is a syntax error on Python 3.
    print("Generating biom file ...")
    options = argsParser()
    families, samples, data = abundanceTableParser(options.file)
    biom_file = constructBiom(families, samples, data)
    file_name = options.out_name + '.biom' if options.out_name else 'result.biom'
    with open(file_name, 'w') as output:
        json.dump(biom_file, output)
    # Announce success only once the output file has been closed.
    print("Your biom file (" + file_name + ") is now created")
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
15
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
def argsParser():
    """Parse the command-line arguments of the script.

    Returns:
        argparse.Namespace with two attributes:
          * ``file``: the abundance table, already opened for reading.
          * ``out_name``: output file name without extension, or ``None``
            when ``-o`` was not supplied.

    Exits with an argparse usage error when ``-f/--file`` is missing.
    """
    parser = argparse.ArgumentParser()
    # The input table is mandatory: without it the parser downstream would
    # receive None and fail with an unrelated AttributeError.
    parser.add_argument("-f", "--file", type=argparse.FileType('r'), required=True,
                        help="Abundance table in csv format")
    parser.add_argument("-o", "--out_name", help="BIOM (v1.0) file name")

    return parser.parse_args()
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
22
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
def abundanceTableParser(file):
    """Parse a CSV abundance table into the three BIOM building blocks.

    The first CSV row is the header: its first cell is ignored and every
    following cell is a sample name. Each later row describes one family:
    the first cell is the family name, the remaining cells are the counts
    for each sample (``NA`` marks a missing value that is left out).

    The table is read in a single pass, so non-seekable inputs such as
    stdin work. Blank lines are skipped, and an empty file yields three
    empty lists.

    Args:
        file: an open, iterable text file object usable as a context
            manager; it is closed before returning.

    Returns:
        A ``(families, samples, data)`` tuple where
          * ``families`` is a list of row descriptors with ids ``OTU_1``,
            ``OTU_2``, ... and a taxonomy holding the family name,
          * ``samples`` is a list of column descriptors,
          * ``data`` is a list of ``[row_index, column_index, count]``
            triples with 0-based indices.

    Raises:
        ValueError: if a count cell is neither ``NA`` nor an integer.
    """
    families = []
    samples = []
    data = []

    with file:
        reader = csv.reader(file)

        header = next(reader, None)
        if header is None:
            # Completely empty input: nothing to convert.
            return families, samples, data

        for name in header[1:]:
            samples.append({"id": name, "metadata": {"phinchID": name}})

        row_index = 0
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no data
                # and must not consume a row index.
                continue
            families.append({"id": "OTU_" + str(row_index + 1),
                             "metadata": {"taxonomy": ["k__", "p__", "c__", "o__",
                                                       "f__" + row[0], "g__", "s__"]}})
            for column_index, value in enumerate(row[1:]):
                if value != 'NA':
                    data.append([row_index, column_index, int(value)])
            row_index += 1

    return families, samples, data
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
48
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
def constructBiom(families, samples, data):
    """Assemble the BIOM dictionary from its rows, columns and sparse data.

    Args:
        families: list of row descriptors (one per family).
        samples: list of column descriptors (one per sample).
        data: list of ``[row, column, value]`` sparse matrix entries.

    Returns:
        A dict ready to be serialised as JSON; ``shape`` is derived from the
        lengths of ``families`` and ``samples`` and ``date`` is today's date
        in ISO format.
    """
    today = datetime.date.today().isoformat()
    dimensions = [len(families), len(samples)]
    # Keyword order mirrors the key order of the serialised file.
    return dict(
        id="null",
        format="Biological Observation Matrix 0.9.1-dev",
        format_url="http://biom-format.org/documentation/format_versions/biom-1.0.html",
        type="OTU table",
        generated_by="abundanceTable_to_biom script",
        date=today,
        matrix_type="sparse",
        matrix_element_type="int",
        shape=dimensions,
        rows=families,
        columns=samples,
        data=data,
    )
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
63
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
64
5ac3be5b1d12 Uploaded
czouaoui
parents:
diff changeset
# Run the conversion only when the file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    __main__()