| 
0
 | 
     1 import datetime
 | 
| 
 | 
     2 import argparse
 | 
| 
 | 
     3 import csv
 | 
| 
 | 
     4 import json
 | 
| 
 | 
     5 
 | 
| 
 | 
     6 def __main__():
 | 
| 
 | 
     7     print "Generating biom file ..."
 | 
| 
 | 
     8     options = argsParser()
 | 
| 
 | 
     9     families, samples, data = abundanceTableParser(options.file)
 | 
| 
 | 
    10     biom_file = constructBiom(families, samples, data)
 | 
| 
 | 
    11     file_name = options.out_name + '.biom' if options.out_name else 'result.biom'
 | 
| 
 | 
    12     with open(file_name, 'w') as output:
 | 
| 
 | 
    13     	json.dump(biom_file, output)
 | 
| 
 | 
    14     print "Your biom file (" + file_name + ") is now created"
 | 
| 
 | 
    15 
 | 
| 
 | 
    16 def argsParser():
 | 
| 
 | 
    17     parser = argparse.ArgumentParser()
 | 
| 
 | 
    18     parser.add_argument("-f", "--file", type=argparse.FileType('r'), help="Abundance table in csv format")
 | 
| 
 | 
    19     parser.add_argument("-o", "--out_name", help="BIOM (v1.0) file name")
 | 
| 
 | 
    20 
 | 
| 
 | 
    21     return parser.parse_args()
 | 
| 
 | 
    22 
 | 
| 
 | 
    23 def abundanceTableParser(file):
 | 
| 
 | 
    24     with file:
 | 
| 
 | 
    25         reader = csv.reader(file)
 | 
| 
 | 
    26 
 | 
| 
 | 
    27 	families = []
 | 
| 
 | 
    28         for i, row in enumerate(reader):
 | 
| 
 | 
    29 	    if i != 0:
 | 
| 
 | 
    30                 families.append({"id":"OTU_" + str(i), "metadata":{"taxonomy":["k__", "p__", "c__", "o__", 
 | 
| 
 | 
    31 		"f__" + row[0], "g__", "s__"]}})
 | 
| 
 | 
    32 
 | 
| 
 | 
    33 	file.seek(0)
 | 
| 
 | 
    34 	first_row = next(reader)
 | 
| 
 | 
    35 	samples = []
 | 
| 
 | 
    36         for i in range(1, len(first_row)):
 | 
| 
 | 
    37             samples.append({"id":first_row[i], "metadata":{"phinchID":first_row[i]}})
 | 
| 
 | 
    38 
 | 
| 
 | 
    39 	file.seek(0)
 | 
| 
 | 
    40 	data = []
 | 
| 
 | 
    41         for i, row in enumerate(reader):
 | 
| 
 | 
    42 	    if i != 0:
 | 
| 
 | 
    43 		for j in range(1, len(row)):
 | 
| 
 | 
    44 	            if row[j] != 'NA':
 | 
| 
 | 
    45 		    	data.append([i - 1, j - 1, int(row[j])])
 | 
| 
 | 
    46     
 | 
| 
 | 
    47     return families, samples, data
 | 
| 
 | 
    48 
 | 
| 
 | 
    49 def constructBiom(families, samples, data):
 | 
| 
 | 
    50     biom = {"id":"null",
 | 
| 
 | 
    51 	    "format": "Biological Observation Matrix 0.9.1-dev",
 | 
| 
 | 
    52             "format_url": "http://biom-format.org/documentation/format_versions/biom-1.0.html",
 | 
| 
 | 
    53             "type": "OTU table",
 | 
| 
 | 
    54             "generated_by": "abundanceTable_to_biom script",
 | 
| 
 | 
    55  	    "date": datetime.date.today().isoformat(),
 | 
| 
 | 
    56  	    "matrix_type": "sparse",
 | 
| 
 | 
    57  	    "matrix_element_type": "int",
 | 
| 
 | 
    58  	    "shape": [len(families), len(samples)],
 | 
| 
 | 
    59 	    "rows":families,
 | 
| 
 | 
    60             "columns":samples,
 | 
| 
 | 
    61             "data":data}
 | 
| 
 | 
    62     return biom
 | 
| 
 | 
    63     
 | 
| 
 | 
    64 
 | 
| 
 | 
    65 if __name__ == "__main__":
 | 
| 
 | 
    66     __main__()
 |