diff kraken_count.py @ 14:fd27c97c8366 draft default tip

Uploaded
author cschu
date Mon, 18 May 2015 15:55:47 -0400
parents 0916697409ea
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kraken_count.py	Mon May 18 15:55:47 2015 -0400
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import csv
+
+from collections import Counter
+
+from kraken_visualize import readTaxonomyNames, readTaxonomyNodes, getDescendents
+
+
+taxons = readTaxonomyNames(os.path.join(sys.argv[2], 'names.dmp'))
+
+taxID = int(sys.argv[3])
+validTaxons = getDescendents(taxID, readTaxonomyNodes(os.path.join(sys.argv[2], 'nodes.dmp'))[1])
+
+c = Counter([int(row[2]) 
+	         for row in csv.reader(open(sys.argv[1]), delimiter='\t', quotechar='"')])
+
+N = float(sum(c.values()))
+ct = 0
+for k in sorted(c, key=lambda x:c[x], reverse=True):
+	if k in validTaxons:
+		print k, taxons.get(k), c[k], '%.10f' % (c[k]/N)
+		ct += c[k]
+	# print k, taxons.get(k, 'N/A'), c[k], 'VALID' if k in validTaxons else ''
+print ct, ct/N