0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import os
|
|
4 import sys
|
|
5 import csv
|
|
6
|
|
7 from collections import Counter
|
|
8
|
|
9 from kraken_visualize import readTaxonomyNames, readTaxonomyNodes, getDescendents
|
|
10
|
|
11
|
|
def countTaxonIDs(path):
    """Count occurrences of each taxon ID in a tab-separated file.

    The taxon ID is read from the third column (index 2) of every row and
    converted to ``int``.

    Args:
        path: Path of the tab-separated input file.

    Returns:
        A ``Counter`` mapping integer taxon ID to its number of rows.

    Raises:
        IndexError: If a row has fewer than three columns.
        ValueError: If the third column of a row is not an integer.
    """
    # The context manager closes the handle even if a row fails to parse.
    with open(path) as handle:
        rows = csv.reader(handle, delimiter='\t', quotechar='"')
        return Counter(int(row[2]) for row in rows)


def formatReport(counts, validTaxons, taxons):
    """Build the report lines for the taxon IDs contained in ``validTaxons``.

    Args:
        counts: ``Counter`` mapping taxon ID to row count.
        validTaxons: Container of taxon IDs to report (membership-tested).
        taxons: Mapping from taxon ID to its name; IDs that are missing are
            reported as ``None``.

    Returns:
        A list of strings: one ``"<id> <name> <count> <fraction>"`` line per
        reported taxon, most frequent first, followed by a final
        ``"<matched count> <matched fraction>"`` line. Fractions are relative
        to the total number of rows counted, not only the reported ones.
    """
    total = float(sum(counts.values()))
    lines = []
    matched = 0
    for taxon, count in counts.most_common():
        if taxon in validTaxons:
            lines.append('%s %s %s %.10f'
                         % (taxon, taxons.get(taxon), count, count / total))
            matched += count
    # An empty input has total == 0; report 0.0 instead of dividing by zero.
    fraction = matched / total if total else 0.0
    lines.append('%s %s' % (matched, fraction))
    return lines


def main(argv):
    """Print per-taxon counts for the descendants of one taxon.

    Args:
        argv: Command-line vector: program name, input file, directory
            holding ``names.dmp`` and ``nodes.dmp``, and the integer taxon ID
            whose descendants are reported.
    """
    if len(argv) < 4:
        sys.exit('usage: %s <classification.tsv> <taxonomy_dir> <tax_id>'
                 % argv[0])
    taxID = int(argv[3])
    taxons = readTaxonomyNames(os.path.join(argv[2], 'names.dmp'))
    nodes = readTaxonomyNodes(os.path.join(argv[2], 'nodes.dmp'))
    # Only the second element of the parsed nodes is passed on, as before.
    validTaxons = getDescendents(taxID, nodes[1])
    for line in formatReport(countTaxonIDs(argv[1]), validTaxons, taxons):
        # A single pre-formatted string prints the same on Python 2 and 3.
        print(line)


if __name__ == '__main__':
    main(sys.argv)
|