annotate kraken_count.py @ 14:fd27c97c8366 draft default tip

Uploaded
author cschu
date Mon, 18 May 2015 15:55:47 -0400
parents 0916697409ea
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0916697409ea Uploaded
cschu
parents:
diff changeset
#!/usr/bin/env python
"""Count classified reads per taxon ID and report those under a given taxon.

Usage: kraken_count.py <classification.tsv> <taxonomy_dir> <root_tax_id>

Arguments (read positionally from ``sys.argv``):
    1. Tab-separated input file whose third column is an integer taxon ID
       (presumably Kraken per-read classification output -- confirm).
    2. Directory containing the taxonomy dumps ``names.dmp`` and ``nodes.dmp``.
    3. Integer taxon ID; only taxa returned by ``getDescendents`` for this ID
       are reported.

Output (stdout): one line per reported taxon, most frequent first, with
``<tax_id> <name> <count> <fraction of all rows>``, followed by a summary
line ``<reported rows> <fraction of all rows>``.
"""

import os
import sys
import csv

from collections import Counter

from kraken_visualize import readTaxonomyNames, readTaxonomyNodes, getDescendents


taxons = readTaxonomyNames(os.path.join(sys.argv[2], 'names.dmp'))

taxID = int(sys.argv[3])
validTaxons = getDescendents(taxID, readTaxonomyNodes(os.path.join(sys.argv[2], 'nodes.dmp'))[1])

# Tally rows per taxon ID (third column). The file is closed deterministically
# by the context manager, and empty rows (e.g. a trailing blank line, which
# csv.reader yields as []) are skipped instead of raising IndexError.
with open(sys.argv[1]) as handle:
    c = Counter(int(row[2])
                for row in csv.reader(handle, delimiter='\t', quotechar='"')
                if row)

# Float total so the divisions below are true divisions on Python 2 as well.
N = float(sum(c.values()))
ct = 0
# most_common() yields (tax_id, count) pairs sorted by descending count.
for k, count in c.most_common():
    if k in validTaxons:
        # Single pre-formatted string: prints identically with the Python 2
        # print statement and the Python 3 print function.
        print('%s %s %s %.10f' % (k, taxons.get(k), count, count / N))
        ct += count
# Guard the summary against an empty input file, where N == 0.0.
print('%s %s' % (ct, ct / N if N else 0.0))