annotate kraken_visualize.py @ 6:19b55450a15b draft

Uploaded
author cschu
date Mon, 18 May 2015 15:43:25 -0400
parents 0916697409ea
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0916697409ea Uploaded
cschu
parents:
diff changeset
1 #!/usr/bin/env python
0916697409ea Uploaded
cschu
parents:
diff changeset
2
0916697409ea Uploaded
cschu
parents:
diff changeset
3 import sys
0916697409ea Uploaded
cschu
parents:
diff changeset
4
0916697409ea Uploaded
cschu
parents:
diff changeset
def readTaxonomyNames(names_dmp):
    """Read scientific names from a taxonomy names dump.

    Each row is expected to hold at least four fields separated by
    tab-pipe-tab, optionally terminated by a trailing pipe (presumably the
    NCBI taxonomy ``names.dmp`` layout -- confirm against the data source).
    Only rows whose fourth field equals ``'scientific name'`` are kept.

    Parameters:
        names_dmp: path of the dump file to read.

    Returns:
        dict mapping the integer ID in the first field to the name in the
        second field. If an ID occurs more than once, the last row wins.

    Raises:
        IndexError: if a non-blank row has fewer than four fields.
        ValueError: if the first field of a kept row is not an integer.
    """
    nodeNames = {}
    # The context manager closes the file even if a malformed row raises.
    with open(names_dmp) as handle:
        for line in handle:
            line = line.strip().strip('|').strip()
            if not line:
                # A blank row ends the table; nothing after it is read.
                break
            fields = line.split('\t|\t')
            if fields[3].strip() == 'scientific name':
                nodeNames[int(fields[0].strip())] = fields[1].strip()
    return nodeNames
0916697409ea Uploaded
cschu
parents:
diff changeset
15
0916697409ea Uploaded
cschu
parents:
diff changeset
def readTaxonomyNodes(nodes_dmp):
    """Read ranks and parent/child links from a taxonomy nodes dump.

    Each row is expected to hold at least three fields separated by
    tab-pipe-tab, optionally terminated by a trailing pipe: node ID,
    parent ID and rank (presumably the NCBI taxonomy ``nodes.dmp`` layout --
    confirm against the data source).

    Parameters:
        nodes_dmp: path of the dump file to read.

    Returns:
        A 3-tuple ``(nodeRanks, nodeChildren, nodeParents)``:
        ``nodeRanks`` maps node ID -> rank string,
        ``nodeChildren`` maps parent ID -> set of child IDs,
        ``nodeParents`` maps node ID -> parent ID.
        Node 1 is always recorded with itself as parent, so it also appears
        in its own child set.

    Raises:
        IndexError: if a non-blank row has fewer than three fields.
        ValueError: if either of the first two fields is not an integer.
    """
    nodeRanks = {}
    nodeChildren = {}
    nodeParents = {}
    # The context manager closes the file even if a malformed row raises.
    with open(nodes_dmp) as handle:
        for line in handle:
            line = line.strip().strip('|').strip()
            if not line:
                # A blank row ends the table; nothing after it is read.
                break
            # A real list (not a lazy map object) so indexing works on
            # Python 3 as well as Python 2.
            fields = [field.strip() for field in line.split('\t|\t')]
            taxID = int(fields[0])
            parentID = int(fields[1])
            rank = fields[2]
            if taxID == 1:
                # Node 1 is forced to be its own parent regardless of the file.
                parentID = 1

            nodeParents[taxID] = parentID
            # setdefault creates the child set on first use without relying
            # on a catch-all exception handler.
            nodeChildren.setdefault(parentID, set()).add(taxID)
            nodeRanks[taxID] = rank

    return nodeRanks, nodeChildren, nodeParents
0916697409ea Uploaded
cschu
parents:
diff changeset
36
0916697409ea Uploaded
cschu
parents:
diff changeset
def getDescendents(taxID, tree):
    """Collect a node together with everything reachable below it.

    Parameters:
        taxID: ID of the starting node; always part of the result.
        tree: mapping of node ID -> iterable of child IDs. Nodes missing
            from the mapping are treated as having no children.

    Returns:
        set containing ``taxID`` and every ID reachable from it via ``tree``.
    """
    descendents = set([taxID])
    queue = [taxID]
    while queue:
        node = queue.pop()
        for child in tree.get(node, ()):
            # Only enqueue nodes not seen before: a node listed as its own
            # child (or any other cycle) would otherwise be revisited forever.
            if child not in descendents:
                descendents.add(child)
                queue.append(child)
    return descendents