0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import sys
|
|
4
|
|
def readTaxonomyNames(names_dmp):
    """Read a taxonomy names dump and return scientific names by node ID.

    Each line is expected to hold fields separated by tab, pipe, tab, with
    the node ID in field 0, the name in field 1 and the name class in
    field 3 (presumably the NCBI taxonomy ``names.dmp`` layout -- confirm
    against the data source).  Only rows whose name class is exactly
    ``scientific name`` are kept.

    Args:
        names_dmp: Path of the names dump file to read.

    Returns:
        dict mapping integer node ID to its scientific name.  If an ID
        occurs more than once, the last occurrence wins.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If the ID of a scientific-name row is not an integer.
    """
    nodeNames = {}
    # The context manager guarantees the file is closed, even on error.
    with open(names_dmp) as handle:
        for line in handle:
            # Drop the trailing "\t|" row terminator and outer whitespace.
            line = line.strip().strip('|').strip()
            if not line:
                # A blank line ends parsing (original behaviour, kept as is).
                break
            fields = line.split('\t|\t')
            if fields[3].strip() == 'scientific name':
                nodeNames[int(fields[0].strip())] = fields[1].strip()
    return nodeNames
|
|
15
|
|
def readTaxonomyNodes(nodes_dmp):
    """Read a taxonomy nodes dump and build rank, child and parent lookups.

    Each line is expected to hold fields separated by tab, pipe, tab, with
    the node ID in field 0, the parent ID in field 1 and the rank in
    field 2 (presumably the NCBI taxonomy ``nodes.dmp`` layout -- confirm
    against the data source).

    Node 1 is always recorded as its own parent, so it also appears in its
    own child set.  Code that walks the returned child mapping has to cope
    with that self-loop.

    Args:
        nodes_dmp: Path of the nodes dump file to read.

    Returns:
        A 3-tuple ``(nodeRanks, nodeChildren, nodeParents)``:
        ``nodeRanks`` maps node ID to rank string, ``nodeChildren`` maps
        parent ID to the set of its child IDs, and ``nodeParents`` maps
        node ID to parent ID.  All IDs are ints.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If a node or parent ID is not an integer.
    """
    nodeRanks = {}
    nodeChildren = {}
    nodeParents = {}
    # The context manager guarantees the file is closed, even on error.
    with open(nodes_dmp) as handle:
        for line in handle:
            # Drop the trailing "\t|" row terminator and outer whitespace.
            line = line.strip().strip('|').strip()
            if not line:
                # A blank line ends parsing (original behaviour, kept as is).
                break
            # A real list is required: on Python 3 map() returns an iterator
            # that supports neither indexing nor slice assignment.
            fields = [field.strip() for field in line.split('\t|\t')]
            taxID = int(fields[0])
            parentID = int(fields[1])
            if taxID == 1:
                # Force node 1 to be its own parent.
                parentID = 1

            nodeParents[taxID] = parentID
            # setdefault replaces the former bare "except:", which could
            # have masked errors unrelated to a missing key.
            nodeChildren.setdefault(parentID, set()).add(taxID)
            nodeRanks[taxID] = fields[2]

    return nodeRanks, nodeChildren, nodeParents
|
|
36
|
|
def getDescendents(taxID, tree):
    """Return ``taxID`` together with every node reachable below it.

    Args:
        taxID: ID of the node to start from.
        tree: Mapping from a node ID to an iterable of its child IDs.
            Nodes without an entry are treated as leaves.

    Returns:
        set containing ``taxID`` and all of its direct and indirect
        children.
    """
    descendents = {taxID}
    queue = [taxID]
    while queue:
        node = queue.pop()
        for child in tree.get(node, ()):
            # Only enqueue nodes not seen before.  Without this check a
            # cycle in the tree (e.g. a node listed as its own child)
            # would make the loop run forever.
            if child not in descendents:
                descendents.add(child)
                queue.append(child)
    return descendents