diff kraken_visualize.py @ 2:df4163858937 draft

Uploaded
author cschu
date Mon, 18 May 2015 15:35:48 -0400
parents 0916697409ea
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kraken_visualize.py	Mon May 18 15:35:48 2015 -0400
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+import sys
+
+def readTaxonomyNames(names_dmp):
+    """Map taxonomy IDs to their scientific names.
+
+    Every non-blank line is stripped of surrounding whitespace and of the
+    '|' line terminator, then split on the tab-pipe-tab field separator.
+    Only lines whose fourth field equals 'scientific name' contribute an
+    entry; if an ID occurs more than once, the last occurrence wins.
+
+    Args:
+        names_dmp: Path of the names file to read (presumably an NCBI
+            taxonomy names.dmp dump -- confirm against the caller).
+
+    Returns:
+        dict mapping the integer ID in the first field to the name string
+        in the second field.
+
+    Raises:
+        IndexError: if a non-blank line has fewer than four fields.
+        ValueError: if the ID of a 'scientific name' line is not an integer.
+    """
+    nodeNames = {}
+    # The context manager closes the file even when parsing raises.
+    with open(names_dmp) as handle:
+        for line in handle:
+            line = line.strip().strip('|').strip()
+            if not line:
+                # Skip blank lines instead of silently dropping every
+                # record that follows the first one.
+                continue
+            fields = line.split('\t|\t')
+            if fields[3].strip() == 'scientific name':
+                nodeNames[int(fields[0].strip())] = fields[1].strip()
+    return nodeNames
+
+def readTaxonomyNodes(nodes_dmp):
+    """Read node ranks and the parent/child relations from a nodes file.
+
+    Every non-blank line is stripped of surrounding whitespace and of the
+    '|' line terminator, then split on the tab-pipe-tab field separator.
+    The first three fields are used as node ID, parent ID and rank.
+
+    Args:
+        nodes_dmp: Path of the nodes file to read (presumably an NCBI
+            taxonomy nodes.dmp dump -- confirm against the caller).
+
+    Returns:
+        A 3-tuple (ranks, children, parents):
+            ranks    -- dict of int node ID -> rank string
+            children -- dict of int parent ID -> set of int child IDs
+            parents  -- dict of int node ID -> int parent ID
+        Node 1 is always recorded as its own parent, and therefore also
+        appears in its own child set.
+
+    Raises:
+        IndexError: if a non-blank line has fewer than three fields.
+        ValueError: if one of the first two fields is not an integer.
+    """
+    nodeRanks = {}
+    nodeChildren = {}
+    nodeParents = {}
+    # The context manager closes the file even when parsing raises.
+    with open(nodes_dmp) as handle:
+        for line in handle:
+            line = line.strip().strip('|').strip()
+            if not line:
+                # Skip blank lines instead of silently dropping every
+                # record that follows the first one.
+                continue
+            # A list comprehension (not map) keeps the fields indexable on
+            # Python 3 as well as Python 2.
+            fields = [field.strip() for field in line.split('\t|\t')]
+            taxID = int(fields[0])
+            parentID = int(fields[1])
+            rank = fields[2]
+            if taxID == 1:
+                # Node 1 is pinned to itself regardless of the file content.
+                parentID = 1
+
+            nodeParents[taxID] = parentID
+            nodeChildren.setdefault(parentID, set()).add(taxID)
+            nodeRanks[taxID] = rank
+
+    return nodeRanks, nodeChildren, nodeParents
+
+def getDescendents(taxID, tree):
+    """Collect a node together with everything reachable below it.
+
+    Args:
+        taxID: ID of the node to start from.
+        tree: dict mapping a node ID to an iterable of its child IDs;
+            nodes without an entry are treated as leaves.
+
+    Returns:
+        set containing taxID and every ID reachable from it through tree.
+    """
+    descendents = set([taxID])
+    queue = [taxID]
+    while queue:
+        node = queue.pop()
+        for child in tree.get(node, ()):
+            # Enqueue each node only once: a node listed as its own child
+            # (or any other cycle) would otherwise keep the queue growing
+            # forever.
+            if child not in descendents:
+                descendents.add(child)
+                queue.append(child)
+    return descendents
\ No newline at end of file