Mercurial > repos > cschu > kraken_tools
diff kraken_visualize.py @ 2:df4163858937 draft
Uploaded
author | cschu |
---|---|
date | Mon, 18 May 2015 15:35:48 -0400 |
parents | 0916697409ea |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""Helpers for loading a taxonomy from ``names.dmp`` / ``nodes.dmp`` style files.

Each input line is expected to hold fields separated by tab-pipe-tab
(``'\\t|\\t'``) with a trailing ``|`` terminator (this looks like the NCBI
taxonomy dump layout -- confirm against the data actually fed in).
"""

import sys

# Separator between the fields of one dump line.
_FIELD_SEP = '\t|\t'


def _splitDumpLine(line):
    """Split one dump line into whitespace-stripped fields.

    Returns None when the line is empty once surrounding whitespace and the
    leading/trailing ``|`` have been removed.
    """
    line = line.strip().strip('|').strip()
    if not line:
        return None
    return [field.strip() for field in line.split(_FIELD_SEP)]


def readTaxonomyNames(names_dmp):
    """Read the scientific name of every taxon from a names file.

    Only rows whose fourth field equals ``'scientific name'`` are kept; the
    first field is the integer taxon ID and the second field is the name.
    Reading stops at the first blank line. If an ID occurs more than once,
    the last occurrence wins.

    :param names_dmp: path of the names file.
    :returns: dict mapping taxon ID (int) to scientific name (str).
    :raises IndexError: if a non-blank line has fewer than four fields.
    :raises ValueError: if the ID of a kept row is not an integer.
    """
    nodeNames = {}
    with open(names_dmp) as handle:
        for line in handle:
            fields = _splitDumpLine(line)
            if fields is None:
                # A blank line terminates the input (original behaviour).
                break
            if fields[3] == 'scientific name':
                nodeNames[int(fields[0])] = fields[1]
    return nodeNames


def readTaxonomyNodes(nodes_dmp):
    """Read ranks and parent/child relations from a nodes file.

    The first three fields of each row are used: taxon ID, parent taxon ID
    and rank. Reading stops at the first blank line.

    Taxon 1 is always recorded as its own parent, so ``nodeChildren[1]``
    contains 1 itself (a self-loop at the root).

    :param nodes_dmp: path of the nodes file.
    :returns: tuple ``(nodeRanks, nodeChildren, nodeParents)`` where
        ``nodeRanks`` maps taxon ID to rank string, ``nodeChildren`` maps a
        parent ID to the set of its child IDs, and ``nodeParents`` maps a
        taxon ID to its parent ID.
    :raises IndexError: if a non-blank line has fewer than three fields.
    :raises ValueError: if either ID field is not an integer.
    """
    nodeRanks = {}
    nodeChildren = {}
    nodeParents = {}
    with open(nodes_dmp) as handle:
        for line in handle:
            fields = _splitDumpLine(line)
            if fields is None:
                # A blank line terminates the input (original behaviour).
                break
            # Explicit int() calls instead of slicing the result of map(),
            # which is an iterator (not a list) on Python 3.
            taxID = int(fields[0])
            parentID = int(fields[1])
            if taxID == 1:
                # The root is pinned to itself regardless of the file content.
                parentID = 1

            nodeParents[taxID] = parentID
            nodeChildren.setdefault(parentID, set()).add(taxID)
            nodeRanks[taxID] = fields[2]

    return nodeRanks, nodeChildren, nodeParents


def getDescendents(taxID, tree):
    """Collect ``taxID`` and every node reachable from it through ``tree``.

    :param taxID: ID of the node to start from.
    :param tree: mapping from a node ID to an iterable of its child IDs, e.g.
        the ``nodeChildren`` dict returned by :func:`readTaxonomyNodes`.
        Nodes absent from the mapping are treated as leaves.
    :returns: set containing ``taxID`` and all of its descendants.
    """
    descendents = set([taxID])
    queue = [taxID]
    while queue:
        node = queue.pop()
        for child in tree.get(node, ()):
            # Skip nodes already seen: the root lists itself as a child, and
            # re-queuing it would make the traversal loop forever.
            if child not in descendents:
                descendents.add(child)
                queue.append(child)
    return descendents