Mercurial > repos > bcclaywell > argo_navis
comparison bin/extract_nexus_translations.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d67268158946 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import csv | |
5 import re | |
6 | |
7 | |
# Patterns recognised by the translation-extraction state machine below.
# Raw strings keep the regex escapes away from Python's own string-escape
# processing (non-raw '\s', '\d', '\;' are invalid string escapes).
begin_trees_re = re.compile(r'^Begin trees\;')
trans_token_re = re.compile(r'^\s*Translate')
# A translation entry followed by a comma: "<number> <name>,".
trans_line_re = re.compile(r'^\s*(\d+)\s*([^\s\,]+)\,')
# The closing translation entry, which carries no trailing comma.
last_trans_line_re = re.compile(r'^\s*(\d+)\s*([^\s\,]+)\s*$')
end_trans_re = re.compile(r"^\;$")


def translation_line_extraction_reducer(lines_and_state, next_line):
    """The reducer guts behind the extract_translation_lines state machine.

    States advance init -> begin_trees -> in_trans -> end_of_trans -> finished.

    Args:
        lines_and_state: 2-tuple ``(lines, state)`` where ``lines`` is the list
            of ``(number, name)`` string tuples collected so far and ``state``
            is the name of the current state.
        next_line: the next line of text from the input.

    Returns:
        A new ``(lines, state)`` tuple. The incoming ``lines`` list is never
        mutated; a new list is built whenever an entry is added.

    Raises:
        Exception: if the line after "Begin trees;" is not the Translate
            token, if the line after the end of the entries is not a lone
            ";", or if ``state`` is not a known state name.
    """
    lines, state = lines_and_state

    if state == "init":
        if begin_trees_re.match(next_line):
            return (lines, "begin_trees")
        return lines_and_state

    if state == "begin_trees":
        if trans_token_re.match(next_line):
            return (lines, "in_trans")
        raise Exception("Translation line should immediately procede Begin Trees")

    if state == "in_trans":
        m = trans_line_re.match(next_line)
        if m:
            return (lines + [m.groups()], "in_trans")
        # A terminator directly after a comma-terminated entry closes the block.
        if end_trans_re.match(next_line):
            return (lines, "finished")
        # The final entry has no trailing comma; keep it rather than silently
        # dropping it while moving on to expect the terminator.
        m = last_trans_line_re.match(next_line)
        if m:
            return (lines + [m.groups()], "end_of_trans")
        return (lines, "end_of_trans")

    if state == "end_of_trans":
        if end_trans_re.match(next_line):
            return (lines, "finished")
        raise Exception("Next line should have been an end of trans line")

    if state == "finished":
        # Everything after the translation block is ignored.
        return lines_and_state

    raise Exception("Unknown state: %s" % state)
42 | |
43 | |
def extract_translation_lines(line_reader):
    """Extract the translation entries from the lines of a nexus trees file.

    Feeds each line through translation_line_extraction_reducer, starting
    from the "init" state with an empty result list.

    Args:
        line_reader: any iterable of text lines (e.g. an open file).

    Returns:
        List of 2-tuples ``(number, seqname)``; both items are strings as
        captured by the regex match, not converted to int.

    Raises:
        Exception: propagated from the reducer when the translation block
            is malformed.
    """
    # An explicit loop is used instead of the Python-2-only builtin reduce().
    state = ([], "init")
    for line in line_reader:
        state = translation_line_extraction_reducer(state, line)
        # Once finished the reducer returns its input unchanged for every
        # further line, so stop reading rather than scanning the tree data.
        if state[1] == "finished":
            break
    return state[0]
49 | |
50 | |
def get_args():
    """Parse the command line: an input trees file opened for reading and an
    output translation file opened for writing, both as positional arguments."""
    cli = argparse.ArgumentParser()
    for dest, mode in (('in_trees', 'r'), ('out_translation', 'w')):
        cli.add_argument(dest, type=argparse.FileType(mode))
    return cli.parse_args()
56 | |
57 | |
def main(args):
    """Write the translation table of a nexus trees file out as CSV.

    Args:
        args: namespace with ``in_trees`` (readable file object) and
            ``out_translation`` (writable file object), as produced by
            get_args().

    The output starts with an ``id,sequence`` header row followed by one row
    per extracted translation entry. Both files are closed before returning,
    including when extraction or writing raises.
    """
    try:
        writer = csv.writer(args.out_translation)
        writer.writerow(['id', 'sequence'])
        writer.writerows(extract_translation_lines(args.in_trees))
    finally:
        # Nested so that a failure closing the input cannot leak the output.
        try:
            args.in_trees.close()
        finally:
            args.out_translation.close()
64 | |
65 | |
# Script entry point: parse the command line, then run the extraction.
if __name__ == '__main__':
    cli_args = get_args()
    main(cli_args)
68 | |
69 |