annotate query_textgrids.py @ 1:be28ced5c4e0 draft

planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
author stevecassidy
date Tue, 30 Aug 2016 20:54:17 -0400
parents e9c8e6204679
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
1 from __future__ import print_function
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
2 import json
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
3 import argparse
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
4 import sys
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
5 import os
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
6 import tgt
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
7
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
8
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
def parser(argv=None):
    """Parse the command-line options of the TextGrid query tool.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            the behaviour of a plain ``parser()`` call.

    Returns:
        An ``argparse.Namespace`` with the string attributes ``textgrid``,
        ``identifier``, ``tier``, ``regex`` and ``output_path``.

    Raises:
        SystemExit: raised by argparse when a required option is missing or
            an unrecognised option is supplied.
    """
    # Named ``arg_parser`` so the local does not shadow this function's name.
    arg_parser = argparse.ArgumentParser(description="Find matching segments in a TextGrid")
    arg_parser.add_argument('--textgrid', required=True, action="store", type=str, help="TextGrid files (comma separated)")
    arg_parser.add_argument('--identifier', required=True, action="store", type=str, help="Dataset identifiers (comma separated)")
    arg_parser.add_argument('--tier', required=True, action="store", type=str, help="TextGrid Tier to search")
    arg_parser.add_argument('--regex', required=True, action="store", type=str, help="Regular expression matching segments")
    arg_parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
    return arg_parser.parse_args(argv)
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
17
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
def main():
    """Search one tier of each TextGrid for matching labels and write a TSV.

    The ``--textgrid`` and ``--identifier`` options are comma-separated lists
    that are paired up positionally. For every TextGrid file the tier named
    by ``--tier`` is looked up and its annotations are filtered with
    ``--regex``. All matches from all files are then written to
    ``--output_path`` as tab-separated rows under the header
    ``start, end, duration, label, identifier``.

    Raises:
        SystemExit: if the number of TextGrid files differs from the number
            of identifiers (the message is printed to stderr and the exit
            status is non-zero), or if argument parsing fails.
    """
    args = parser()

    tgfiles = args.textgrid.split(',')
    identifiers = args.identifier.split(',')
    # Explicit check rather than ``assert``: assertions are removed under
    # ``python -O``, and ``zip`` below would then silently drop the unpaired
    # entries instead of reporting the mismatch.
    if len(tgfiles) != len(identifiers):
        sys.exit("number of textgrids must match number of identifiers")

    rows = []
    for tgfile, identifier in zip(tgfiles, identifiers):
        tg = tgt.read_textgrid(tgfile)
        tier = tg.get_tier_by_name(args.tier)
        # regex=True asks tgt to treat the pattern as a regular expression.
        matches = tier.get_annotations_with_text(args.regex, regex=True)

        for m in matches:
            rows.append((str(m.start_time), str(m.end_time), str(m.duration()), m.text, identifier))

    # Written only after every input has been read, so a failure while
    # reading a TextGrid does not leave a partial output file behind.
    with open(args.output_path, 'w') as out:
        out.write("start\tend\tduration\tlabel\tidentifier\n")
        for row in rows:
            out.write('\t'.join(row) + '\n')
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
40
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
41
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
42
e9c8e6204679 planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff changeset
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()