Mercurial > repos > stevecassidy > textgrid
annotate query_textgrids.py @ 2:a65223e5ca3a draft default tip
planemo upload commit 72cee9103c0ae4acb5794afaed179bea2c729f2c-dirty
author | stevecassidy |
---|---|
date | Sat, 11 Mar 2017 21:36:22 -0500 |
parents | be28ced5c4e0 |
children |
rev | line source |
---|---|
0
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
1 from __future__ import print_function |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
2 import json |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
3 import argparse |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
4 import sys |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
5 import os |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
6 import tgt |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
7 |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
8 |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
def parser(argv=None):
    """Parse the command-line options for the TextGrid query tool.

    All five options are required:

    --textgrid      comma separated list of TextGrid files
    --identifier    comma separated list of dataset identifiers
    --tier          name of the TextGrid tier to search
    --regex         regular expression matching segments
    --output_path   path of the output file

    :param argv: optional list of argument strings to parse. When None
        (the default) argparse reads the arguments from sys.argv[1:].
    :returns: the argparse.Namespace holding the parsed option values.
    """
    # Named arg_parser so the local does not shadow this function's own name.
    arg_parser = argparse.ArgumentParser(description="Find matching segments in a TextGrid")
    arg_parser.add_argument('--textgrid', required=True, action="store", type=str,
                            help="TextGrid files (comma separated)")
    arg_parser.add_argument('--identifier', required=True, action="store", type=str,
                            help="Dataset identifiers (comma separated)")
    arg_parser.add_argument('--tier', required=True, action="store", type=str,
                            help="TextGrid Tier to search")
    arg_parser.add_argument('--regex', required=True, action="store", type=str,
                            help="Regular expression matching segments")
    arg_parser.add_argument('--output_path', required=True, action="store", type=str,
                            help="Path to output file")
    return arg_parser.parse_args(argv)
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
17 |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
def main():
    """Search one tier of each TextGrid and write the matches as a TSV file.

    The --textgrid and --identifier options are comma separated lists that
    are paired up positionally. For every TextGrid file the tier named by
    --tier is searched for annotations whose text matches --regex. Each
    match becomes one tab separated row (start, end, duration, label,
    identifier) in the file given by --output_path, below a header row.

    Exits with an error message when the number of TextGrid files differs
    from the number of identifiers.
    """
    args = parser()

    tgfiles = args.textgrid.split(',')
    identifiers = args.identifier.split(',')
    # Explicit check instead of `assert`: assertions are removed under
    # `python -O`, after which zip() would silently drop the unpaired items.
    if len(tgfiles) != len(identifiers):
        sys.exit("number of textgrids must match number of identifiers "
                 "(got {0} textgrids and {1} identifiers)".format(
                     len(tgfiles), len(identifiers)))

    # Collect every row before opening the output, so the output file is
    # only written once all TextGrids have been read.
    rows = []
    for tgfile, identifier in zip(tgfiles, identifiers):
        tg = tgt.read_textgrid(tgfile)
        # NOTE(review): a missing tier is not handled here; presumably tgt
        # raises in that case - confirm against the tgt documentation.
        tier = tg.get_tier_by_name(args.tier)
        matches = tier.get_annotations_with_text(args.regex, regex=True)
        rows.extend(
            (str(m.start_time), str(m.end_time), str(m.duration()), m.text, identifier)
            for m in matches
        )

    with open(args.output_path, 'w') as out:
        out.write("start\tend\tduration\tlabel\tidentifier\n")
        for row in rows:
            out.write('\t'.join(row) + '\n')
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
40 |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
41 |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
42 |
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
diff
changeset
|
# Run the query only when this file is executed as a script.
if __name__ == "__main__":
    main()