Mercurial > repos > stevecassidy > textgrid
annotate query_textgrids.py @ 1:be28ced5c4e0 draft
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
| author | stevecassidy | 
|---|---|
| date | Tue, 30 Aug 2016 20:54:17 -0400 | 
| parents | e9c8e6204679 | 
| children | 
| rev | line source | 
|---|---|
| 0 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 1 from __future__ import print_function | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 2 import json | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 3 import argparse | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 4 import sys | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 5 import os | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 6 import tgt | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 7 | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 8 | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
def parser(argv=None):
    """Parse the command-line options of the TextGrid query script.

    Args:
        argv: Optional list of argument strings to parse. When left as
            ``None`` argparse falls back to ``sys.argv[1:]``, which is the
            behaviour of the original zero-argument call.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args``, carrying the
        string attributes ``textgrid``, ``identifier``, ``tier``, ``regex``
        and ``output_path``.

    Raises:
        SystemExit: raised by argparse itself when a required option is
            missing or the arguments are otherwise invalid.
    """
    # Named ``arg_parser`` so the local does not shadow this function's name.
    arg_parser = argparse.ArgumentParser(description="Find matching segments in a TextGrid")
    arg_parser.add_argument('--textgrid', required=True, action="store", type=str, help="TextGrid files (comma separated)")
    arg_parser.add_argument('--identifier', required=True, action="store", type=str, help="Dataset identifiers (comma separated)")
    arg_parser.add_argument('--tier', required=True, action="store", type=str, help="TextGrid Tier to search")
    arg_parser.add_argument('--regex', required=True, action="store", type=str, help="Regular expression matching segments")
    arg_parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
    return arg_parser.parse_args(argv)
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 17 | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
def main():
    """Query a tier of each TextGrid and write the matches as a TSV file.

    The ``--textgrid`` and ``--identifier`` options are split on commas and
    paired positionally. For every pair the TextGrid file is read with
    ``tgt``, the tier named by ``--tier`` is looked up, and the annotations
    returned for the ``--regex`` pattern are collected. The result is written
    to ``--output_path`` as a header line followed by one tab-separated row
    per match: start, end, duration, label, identifier.

    Raises:
        SystemExit: if the number of TextGrid files differs from the number
            of identifiers (in addition to argparse's own exits).
    """
    args = parser()

    tgfiles = args.textgrid.split(',')
    identifiers = args.identifier.split(',')
    # Explicit check instead of ``assert``: assertions are removed when Python
    # runs with -O, which would let zip() silently drop the unpaired entries.
    if len(tgfiles) != len(identifiers):
        raise SystemExit("number of textgrids must match number of identifiers")

    rows = []
    for tgfile, identifier in zip(tgfiles, identifiers):
        tg = tgt.read_textgrid(tgfile)
        tier = tg.get_tier_by_name(args.tier)
        matches = tier.get_annotations_with_text(args.regex, regex=True)

        for m in matches:
            # Numeric fields are stringified here so each row can be joined
            # with tabs when it is written out below.
            rows.append((str(m.start_time), str(m.end_time), str(m.duration()), m.text, identifier))

    with open(args.output_path, 'w') as out:
        out.write("start\tend\tduration\tlabel\tidentifier\n")
        for row in rows:
            out.write('\t'.join(row) + '\n')
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 40 | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 41 | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
changeset | 42 | 
| 
e9c8e6204679
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
 stevecassidy parents: diff
# Run the query only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
