Mercurial > repos > stevecassidy > textgrid
comparison query_textgrids.py @ 0:e9c8e6204679 draft
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
| author | stevecassidy |
|---|---|
| date | Mon, 15 Aug 2016 23:47:30 -0400 |
| parents | |
| children | be28ced5c4e0 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e9c8e6204679 |
|---|---|
| 1 from __future__ import print_function | |
| 2 import json | |
| 3 import argparse | |
| 4 import pyalveo | |
| 5 import sys | |
| 6 import os | |
| 7 import tgt | |
| 8 | |
# Alveo server address (presumably the base URL used for pyalveo calls; it is
# not referenced anywhere in the visible part of this script).
# TODO: export constants to a separate module
API_URL = "https://app.alveo.edu.au"
| 10 | |
def parser(argv=None):
    """Parse the command-line options of the TextGrid query tool.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the script entry point relies on.

    Returns:
        An ``argparse.Namespace`` with the string attributes ``textgrid``,
        ``identifier``, ``tier``, ``regex`` and ``output_path``.

    Raises:
        SystemExit: raised by argparse when a required option is missing or
            an unrecognised option is supplied.
    """
    # Named ``arg_parser`` so the local does not shadow this function's name.
    arg_parser = argparse.ArgumentParser(
        description="Find matching segments in a TextGrid")
    arg_parser.add_argument(
        '--textgrid', required=True, action="store", type=str,
        help="TextGrid files (comma separated)")
    arg_parser.add_argument(
        '--identifier', required=True, action="store", type=str,
        help="Dataset identifiers (comma separated)")
    arg_parser.add_argument(
        '--tier', required=True, action="store", type=str,
        help="TextGrid Tier to search")
    arg_parser.add_argument(
        '--regex', required=True, action="store", type=str,
        help="Regular expression matching segments")
    arg_parser.add_argument(
        '--output_path', required=True, action="store", type=str,
        help="Path to output file")
    return arg_parser.parse_args(argv)
| 19 | |
def main():
    """Find matching TextGrid segments and write them to a TSV file.

    Reads the command-line options through ``parser()``, pairs each TextGrid
    file with the identifier at the same position, collects the annotations
    that the requested tier returns for the regular expression, and writes a
    header line followed by one tab-separated row per match (start, end,
    duration, label, identifier) to ``--output_path``.

    Raises:
        SystemExit: if the number of TextGrid files differs from the number
            of identifiers.
    """
    args = parser()

    tgfiles = args.textgrid.split(',')
    identifiers = args.identifier.split(',')
    # Validate explicitly instead of using ``assert``: assertions are removed
    # under ``python -O`` and zip() would then silently drop unpaired entries.
    if len(tgfiles) != len(identifiers):
        sys.exit("number of textgrids must match number of identifiers")

    rows = []
    for tgfile, identifier in zip(tgfiles, identifiers):
        tg = tgt.read_textgrid(tgfile)
        tier = tg.get_tier_by_name(args.tier)
        matches = tier.get_annotations_with_text(args.regex, regex=True)

        # Every field is stringified here so a row can be joined directly.
        for m in matches:
            rows.append((str(m.start_time), str(m.end_time),
                         str(m.duration()), m.text, identifier))

    # The output file is only opened once every TextGrid has been read, so a
    # failure while reading does not leave a partial file behind.
    with open(args.output_path, 'w') as out:
        out.write("start\tend\tduration\tlabel\tidentifier\n")
        for row in rows:
            out.write('\t'.join(row) + '\n')
| 42 | |
| 43 | |
| 44 | |
# Run the query only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
