comparison query_textgrids.py @ 0:e9c8e6204679 draft

planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
author stevecassidy
date Mon, 15 Aug 2016 23:47:30 -0400
parents
children be28ced5c4e0
comparison
equal deleted inserted replaced
-1:000000000000 0:e9c8e6204679
1 from __future__ import print_function
2 import json
3 import argparse
4 import pyalveo
5 import sys
6 import os
7 import tgt
8
9 API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
10
def parser(argv=None):
    """Parse the command-line options of the TextGrid query tool.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so the
            zero-argument call ``parser()`` behaves as before.

    Returns:
        argparse.Namespace with the string attributes ``textgrid``,
        ``identifier``, ``tier``, ``regex`` and ``output_path``.

    Raises:
        SystemExit: raised by argparse when a required option is missing
            or an unknown option is supplied.
    """
    # Use a distinct local name so the ArgumentParser does not shadow this
    # function's own name.
    arg_parser = argparse.ArgumentParser(description="Find matching segments in a TextGrid")
    arg_parser.add_argument('--textgrid', required=True, action="store", type=str,
                            help="TextGrid files (comma separated)")
    arg_parser.add_argument('--identifier', required=True, action="store", type=str,
                            help="Dataset identifiers (comma separated)")
    arg_parser.add_argument('--tier', required=True, action="store", type=str,
                            help="TextGrid Tier to search")
    arg_parser.add_argument('--regex', required=True, action="store", type=str,
                            help="Regular expression matching segments")
    arg_parser.add_argument('--output_path', required=True, action="store", type=str,
                            help="Path to output file")
    return arg_parser.parse_args(argv)
19
def main():
    """Search TextGrid tiers for matching annotations and write them as TSV.

    Reads the command-line options via ``parser()``, pairs each TextGrid file
    with the identifier at the same position, looks up the tier named by
    ``--tier`` in every file and collects the annotations returned by
    ``get_annotations_with_text(args.regex, regex=True)``. The result is
    written to ``--output_path`` as tab-separated text with the header
    ``start  end  duration  label  identifier``.

    Raises:
        ValueError: if the number of TextGrid files differs from the number
            of identifiers. This was previously an ``assert``, which is
            removed under ``python -O`` and would have let ``zip`` silently
            drop the unmatched entries.
    """
    args = parser()

    tgfiles = args.textgrid.split(',')
    identifiers = args.identifier.split(',')
    if len(tgfiles) != len(identifiers):
        raise ValueError("number of textgrids must match number of identifiers")

    # Collect every row before opening the output file, so a failure while
    # reading any TextGrid does not leave a partially written result behind.
    rows = []
    for tgfile, identifier in zip(tgfiles, identifiers):
        tier = tgt.read_textgrid(tgfile).get_tier_by_name(args.tier)
        rows.extend(
            (str(m.start_time), str(m.end_time), str(m.duration()), m.text, identifier)
            for m in tier.get_annotations_with_text(args.regex, regex=True)
        )

    with open(args.output_path, 'w') as out:
        out.write("start\tend\tduration\tlabel\tidentifier\n")
        out.writelines('\t'.join(row) + '\n' for row in rows)


if __name__ == '__main__':
    main()