comparison query_textgrids.py @ 1:be28ced5c4e0 draft

planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
author stevecassidy
date Tue, 30 Aug 2016 20:54:17 -0400
parents e9c8e6204679
children
comparison
equal deleted inserted replaced
0:e9c8e6204679 1:be28ced5c4e0
1 from __future__ import print_function 1 from __future__ import print_function
2 import json 2 import json
3 import argparse 3 import argparse
4 import pyalveo
5 import sys 4 import sys
6 import os 5 import os
7 import tgt 6 import tgt
8 7
9 API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
10 8
11 def parser(): 9 def parser():
12 parser = argparse.ArgumentParser(description="Find matching segments in a TextGrid") 10 parser = argparse.ArgumentParser(description="Find matching segments in a TextGrid")
13 parser.add_argument('--textgrid', required=True, action="store", type=str, help="TextGrid files (comma separated)") 11 parser.add_argument('--textgrid', required=True, action="store", type=str, help="TextGrid files (comma separated)")
14 parser.add_argument('--identifier', required=True, action="store", type=str, help="Dataset identifiers (comma separated)") 12 parser.add_argument('--identifier', required=True, action="store", type=str, help="Dataset identifiers (comma separated)")
23 tgfiles = args.textgrid.split(',') 21 tgfiles = args.textgrid.split(',')
24 identifiers = args.identifier.split(',') 22 identifiers = args.identifier.split(',')
25 assert len(tgfiles) == len(identifiers), "number of textgrids must match number of identifiers" 23 assert len(tgfiles) == len(identifiers), "number of textgrids must match number of identifiers"
26 24
27 pairs = zip(tgfiles, identifiers) 25 pairs = zip(tgfiles, identifiers)
28 26
29 rows = [] 27 rows = []
30 for tgfile, identifier in pairs: 28 for tgfile, identifier in pairs:
31 tg = tgt.read_textgrid(tgfile) 29 tg = tgt.read_textgrid(tgfile)
32 tier = tg.get_tier_by_name(args.tier) 30 tier = tg.get_tier_by_name(args.tier)
33 matches = tier.get_annotations_with_text(args.regex, regex=True) 31 matches = tier.get_annotations_with_text(args.regex, regex=True)