Mercurial > repos > stevecassidy > textgrid
changeset 0:e9c8e6204679 draft
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
author | stevecassidy |
---|---|
date | Mon, 15 Aug 2016 23:47:30 -0400 |
parents | |
children | be28ced5c4e0 |
files | query_textgrids.py query_textgrids.xml |
diffstat | 2 files changed, 122 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/query_textgrids.py Mon Aug 15 23:47:30 2016 -0400 @@ -0,0 +1,46 @@ +from __future__ import print_function +import json +import argparse +import pyalveo +import sys +import os +import tgt + +API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module + +def parser(): + parser = argparse.ArgumentParser(description="Find matching segments in a TextGrid") + parser.add_argument('--textgrid', required=True, action="store", type=str, help="TextGrid files (comma separated)") + parser.add_argument('--identifier', required=True, action="store", type=str, help="Dataset identifiers (comma separated)") + parser.add_argument('--tier', required=True, action="store", type=str, help="TextGrid Tier to search") + parser.add_argument('--regex', required=True, action="store", type=str, help="Regular expression matching segments") + parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") + return parser.parse_args() + +def main(): + args = parser() + + tgfiles = args.textgrid.split(',') + identifiers = args.identifier.split(',') + assert len(tgfiles) == len(identifiers), "number of textgrids must match number of identifiers" + + pairs = zip(tgfiles, identifiers) + + rows = [] + for tgfile, identifier in pairs: + tg = tgt.read_textgrid(tgfile) + tier = tg.get_tier_by_name(args.tier) + matches = tier.get_annotations_with_text(args.regex, regex=True) + + for m in matches: + rows.append((str(m.start_time), str(m.end_time), str(m.duration()), m.text, identifier)) + + with open(args.output_path, 'w') as out: + out.write("start\tend\tduration\tlabel\tidentifier\n") + for row in rows: + out.write('\t'.join(row) + '\n') + + + +if __name__ == '__main__': + main()
<!-- Contents of query_textgrids.xml as added in changeset 0:e9c8e6204679.
     Galaxy tool wrapper that runs query_textgrids.py over one or more
     TextGrid datasets and produces a tabular result. -->
<tool id="query_textgrids" name="Search TextGrid" version="0.01" force_history_refresh="True">
    <description>to find matching segments</description>

    <requirements>
        <requirement type="package" version="1.4.2">tgt</requirement>
    </requirements>

    <command interpreter="python">
        query_textgrids.py --textgrid "${",".join(map(str, $textgrid))}" --identifier "${",".join(map(str, [t.element_identifier for t in $textgrid]))}" --tier "$tier" --regex '$regex' --output_path "$output"
    </command>

    <inputs>
        <param name="textgrid" type="data" multiple="true" format="TextGrid" label="TextGrid" help="The TextGrid file"/>
        <param name="tier" type="text" label="Tier name" help="e.g. MAU, ORT"/>
        <param name="regex" type="text" label="Regular Expression to match segments">
            <!-- Single quotes are taken out of the valid set and mapped to __sq__,
                 so the value cannot close the single-quoted argument in the command. -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                </valid>
                <mapping initial="none">
                    <add source="'" target="__sq__"/>
                </mapping>
            </sanitizer>
        </param>
    </inputs>

    <outputs>
        <data format="tabular" name="output" label="Query Results" />
    </outputs>


    <tests>
        <!-- Match every segment of the MAU tier. -->
        <test>
            <param name="tier" value="MAU"/>
            <param name="regex" value=".*"/>
            <param name="textgrid" value="textgrids/1_1308_2_22_020-ch6-speaker16.TextGrid_TextGrid"/>
            <output name="output">
                <assert_contents>
                    <has_text text="start"/>
                    <has_text text="3:"/>
                    <has_text text="0.55"/>
                    <has_text text="1_1308_2_22_020-ch6-speaker16.TextGrid_TextGrid"/>
                </assert_contents>
            </output>
        </test>
        <!-- Match only the vowel labels listed in the alternation. -->
        <test>
            <param name="tier" value="MAU"/>
            <param name="regex" value="(a|e|{|}|6|3|i|U|O|I)"/>
            <param name="textgrid" value="textgrids/1_1308_2_22_020-ch6-speaker16.TextGrid_TextGrid"/>
            <output name="output">
                <assert_contents>
                    <has_text text="start"/>
                    <has_text text="3:"/>
                    <has_text text="0.81"/>
                    <has_text text="1_1308_2_22_020-ch6-speaker16.TextGrid_TextGrid"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help>Search for segments within a TextGrid file. Outputs a table with start, end, duration,
        label and filename. Use a regular expression to match segments, eg. to match a or O or I
        use (a|O|I), for any SAMPA-AU short vowel try (a|e|{|}|6|3|i|U|O|I) .</help>

    <citations>
        <citation type='bibtex'>
            @inproceedings{Buschmeir2013,
            author = {Buschmeier, Hendrik and Wlodarczak, Marcin},
            booktitle = {Tagungsband der 24. Konferenz zur Elektronischen Sprachsignalverarbeitung (ESSV 2013)},
            pages = {152--157},
            title = {{TextGridTools: A TextGrid Processing and Analysis Toolkit for Python}},
            year = {2013}
            }
        </citation>
    </citations>
</tool>