changeset 0:e9c8e6204679 draft

planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
author stevecassidy
date Mon, 15 Aug 2016 23:47:30 -0400
parents
children be28ced5c4e0
files query_textgrids.py query_textgrids.xml
diffstat 2 files changed, 122 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/query_textgrids.py	Mon Aug 15 23:47:30 2016 -0400
@@ -0,0 +1,46 @@
+from __future__ import print_function
+import json
+import argparse
+import pyalveo
+import sys
+import os
+import tgt
+
+API_URL = 'https://app.alveo.edu.au' # Alveo server base URL; not referenced elsewhere in this script. TODO: export constants to a separate module
+
+def parser():
+    """Parse the command line and return the argparse namespace; all five options are required strings."""
+    cli = argparse.ArgumentParser(description="Find matching segments in a TextGrid")
+    options = [('--textgrid', "TextGrid files (comma separated)"), ('--identifier', "Dataset identifiers (comma separated)"),
+               ('--tier', "TextGrid Tier to search"), ('--regex', "Regular expression matching segments"), ('--output_path', "Path to output file")]
+    for flag, description in options:
+        cli.add_argument(flag, required=True, action="store", type=str, help=description)
+    return cli.parse_args()
+
+def main():
+    """Search each TextGrid for matching segments and write them to a tab-separated file.
+
+    Paths in --textgrid pair positionally with names in --identifier; each annotation on tier
+    --tier matching --regex becomes one row. Exits with status 1 if the two lists differ in length.
+    """
+    args = parser()
+    tgfiles = args.textgrid.split(',')
+    identifiers = args.identifier.split(',')
+    # Explicit check instead of assert: asserts are stripped under `python -O`.
+    if len(tgfiles) != len(identifiers):
+        sys.exit("number of textgrids must match number of identifiers")
+    rows = []
+    for tgfile, identifier in zip(tgfiles, identifiers):
+        tier = tgt.read_textgrid(tgfile).get_tier_by_name(args.tier)
+        for m in tier.get_annotations_with_text(args.regex, regex=True):
+            rows.append((str(m.start_time), str(m.end_time), str(m.duration()), m.text, identifier))
+
+    with open(args.output_path, 'w') as out:
+        out.write("start\tend\tduration\tlabel\tidentifier\n")
+        for row in rows:
+            out.write('\t'.join(row) + '\n')
+
+
+# Script entry point: run the query only when executed directly, not on import.
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/query_textgrids.xml	Mon Aug 15 23:47:30 2016 -0400
@@ -0,0 +1,76 @@
+<tool id="query_textgrids" name="Search TextGrid" version="0.01" force_history_refresh="True">
+    <description>to find matching segments</description>
+
+    <requirements>
+        <requirement type="package" version="1.4.2">tgt</requirement>
+    </requirements>
+    <!-- Text parameters and the output path are quoted so values containing spaces reach the script as single arguments. -->
+    <command interpreter="python">
+        query_textgrids.py --textgrid "${",".join(map(str, $textgrid))}" --identifier "${",".join(map(str, [t.element_identifier for t in $textgrid]))}" --tier '$tier' --regex '$regex' --output_path '$output'
+    </command>
+
+    <inputs>
+        <param name="textgrid" type="data" multiple="true" format="TextGrid" label="TextGrid" help="The TextGrid file"/>
+        <param name="tier" type="text" label="Tier name" help="e.g. MAU, ORT"/>
+        <param name="regex" type="text" label="Regular Expression to match segments">
+            <sanitizer>
+              <valid initial="string.printable">
+               <remove value="&apos;"/>
+              </valid>
+              <mapping initial="none">
+                <add source="&apos;" target="__sq__"/>
+              </mapping>
+            </sanitizer>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data format="tabular" name="output" label="Query Results" />
+    </outputs>
+
+
+    <tests>
+        <test>
+            <param name="tier" value="MAU"/>
+            <param name="regex" value=".*"/>
+            <param name="textgrid" value="textgrids/1_1308_2_22_020-ch6-speaker16.TextGrid_TextGrid"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="start"/>
+                    <has_text text="3:"/>
+                    <has_text text="0.55"/>
+                    <has_text text="1_1308_2_22_020-ch6-speaker16.TextGrid_TextGrid"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="tier" value="MAU"/>
+            <param name="regex" value="(a|e|{|}|6|3|i|U|O|I)"/>
+            <param name="textgrid" value="textgrids/1_1308_2_22_020-ch6-speaker16.TextGrid_TextGrid"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="start"/>
+                    <has_text text="3:"/>
+                    <has_text text="0.81"/>
+                    <has_text text="1_1308_2_22_020-ch6-speaker16.TextGrid_TextGrid"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help>Search for segments within a TextGrid file. Outputs a table with start, end, duration,
+    label and identifier.  Use a regular expression to match segments, e.g. to match a or O or I
+    use (a|O|I); for any SAMPA-AU short vowel try (a|e|{|}|6|3|i|U|O|I).</help>
+
+    <citations>
+        <citation type='bibtex'>
+            @inproceedings{Buschmeir2013,
+            author = {Hendrik Buschmeier and Marcin Wlodarczak},
+            booktitle = {Tagungsband der 24. Konferenz zur Elektronischen Sprachsignalverarbeitung (ESSV 2013)},
+            pages = {152--157},
+            title = {{TextGridTools: A TextGrid Processing and Analysis Toolkit for Python}},
+            year = {2013}
+            }
+        </citation>
+    </citations>
+</tool>