Mercurial > repos > stevecassidy > alveoimport
changeset 4:3a9f20428cff draft
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
author | stevecassidy |
---|---|
date | Wed, 31 Aug 2016 22:07:27 -0400 |
parents | c0569deac8d9 |
children | e28c0258a09e |
files | __pycache__/pyalveo.cpython-35.pyc alveo_api_key.py alveo_api_key.xml alveo_get_item_data.py alveo_get_item_data.xml alveo_get_primary_text.py alveo_get_primary_text.xml alveo_item_list_downloader.py alveo_item_list_downloader.xml alveo_item_list_importer.py alveo_item_list_importer.xml austalk-select-hVd-words.py austalk-select-hVd-words.xml phonR_tool.R phonR_tool.xml test |
diffstat | 16 files changed, 792 insertions(+), 104 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_api_key.py Wed Aug 31 22:07:27 2016 -0400 @@ -0,0 +1,44 @@ +from __future__ import print_function +import argparse +import pyalveo +import sys + +API_URL = 'https://app.alveo.edu.au' + +def parser(): + parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists") + parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") + parser.add_argument('--output_path', required=True, action="store", type=str, help="File to store the API key in") + return parser.parse_args() + +def write_key(api_key, output_path, client_module=pyalveo): + """Tests whether an API key is valid and writes it to a file. + + :type api_key: String + :param api_key: Alveo API key + + :type output_path: String + :param output_path: Path to the file to store the API key in + + :type client_module: pyalveo.Client + :param client_module: Module providing the client (used for testing purposes), + defaults to pyalveo + + :raises: pyalveo.APIError if the API request is not successful + + """ + client = client_module.Client(api_key, API_URL, use_cache=False) + outfile = open(output_path, 'w') + outfile.write(api_key) + outfile.close() + +def main(): + args = parser() + try: + write_key(args.api_key, args.output_path) + except Exception as e: + print("ERROR: " + str(e), file=sys.stderr) + sys.exit(1) + +if __name__ == '__main__': + main()
<!-- File: alveo_api_key.xml : Galaxy wrapper for alveo_api_key.py -->
<tool id="alveo_api_key_storer" name="Store Alveo API Key" version="0.01" force_history_refresh="True">
    <description>for use with Alveo tools</description>

    <requirements>
        <requirement type="package" version="0.6">pyalveo</requirement>
    </requirements>

    <!-- Values are single-quoted so that a key containing spaces or shell
         metacharacters reaches the script as one argument. -->
    <command interpreter="python">
        alveo_api_key.py --api_key '$api_key' --output_path '$output'
    </command>

    <inputs>
        <param name="api_key" type="text" size="30" label="API Key" help="Your Alveo API key"/>
    </inputs>

    <outputs>
        <data format="txt" name="output" label="Alveo API key" />
    </outputs>

    <tests>
        <test>
            <param name="api_key" value="your api key here" />
            <output name="output" file="api-key.dat" compare="contains" />
        </test>
    </tests>

    <help>Stores Alveo API keys for use with the Alveo Galaxy tools.</help>

    <citations>
        <citation type='bibtex'>
            @article{cassidy2014alveo,
            title={The alveo virtual laboratory: a web based repository API},
            author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
            year={2014},
            publisher={Reykjavik, Iceland: European Language Resources Association}
            }
        </citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_get_item_data.py Wed Aug 31 22:07:27 2016 -0400 @@ -0,0 +1,101 @@ +from __future__ import print_function +import json +import argparse +import pyalveo +import sys +import os +from fnmatch import fnmatch +import csv + +API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module + +def parser(): + parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") + parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") + parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs") + parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") + parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") + return parser.parse_args() + +def read_item_list(filename, client): + """Read an item list from a file + which should be a tabular formatted file + with one column header ItemURL. + Return an instance of ItemGroup""" + + with open(filename) as fd: + csvreader = csv.DictReader(fd, dialect='excel-tab') + if 'ItemURL' not in csvreader.fieldnames: + return None + itemurls = [] + for row in csvreader: + itemurls.append(row['ItemURL']) + + print("ITEMS:", itemurls) + itemlist = pyalveo.ItemGroup(itemurls, client) + + return itemlist + +# this file name pattern allows galaxy to discover the dataset designation and type +FNPAT = "%(designation)s#%(ext)s" + +def galaxy_name(itemname, fname): + """construct a filename suitable for Galaxy dataset discovery + designation - (dataset identifier) is the file basename + ext - defines the dataset type and is the file extension + """ + + root, ext = os.path.splitext(fname) + ext = ext[1:] # remove initial . 
+ fname = FNPAT % {'designation': itemname, 'ext': ext} + + return fname + +def download_documents(item_list, patterns, output_path): + """ + Downloads a list of documents to the directory specificed by output_path. + + :type documents: list of pyalveo.Document + :param documents: Documents to download + + :type output_path: String + :param output_path: directory to download to the documents to + """ + if not os.path.exists(output_path): + os.makedirs(output_path) + + downloaded = [] + + items = item_list.get_all() + filtered_documents = [] + for item in items: + documents = item.get_documents() + for doc in documents: + for pattern in patterns: + if not pattern == '' and fnmatch(doc.get_filename(), pattern): + fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename()) + try: + doc.download_content(dir_path=output_path, filename=fname) + downloaded.append(doc.get_filename()) + except: + # maybe it doesn't exist or we have no access + # TODO: report this + pass + return downloaded + +def main(): + args = parser() + try: + api_key = open(args.api_key, 'r').read().strip() + + client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False) + + item_list = read_item_list(args.item_list, client) + patterns = args.patterns.split(',') + downloaded = download_documents(item_list, patterns, args.output_path) + except pyalveo.APIError as e: + print("ERROR: " + str(e), file=sys.stderr) + sys.exit(1) + +if __name__ == '__main__': + main()
<!-- File: alveo_get_item_data.xml : Galaxy wrapper for alveo_get_item_data.py -->
<tool id="alveo_get_item_data" name="Get Alveo Data for Items" version="0.01" force_history_refresh="True">
    <description>Downloads files from the items in a Galaxy list of items</description>

    <requirements>
        <requirement type="package" version="0.6">pyalveo</requirement>
    </requirements>

    <!-- The pattern list is single-quoted so the shell passes wildcards such
         as *.txt to the script instead of expanding them itself. -->
    <command interpreter="python">
        alveo_get_item_data.py --api_key '$api_key' --item_list '$item_list' --patterns '$patterns,$patternselect' --output_path ItemListData
    </command>

    <inputs>
        <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/>
        <param name="item_list" type="data" format="tabular" label="Item List (table)" help=""/>

        <param name="patternselect" type="select" multiple="true" label="Predefined imports" display="checkboxes">
            <option value='*'>All Files</option>
            <option value='*speaker16.wav'>Austalk 16bit/16kHz Speaker Headset WAV (*speaker16.wav)</option>
            <option value='*plain.txt'>Plain text documents (*plain.txt)</option>
            <option value='*.txt'>All text documents (*.txt)</option>
            <option value=''>Other - enter pattern below</option>
        </param>

        <param name="patterns" type="text" label="File patterns to import"
               optional="true"
               help="One or more file patterns separated by commas eg. *.wav,*.txt"/>

        <param name="job_name" type="text" size="25"
               label="Supply a name for the output to remind you what it contains" value="Alveo Data"/>
    </inputs>

    <outputs>
        <collection type="list" label="$job_name" name="output1">
            <!-- Must agree with FNPAT in alveo_get_item_data.py: designation#ext -->
            <discover_datasets pattern="(?P&lt;designation&gt;[^#]+)#(?P&lt;ext&gt;.+)" directory="ItemListData"/>
        </collection>
    </outputs>

    <tests>
        <test>
            <param name="api_key" value="api-key.dat"/>
            <param name="item_list" value="hvd-sample-items.dat"/>
            <param name="patterns" value="*.TextGrid"/>
            <param name="patternselect" value=""/>
            <param name="output_path" value="test_out"/>
            <output_collection name="output1" type="list" count="5">
                <element name="1_1308_2_22_023">
                    <assert_contents>
                        <has_text_matching expression="xmax = 1.020000"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>

    <help>Downloads files from a local list of Alveo items. You can download all files or those matching
        a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in
        your history.</help>
    <citations>
        <citation type='bibtex'>
            @article{cassidy2014alveo,
            title={The alveo virtual laboratory: a web based repository API},
            author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
            year={2014},
            publisher={Reykjavik, Iceland: European Language Resources Association}
            }
        </citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_get_primary_text.py Wed Aug 31 22:07:27 2016 -0400 @@ -0,0 +1,71 @@ +from __future__ import print_function +import json +import argparse +import pyalveo +import sys +import os +from fnmatch import fnmatch + +API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module + +def parser(): + parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") + parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") + parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download") + parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") + return parser.parse_args() + +def get_item_list(api_key, item_list_url): + client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False) + return client.get_item_list(item_list_url) + +# this file name pattern allows galaxy to discover the dataset designation and type +FNPAT = "%(designation)s_%(ext)s" + +def galaxy_name(fname, ext): + """construct a filename suitable for Galaxy dataset discovery""" + + fname = FNPAT % {'designation': fname, 'ext': ext} + + return fname +import pprint +def download_documents(item_list, output_path): + """ + Downloads a list of documents to the directory specificed by output_path. 
+ + :type documents: list of pyalveo.Document + :param documents: Documents to download + + :type output_path: String + :param output_path: directory to download to the documents to + """ + if not os.path.exists(output_path): + os.makedirs(output_path) + + downloaded = [] + + items = item_list.get_all() + filtered_documents = [] + for item in items: + md = item.metadata() + fname = os.path.join(output_path, galaxy_name(md['alveo:metadata']['dc:identifier'], 'txt')) + content = item.get_primary_text() + if not content == None: + with open(fname, 'w') as out: + out.write(content) + + return downloaded + +def main(): + args = parser() + try: + api_key = open(args.api_key, 'r').read().strip() + item_list = get_item_list(api_key, args.item_list_url) + downloaded = download_documents(item_list, args.output_path) + # write out a list of downloaded files as a result? + except pyalveo.APIError as e: + print("ERROR: " + str(e), file=sys.stderr) + sys.exit(1) + +if __name__ == '__main__': + main()
<!-- File: alveo_get_primary_text.xml : Galaxy wrapper for alveo_get_primary_text.py -->
<tool id="alveo_get_primary_text" name="Get Text from Alveo" version="0.01" force_history_refresh="True">
    <description>Downloads primary text from the items in an Alveo Item List</description>

    <requirements>
        <requirement type="package" version="0.6">pyalveo</requirement>
    </requirements>

    <!-- Values are single-quoted so each one reaches the script as a single
         argument regardless of the characters it contains. -->
    <command interpreter="python">
        alveo_get_primary_text.py --api_key '$api_key' --item_list_url '$item_list_url' --output_path ItemListData
    </command>

    <inputs>
        <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/>
        <param name="import_list" type="data" format="tabular" label="Imported Alveo Item List" help=""/>

        <param name="item_list_url" type="select" label="Alveo Item List" help="The Alveo Item List you wish to import">
            <options from_dataset="import_list">
                <column name="name" index="0"/>
                <column name="value" index="1"/>
            </options>
        </param>

        <param name="job_name" type="text" size="25"
               label="Supply a name for the outputs to remind you what they contain" value="Item List downloaded from Alveo"/>
    </inputs>

    <outputs>
        <collection type="list" label="$job_name" name="output1">
            <!-- Must agree with FNPAT in alveo_get_primary_text.py: designation_ext -->
            <discover_datasets pattern="(?P&lt;designation&gt;[^_]+)_(?P&lt;ext&gt;.+)" directory="ItemListData"/>
        </collection>
    </outputs>

    <tests>
        <test>
            <param name="api_key" value="api-key.dat"/>
            <param name="import_list" value="item-lists.dat"/>
            <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/>
            <param name="output_path" value="test_out"/>
            <output_collection name="output1" type="list" count="6">
                <element name="GCSAusE02">
                    <assert_contents>
                        <has_text_matching expression="background noises"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <test>
            <param name="api_key" value="api-key.dat"/>
            <param name="import_list" value="item-lists.dat"/>
            <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/52"/>
            <param name="output_path" value="test_out"/>
            <output_collection name="output1" type="list" count="0">
            </output_collection>
        </test>
    </tests>

    <help>Downloads the primary text for each item from an Alveo Item List</help>
    <citations>
        <citation type='bibtex'>
            @article{cassidy2014alveo,
            title={The alveo virtual laboratory: a web based repository API},
            author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
            year={2014},
            publisher={Reykjavik, Iceland: European Language Resources Association}
            }
        </citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_item_list_downloader.py Wed Aug 31 22:07:27 2016 -0400 @@ -0,0 +1,85 @@ +from __future__ import print_function +import json +import argparse +import pyalveo +import sys +import os +from fnmatch import fnmatch + +API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module + +def parser(): + parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") + parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") + parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download") + parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") + parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") + return parser.parse_args() + +def get_item_list(api_key, item_list_url): + client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False) + return client.get_item_list(item_list_url) + +# this file name pattern allows galaxy to discover the dataset designation and type +FNPAT = "%(designation)s_%(ext)s" + + +def galaxy_name(itemname, fname): + """construct a filename suitable for Galaxy dataset discovery + designation - (dataset identifier) is the file basename + ext - defines the dataset type and is the file extension + """ + + root, ext = os.path.splitext(fname) + ext = ext[1:] # remove initial . + fname = FNPAT % {'designation': itemname, 'ext': ext} + + return fname + + +def download_documents(item_list, patterns, output_path): + """ + Downloads a list of documents to the directory specificed by output_path. 
+ + :type documents: list of pyalveo.Document + :param documents: Documents to download + + :type output_path: String + :param output_path: directory to download to the documents to + """ + if not os.path.exists(output_path): + os.makedirs(output_path) + + downloaded = [] + + items = item_list.get_all() + filtered_documents = [] + for item in items: + documents = item.get_documents() + for doc in documents: + for pattern in patterns: + if not pattern == '' and fnmatch(doc.get_filename(), pattern): + fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename()) + try: + doc.download_content(dir_path=output_path, filename=fname) + downloaded.append(doc.get_filename()) + except: + # maybe it doesn't exist or we have no access + # TODO: report this + pass + return downloaded + +def main(): + args = parser() + try: + api_key = open(args.api_key, 'r').read().strip() + item_list = get_item_list(api_key, args.item_list_url) + patterns = args.patterns.split(',') + downloaded = download_documents(item_list, patterns, args.output_path) + # write out a list of downloaded files as a result? + except pyalveo.APIError as e: + print("ERROR: " + str(e), file=sys.stderr) + sys.exit(1) + +if __name__ == '__main__': + main()
<!-- File: alveo_item_list_downloader.xml : Galaxy wrapper for alveo_item_list_downloader.py -->
<tool id="alveo_item_list_downloader" name="Get Files from Alveo" version="0.01" force_history_refresh="True">
    <description>Downloads files from the items in an Alveo Item List</description>

    <requirements>
        <requirement type="package" version="0.6">pyalveo</requirement>
    </requirements>

    <!-- The pattern list is single-quoted so the shell passes wildcards such
         as *.txt to the script instead of expanding them itself. -->
    <command interpreter="python">
        alveo_item_list_downloader.py --api_key '$api_key' --item_list_url '$item_list_url' --patterns '$patterns,$patternselect' --output_path ItemListData
    </command>

    <inputs>
        <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/>
        <param name="import_list" type="data" format="tabular" label="Imported Alveo Item List" help=""/>

        <param name="item_list_url" type="select" label="Alveo Item List" help="The Alveo Item List you wish to import">
            <options from_dataset="import_list">
                <column name="name" index="0"/>
                <column name="value" index="1"/>
            </options>
        </param>

        <param name="patternselect" type="select" multiple="true" label="Predefined imports" display="checkboxes">
            <option value='*'>All Files</option>
            <option value='*speaker16.wav'>Austalk 16bit/16kHz Speaker Headset WAV (*speaker16.wav)</option>
            <option value='*plain.txt'>Plain text documents (*plain.txt)</option>
            <option value='*.txt'>All text documents (*.txt)</option>
            <option value=''>Other - enter pattern below</option>
        </param>

        <param name="patterns" type="text" label="File patterns to import"
               optional="true"
               help="One or more file patterns separated by commas eg. *.wav,*.txt"/>

        <param name="job_name" type="text" size="25"
               label="Supply a name for the outputs to remind you what they contain" value="Item List downloaded from Alveo"/>
    </inputs>

    <outputs>
        <collection type="list" label="$job_name" name="output1">
            <!-- Must agree with FNPAT in alveo_item_list_downloader.py: designation_ext -->
            <discover_datasets pattern="(?P&lt;designation&gt;[^_]+)_(?P&lt;ext&gt;.+)" directory="ItemListData"/>
        </collection>
    </outputs>

    <tests>
        <test>
            <param name="api_key" value="api-key.dat"/>
            <param name="import_list" value="item-lists.dat"/>
            <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/>
            <param name="patterns" value=""/>
            <param name="patternselect" value="*plain.txt"/>
            <param name="output_path" value="test_out"/>
            <output_collection name="output1" type="list" count="6">
                <element name="GCSAusE02">
                    <assert_contents>
                        <has_text_matching expression="background noises"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <test>
            <param name="api_key" value="api-key.dat"/>
            <param name="import_list" value="item-lists.dat"/>
            <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/>
            <param name="patterns" value="*plain.txt"/>
            <param name="patternselect" value=""/>
            <param name="output_path" value="test_out"/>
            <output_collection name="output1" type="list" count="6">
                <element name="GCSAusE02">
                    <assert_contents>
                        <has_text_matching expression="background noises"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>

    <help>Downloads files from an Alveo Item list. You can download all files or those matching
        a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in
        your history.</help>
    <citations>
        <citation type='bibtex'>
            @article{cassidy2014alveo,
            title={The alveo virtual laboratory: a web based repository API},
            author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
            year={2014},
            publisher={Reykjavik, Iceland: European Language Resources Association}
            }
        </citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_item_list_importer.py Wed Aug 31 22:07:27 2016 -0400 @@ -0,0 +1,41 @@ +from __future__ import print_function +import json +import argparse +import pyalveo +import sys + +API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module + + +def parser(): + parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists") + parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") + parser.add_argument('--output', required=True, action="store", type=str, help="Path to output file") + return parser.parse_args() + +# TODO: export common function to helper module +def get_item_lists(api_key): + client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False) + return client.get_item_lists() + +def write_table(item_lists, filename): + with open(filename, 'w') as outfile: + for list_set in item_lists.values(): + for item_list in list_set: + outfile.write("%s (%d)\t%s\n" % (item_list['name'], item_list['num_items'], item_list['item_list_url'])) + +def main(): + args = parser() + try: + api_key = open(args.api_key, 'r').read().strip() + item_lists = get_item_lists(api_key) + print(item_lists) + if item_lists: + write_table(item_lists, args.output) + except Exception as e: + print("ERROR: " + str(e), file=sys.stderr) + sys.exit(1) + + +if __name__ == '__main__': + main()
<!-- File: alveo_item_list_importer.xml : Galaxy wrapper for alveo_item_list_importer.py -->
<tool id="alveo_item_list_importer" name="Get Alveo Item Lists" version="0.01" force_history_refresh="True">
    <description>Retrieves item list metadata.</description>

    <requirements>
        <requirement type="package" version="0.6">pyalveo</requirement>
    </requirements>

    <!-- Paths are single-quoted so each reaches the script as one argument. -->
    <command interpreter="python">
        alveo_item_list_importer.py --api_key '$api_key' --output '$item_list'
    </command>


    <inputs>
        <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/>
        <param name="job_name" type="text" size="25"
               label="Supply a name for the outputs to remind you what they contain" value="Alveo Item Lists"/>
    </inputs>

    <outputs>
        <data format="tabular" name="item_list" label="${job_name}"/>
    </outputs>

    <help>Import Item Lists from Alveo. This imports the lists, but does not download the individual items.
        That task is performed by the *Get Files from Alveo* tool.
    </help>

    <citations>
        <citation type='bibtex'>
            @article{cassidy2014alveo,
            title={The alveo virtual laboratory: a web based repository API},
            author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
            year={2014},
            publisher={Reykjavik, Iceland: European Language Resources Association}
            }
        </citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/austalk-select-hVd-words.py Wed Aug 31 22:07:27 2016 -0400 @@ -0,0 +1,87 @@ +from __future__ import print_function +import argparse +import pyalveo +import sys + +API_URL = 'https://app.alveo.edu.au/' +PREFIXES = """ +PREFIX dc:<http://purl.org/dc/terms/> +PREFIX austalk:<http://ns.austalk.edu.au/> +PREFIX olac:<http://www.language-archives.org/OLAC/1.1/> +PREFIX ausnc:<http://ns.ausnc.org.au/schemas/ausnc_md_model/> +PREFIX foaf:<http://xmlns.com/foaf/0.1/> +PREFIX dbpedia:<http://dbpedia.org/ontology/> +PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> +PREFIX geo:<http://www.w3.org/2003/01/geo/wgs84_pos#> +PREFIX iso639schema:<http://downlode.org/rdf/iso-639/schema#> +PREFIX austalkid:<http://id.austalk.edu.au/> +PREFIX iso639:<http://downlode.org/rdf/iso-639/languages#> +PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> +PREFIX is: <http://purl.org/ontology/is/core#> +PREFIX iso: <http://purl.org/iso25964/skos-thes#> +PREFIX dada: <http://purl.org/dada/schema/0.2#>""" + +def parser(): + parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists") + parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") + parser.add_argument('--speaker', required=True, action="store", type=str, help="Speaker identifier") + parser.add_argument('--words', required=False, default='all', action="store", type=str, help="Word group (all, monopthongs, dipthongs)") + parser.add_argument('--output', required=True, action="store", type=str, help="output file name") + return parser.parse_args() + +def find_hVd_words(api_key, speakerid, output, words='all'): + """Find words in the Austalk corpus + """ + + client = pyalveo.Client(api_key, API_URL, use_cache=False) + + query = PREFIXES + """ +SELECT distinct ?item ?prompt ?compname +WHERE { + ?item a ausnc:AusNCObject . + ?item olac:speaker ?speaker . + ?speaker austalk:id "%s" . 
+ ?item austalk:prompt ?prompt . + ?item austalk:componentName ?compname . + """ % speakerid + + hVdWords = { + 'monopthongs': ['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod', "whod"], + 'dipthongs': ['herd', 'howd', 'hoyd', 'haired', 'hard', 'heared'] + } + + if words == 'all': + words = hVdWords['monopthongs'] + hVdWords['dipthongs'] + else: + words = hVdWords[words] + + filterclause = 'FILTER regex(?prompt, "^' + filterclause += '$|^'.join(words) + filterclause += '$", "i")\n' + + query += filterclause + "}" + + result = client.sparql_query('austalk', query) + + items = [] + for b in result['results']['bindings']: + items.append((b['prompt']['value'], b['item']['value'])) + + with open(output, 'w') as out: + out.write("Speaker\tPrompt\tItemURL\n") + for item in items: + out.write(speakerid + "\t" + item[0] + "\t" + item[1] + "\n") + + +def main(): + args = parser() + try: + api_key = open(args.api_key, 'r').read().strip() + find_hVd_words(api_key, args.speaker, args.output, args.words) + except Exception as e: + print("ERROR: " + str(e), file=sys.stderr) + sys.exit(1) + +if __name__ == '__main__': + main()
<!-- File: austalk-select-hVd-words.xml : Galaxy wrapper for austalk-select-hVd-words.py -->
<tool id="austalk-select-hvd-words" name="Find HVD words in Austalk" version="0.01" force_history_refresh="True">
    <description>for a single speaker</description>

    <requirements>
        <requirement type="package" version="0.6">pyalveo</requirement>
    </requirements>

    <!-- The speaker id is typed in by the user; values are single-quoted so
         each one reaches the script as a single argument. -->
    <command interpreter="python">
        austalk-select-hVd-words.py --api_key '$api_key' --speaker '$speaker' --words '$words' --output '$output'
    </command>

    <inputs>
        <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/>
        <param name="speaker" type="text" format="text" label="Speaker ID" help="e.g. 1_123"/>
        <param name="words" type="select" multiple="false" label="Word List" display="radioboxes">
            <option value='all'>All hVd words</option>
            <option value='monopthongs'>hVd monopthongs</option>
            <option value='dipthongs'>hVd dipthongs</option>
        </param>
        <param name="job_name" type="text" size="25"
               label="Supply a name for the output to remind you what it contains" value="Query Results"/>
    </inputs>

    <outputs>
        <data format="tabular" name="output" label="$job_name" />
    </outputs>

    <tests>
    </tests>

    <help>Find items corresponding to the hVd words for this Austalk speaker.</help>

    <!-- NOTE(review): this entry cites TextGridTools, which the script does
         not appear to use; the other Alveo tools cite cassidy2014alveo.
         Presumably a copy-paste leftover; confirm and replace. -->
    <citations>
        <citation type='bibtex'>
            @inproceedings{Buschmeir2013,
            author = {{Hendrik Buschmeier}, Marcin Wlodarczak},
            booktitle = {Tagungsband der 24. Konferenz zur Elektronischen Sprachsignalverarbeitung (ESSV 2013)},
            pages = {152--157},
            title = {{TextGridTools: A TextGrid Processing and Analysis Toolkit for Python}},
            year = {2013}
            }
        </citation>
    </citations>
</tool>
--- a/phonR_tool.R Wed Aug 31 22:02:30 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -# -# Galaxy tool that plots Vowels using the phonR package. -# Accepts 8 inputs of the form <tsv data file> <output file name> <column1> <column2> <optPretty> <optEllipse> <optTokens> <optMeans> -# Created by Michael Bauer -# -library(phonR) -library('getopt') - -#create options -option_specification = matrix(c( - 'outdir', 'f', 1, 'character', - 'htmlfile', 'h', 1, 'character', - 'inputfile', 'i', 1, 'character', - 'column1', 'y', 1, 'integer', - 'column2', 'z', 1, 'integer', - 'columnvowels', 'x', 1, 'integer', - 'pretty', 'p', 1, 'logical', - 'ellipse', 'e', 1, 'logical', - 'tokens', 't', 1, 'logical', - 'means', 'm', 1, 'logical', - 'cextokens', 'c', 1, 'numeric', - 'alphatokens', 'a', 1, 'numeric', - 'cexmeans', 'b', 1, 'numeric' -), byrow=TRUE, ncol=4); - -# Parse options -options = getopt(option_specification); - -if (!is.null(options$outdir)) { - # Create the directory - dir.create(options$outdir,FALSE) -} - -pngfile <- gsub("[ ]+", "", paste(options$outdir,"/pngfile.png")) -htmlfile <- gsub("[ ]+", "", paste(options$htmlfile)) - -data = read.table(options$inputfile,sep="\t", header=TRUE); - -png(pngfile); - -plotVowels(data[,options$column1], data[,options$column2], data[,options$columnvowels], plot.tokens = options$tokens, - pch.tokens = data[,options$columnvowels], cex.tokens = options$cextokens, alpha.tokens = options$alphatokens, - plot.means = options$means, pch.means = data[,options$columnvowels], cex.means = options$cexmeans, - var.col.by = data[,options$columnvowels], ellipse.line = options$ellipse, pretty = options$pretty) -dev.off(); - -htmlfile_handle <- file(htmlfile) -html_output = c('<html><body>', - '<h3>Result:</h3><img src="pngfile.png"/>', - '</html></body>'); -writeLines(html_output, htmlfile_handle); -close(htmlfile_handle);
--- a/phonR_tool.xml Wed Aug 31 22:02:30 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -<tool id="vowel_plot" name="Plot Vowels" version="0.01" force_history_refresh="True"> - <description>Using phonR to produce a Vowel Plot</description> - - <requirements> - <requirement type="package" version="3.2.1">R</requirement> - <requirement type="package" version="1.0.3">phonR</requirement> - </requirements> - - <command interpreter="Rscript"> - $__tool_directory__/phonR_tool.R --input="${input}" --outdir="$htmlfile.files_path" --htmlfile="$htmlfile" --column1="${f1}" --column2="${f2}" --columnvowels="${vowel}" --pretty="${pretty}" --ellipse="${ellipse}" --tokens="${tokens}" --means="${means}" --cextokens="${cextokens}" --alphatokens="${alphatokens}" --cexmeans="${cexmeans}" - </command> - - <inputs> - <param name="input" type="data" format="tabular" label="Segment List" help=""/> - <param name="f1" type="data_column" data_ref="input" label="Column for f1" force_select="true" use_header_names="true"/> - <param name="f2" type="data_column" data_ref="input" label="Column for f2" force_select="true" use_header_names="true"/> - <param name="vowel" type="data_column" data_ref="input" label="Column with Vowels" force_select="true" use_header_names="true"/> - <param name="pretty" type="boolean" label="Make Pretty" - truevalue="TRUE" falsevalue="FALSE" checked="True" - help="Will apply various beautification techniques." /> - <param name="ellipse" type="boolean" label="Add Ellipses" - truevalue="TRUE" falsevalue="FALSE" checked="True" - help="Will add an ellipse around the location of each vowel cluster." /> - <param name="tokens" type="boolean" label="Add Tokens" - truevalue="TRUE" falsevalue="FALSE" checked="True" - help="Will add tokens to the plot." /> - <param name="means" type="boolean" label="Add Means" - truevalue="TRUE" falsevalue="FALSE" checked="True" - help="Will add means to the plot." 
/> - <param name="cextokens" type="float" label="cex.tokens" value="1.2" min="0.0" max="10.0" - help="Size of tokens on the plot." /> - <param name="alphatokens" type="float" label="alpha.tokens" value="0.2" min="0.0" max="1.0" - help="The alpha of the tokens on the plot (transparency). NOTE: Must be a value betweek 0 and 1!" /> - <param name="cexmeans" type="float" label="cex.means" value="2" min="0.0" max="10.0" - help="Size of the means on the plot." /> - </inputs> - - <outputs> - <data format="html" name="htmlfile" label="output.html" /> - </outputs> - - - <tests> - <test> - </test> - </tests> - - <help> - Will make a vowel plot from given data. Best used from data directly run from the "Get Formants at segment midpoint" Tool. - </help> - -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test Wed Aug 31 22:07:27 2016 -0400 @@ -0,0 +1,16 @@ +Adelaide (6840) https://app.alveo.edu.au/item_lists/478 +austalk_catepillar (309) https://app.alveo.edu.au/item_lists/64 +different (888) https://app.alveo.edu.au/item_lists/132 +gum-tree (58) https://app.alveo.edu.au/item_lists/84 +M&D_Test_140904 (10) https://app.alveo.edu.au/item_lists/168 +rose (245) https://app.alveo.edu.au/item_lists/82 +thistle (16) https://app.alveo.edu.au/item_lists/83 +ace-specialised (122) https://app.alveo.edu.au/item_lists/178 +austalk-adult-csl-child-match (250) https://app.alveo.edu.au/item_lists/904 +austalk-digits (144) https://app.alveo.edu.au/item_lists/517 +austalk_textgrid_sample (10) https://app.alveo.edu.au/item_lists/484 +COOEE ALL (1354) https://app.alveo.edu.au/item_lists/95 +cooee sample (129) https://app.alveo.edu.au/item_lists/53 +dialogue-all (76) https://app.alveo.edu.au/item_lists/116 +dialogue-sample (6) https://app.alveo.edu.au/item_lists/180 +mdsample (20) https://app.alveo.edu.au/item_lists/52