Mercurial > repos > stevecassidy > alveoimport
changeset 3:c0569deac8d9 draft
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
author | stevecassidy |
---|---|
date | Wed, 31 Aug 2016 22:02:30 -0400 |
parents | 7b6021997b8e |
children | 3a9f20428cff |
files | __pycache__/pyalveo.cpython-35.pyc alveo_api_key.py alveo_api_key.xml alveo_get_item_data.py alveo_get_item_data.xml alveo_get_primary_text.py alveo_get_primary_text.xml alveo_item_list_downloader.py alveo_item_list_downloader.xml alveo_item_list_importer.py alveo_item_list_importer.xml austalk-select-hVd-words.py austalk-select-hVd-words.xml phonR_tool.R phonR_tool.xml test |
diffstat | 16 files changed, 104 insertions(+), 792 deletions(-) [+] |
line wrap: on
line diff
--- a/alveo_api_key.py Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ -from __future__ import print_function -import argparse -import pyalveo -import sys - -API_URL = 'https://app.alveo.edu.au' - -def parser(): - parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists") - parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") - parser.add_argument('--output_path', required=True, action="store", type=str, help="File to store the API key in") - return parser.parse_args() - -def write_key(api_key, output_path, client_module=pyalveo): - """Tests whether an API key is valid and writes it to a file. - - :type api_key: String - :param api_key: Alveo API key - - :type output_path: String - :param output_path: Path to the file to store the API key in - - :type client_module: pyalveo.Client - :param client_module: Module providing the client (used for testing purposes), - defaults to pyalveo - - :raises: pyalveo.APIError if the API request is not successful - - """ - client = client_module.Client(api_key, API_URL, use_cache=False) - outfile = open(output_path, 'w') - outfile.write(api_key) - outfile.close() - -def main(): - args = parser() - try: - write_key(args.api_key, args.output_path) - except Exception as e: - print("ERROR: " + str(e), file=sys.stderr) - sys.exit(1) - -if __name__ == '__main__': - main()
--- a/alveo_api_key.xml Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -<tool id="alveo_api_key_storer" name="Store Alveo API Key" version="0.01" force_history_refresh="True"> - <description>for use with Alveo tools</description> - - <requirements> - <requirement type="package" version="0.6">pyalveo</requirement> - </requirements> - - <command interpreter="python"> - alveo_api_key.py --api_key $api_key --output_path $output - </command> - - <inputs> - <param name="api_key" type="text" size="30" label="API Key" help="Your Alveo API key"/> - </inputs> - - <outputs> - <data format="txt" name="output" label="Alveo API key" /> - </outputs> - - <tests> - <test> - <param name="api_key" value="your api key here" /> - <output name="output" file="api-key.dat" compare="contains" /> - </test> - </tests> - - <help>Stores Alveo API keys for use with the Alveo Galaxy tools.</help> - - <citations> - <citation type='bibtex'> - @article{cassidy2014alveo, - title={The alveo virtual laboratory: a web based repository API}, - author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others}, - year={2014}, - publisher={Reykjavik, Iceland: European Language Resources Association} - } - </citation> - </citations> -</tool>
--- a/alveo_get_item_data.py Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ -from __future__ import print_function -import json -import argparse -import pyalveo -import sys -import os -from fnmatch import fnmatch -import csv - -API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module - -def parser(): - parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") - parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") - parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs") - parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") - parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") - return parser.parse_args() - -def read_item_list(filename, client): - """Read an item list from a file - which should be a tabular formatted file - with one column header ItemURL. - Return an instance of ItemGroup""" - - with open(filename) as fd: - csvreader = csv.DictReader(fd, dialect='excel-tab') - if 'ItemURL' not in csvreader.fieldnames: - return None - itemurls = [] - for row in csvreader: - itemurls.append(row['ItemURL']) - - print("ITEMS:", itemurls) - itemlist = pyalveo.ItemGroup(itemurls, client) - - return itemlist - -# this file name pattern allows galaxy to discover the dataset designation and type -FNPAT = "%(designation)s#%(ext)s" - -def galaxy_name(itemname, fname): - """construct a filename suitable for Galaxy dataset discovery - designation - (dataset identifier) is the file basename - ext - defines the dataset type and is the file extension - """ - - root, ext = os.path.splitext(fname) - ext = ext[1:] # remove initial . - fname = FNPAT % {'designation': itemname, 'ext': ext} - - return fname - -def download_documents(item_list, patterns, output_path): - """ - Downloads a list of documents to the directory specificed by output_path. - - :type documents: list of pyalveo.Document - :param documents: Documents to download - - :type output_path: String - :param output_path: directory to download to the documents to - """ - if not os.path.exists(output_path): - os.makedirs(output_path) - - downloaded = [] - - items = item_list.get_all() - filtered_documents = [] - for item in items: - documents = item.get_documents() - for doc in documents: - for pattern in patterns: - if not pattern == '' and fnmatch(doc.get_filename(), pattern): - fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename()) - try: - doc.download_content(dir_path=output_path, filename=fname) - downloaded.append(doc.get_filename()) - except: - # maybe it doesn't exist or we have no access - # TODO: report this - pass - return downloaded - -def main(): - args = parser() - try: - api_key = open(args.api_key, 'r').read().strip() - - client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False) - - item_list = read_item_list(args.item_list, client) - patterns = args.patterns.split(',') - downloaded = download_documents(item_list, patterns, args.output_path) - except pyalveo.APIError as e: - print("ERROR: " + str(e), file=sys.stderr) - sys.exit(1) - -if __name__ == '__main__': - main()
--- a/alveo_get_item_data.xml Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ -<tool id="alveo_get_item_data" name="Get Alveo Data for Items" version="0.01" force_history_refresh="True"> - <description>Downloads files from the items in an Galaxy list of items</description> - - <requirements> - <requirement type="package" version="0.6">pyalveo</requirement> - </requirements> - - <command interpreter="python"> - alveo_get_item_data.py --api_key $api_key --item_list $item_list --patterns $patterns,$patternselect --output_path ItemListData - </command> - - <inputs> - <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/> - <param name="item_list" type="data" format="tabular" label="Item List (table)" help=""/> - - <param name="patternselect" type="select" multiple="true" label="Predefined imports" display="checkboxes"> - <option value='*'>All Files</option> - <option value='*speaker16.wav'>Austalk 16bit/16kHz Speaker Headset WAV (*speaker16.wav)</option> - <option value='*plain.txt'>Plain text documents (*plain.txt)</option> - <option value='*.txt'>All text documents (*.txt)</option> - <option value=''>Other - enter pattern below</option> - </param> - - <param name="patterns" type="text" label="File patterns to import" - optional="true" - help="One or more file patterns separated by commas eg. *.wav,*.txt"/> - - <param name="job_name" type="text" size="25" - label="Supply a name for the output to remind you what it contains" value="Alveo Data"/> - </inputs> - - <outputs> - <collection type="list" label="$job_name" name="output1"> - <discover_datasets pattern="(?P<designation>[^#]+)#(?P<ext>.+)" directory="ItemListData"/> - </collection> - </outputs> - - <tests> - <test> - <param name="api_key" value="api-key.dat"/> - <param name="item_list" value="hvd-sample-items.dat"/> - <param name="patterns" value="*.TextGrid"/> - <param name="patternselect" value=""/> - <param name="output_path" value="test_out"/> - <output_collection name="output1" type="list" count="5"> - <element name="1_1308_2_22_023"> - <assert_contents> - <has_text_matching expression="xmax = 1.020000"/> - </assert_contents> - </element> - </output_collection> - </test> - </tests> - - <help>Downloads files from a local list of Alveo items. You can download all files or those matching - a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in - your history.</help> - <citations> - <citation type='bibtex'> - @article{cassidy2014alveo, - title={The alveo virtual laboratory: a web based repository API}, - author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others}, - year={2014}, - publisher={Reykjavik, Iceland: European Language Resources Association} - } - </citation> - </citations> -</tool>
--- a/alveo_get_primary_text.py Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,71 +0,0 @@ -from __future__ import print_function -import json -import argparse -import pyalveo -import sys -import os -from fnmatch import fnmatch - -API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module - -def parser(): - parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") - parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") - parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download") - parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") - return parser.parse_args() - -def get_item_list(api_key, item_list_url): - client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False) - return client.get_item_list(item_list_url) - -# this file name pattern allows galaxy to discover the dataset designation and type -FNPAT = "%(designation)s_%(ext)s" - -def galaxy_name(fname, ext): - """construct a filename suitable for Galaxy dataset discovery""" - - fname = FNPAT % {'designation': fname, 'ext': ext} - - return fname -import pprint -def download_documents(item_list, output_path): - """ - Downloads a list of documents to the directory specificed by output_path. - - :type documents: list of pyalveo.Document - :param documents: Documents to download - - :type output_path: String - :param output_path: directory to download to the documents to - """ - if not os.path.exists(output_path): - os.makedirs(output_path) - - downloaded = [] - - items = item_list.get_all() - filtered_documents = [] - for item in items: - md = item.metadata() - fname = os.path.join(output_path, galaxy_name(md['alveo:metadata']['dc:identifier'], 'txt')) - content = item.get_primary_text() - if not content == None: - with open(fname, 'w') as out: - out.write(content) - - return downloaded - -def main(): - args = parser() - try: - api_key = open(args.api_key, 'r').read().strip() - item_list = get_item_list(api_key, args.item_list_url) - downloaded = download_documents(item_list, args.output_path) - # write out a list of downloaded files as a result? - except pyalveo.APIError as e: - print("ERROR: " + str(e), file=sys.stderr) - sys.exit(1) - -if __name__ == '__main__': - main()
--- a/alveo_get_primary_text.xml Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ -<tool id="alveo_get_primary_text" name="Get Text from Alveo" version="0.01" force_history_refresh="True"> - <description>Downloads primary text from the items in an Alveo Item List</description> - - <requirements> - <requirement type="package" version="0.6">pyalveo</requirement> - </requirements> - - <command interpreter="python"> - alveo_get_primary_text.py --api_key $api_key --item_list_url $item_list_url --output_path ItemListData - </command> - - <inputs> - <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/> - <param name="import_list" type="data" format="tabular" label="Imported Alveo Item List" help=""/> - - <param name="item_list_url" type="select" label="Alveo Item List" help="The Alveo Item List you wish to import"> - <options from_dataset="import_list"> - <column name="name" index="0"/> - <column name="value" index="1"/> - </options> - </param> - - <param name="job_name" type="text" size="25" - label="Supply a name for the outputs to remind you what they contain" value="Item List downloaded from Alveo"/> - </inputs> - - <outputs> - <collection type="list" label="$job_name" name="output1"> - <discover_datasets pattern="(?P<designation>[^_]+)_(?P<ext>.+)" directory="ItemListData"/> - </collection> - </outputs> - - <tests> - <test> - <param name="api_key" value="api-key.dat"/> - <param name="import_list" value="item-lists.dat"/> - <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/> - <param name="output_path" value="test_out"/> - <output_collection name="output1" type="list" count="6"> - <element name="GCSAusE02"> - <assert_contents> - <has_text_matching expression="background noises"/> - </assert_contents> - </element> - </output_collection> - </test> - <test> - <param name="api_key" value="api-key.dat"/> - <param name="import_list" value="item-lists.dat"/> - <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/52"/> - <param name="output_path" value="test_out"/> - <output_collection name="output1" type="list" count="0"> - </output_collection> - </test> - </tests> - - <help>Downloads the primary text for each item from an Alveo Item List</help> - <citations> - <citation type='bibtex'> - @article{cassidy2014alveo, - title={The alveo virtual laboratory: a web based repository API}, - author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others}, - year={2014}, - publisher={Reykjavik, Iceland: European Language Resources Association} - } - </citation> - </citations> -</tool>
--- a/alveo_item_list_downloader.py Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ -from __future__ import print_function -import json -import argparse -import pyalveo -import sys -import os -from fnmatch import fnmatch - -API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module - -def parser(): - parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") - parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") - parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download") - parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") - parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") - return parser.parse_args() - -def get_item_list(api_key, item_list_url): - client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False) - return client.get_item_list(item_list_url) - -# this file name pattern allows galaxy to discover the dataset designation and type -FNPAT = "%(designation)s_%(ext)s" - - -def galaxy_name(itemname, fname): - """construct a filename suitable for Galaxy dataset discovery - designation - (dataset identifier) is the file basename - ext - defines the dataset type and is the file extension - """ - - root, ext = os.path.splitext(fname) - ext = ext[1:] # remove initial . - fname = FNPAT % {'designation': itemname, 'ext': ext} - - return fname - - -def download_documents(item_list, patterns, output_path): - """ - Downloads a list of documents to the directory specificed by output_path. - - :type documents: list of pyalveo.Document - :param documents: Documents to download - - :type output_path: String - :param output_path: directory to download to the documents to - """ - if not os.path.exists(output_path): - os.makedirs(output_path) - - downloaded = [] - - items = item_list.get_all() - filtered_documents = [] - for item in items: - documents = item.get_documents() - for doc in documents: - for pattern in patterns: - if not pattern == '' and fnmatch(doc.get_filename(), pattern): - fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename()) - try: - doc.download_content(dir_path=output_path, filename=fname) - downloaded.append(doc.get_filename()) - except: - # maybe it doesn't exist or we have no access - # TODO: report this - pass - return downloaded - -def main(): - args = parser() - try: - api_key = open(args.api_key, 'r').read().strip() - item_list = get_item_list(api_key, args.item_list_url) - patterns = args.patterns.split(',') - downloaded = download_documents(item_list, patterns, args.output_path) - # write out a list of downloaded files as a result? - except pyalveo.APIError as e: - print("ERROR: " + str(e), file=sys.stderr) - sys.exit(1) - -if __name__ == '__main__': - main()
--- a/alveo_item_list_downloader.xml Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,91 +0,0 @@ -<tool id="alveo_item_list_downloader" name="Get Files from Alveo" version="0.01" force_history_refresh="True"> - <description>Downloads files from the items in an Alveo Item List</description> - - <requirements> - <requirement type="package" version="0.6">pyalveo</requirement> - </requirements> - - <command interpreter="python"> - alveo_item_list_downloader.py --api_key $api_key --item_list_url $item_list_url --patterns $patterns,$patternselect --output_path ItemListData - </command> - - <inputs> - <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/> - <param name="import_list" type="data" format="tabular" label="Imported Alveo Item List" help=""/> - - <param name="item_list_url" type="select" label="Alveo Item List" help="The Alveo Item List you wish to import"> - <options from_dataset="import_list"> - <column name="name" index="0"/> - <column name="value" index="1"/> - </options> - </param> - - <param name="patternselect" type="select" multiple="true" label="Predefined imports" display="checkboxes"> - <option value='*'>All Files</option> - <option value='*speaker16.wav'>Austalk 16bit/16kHz Speaker Headset WAV (*speaker16.wav)</option> - <option value='*plain.txt'>Plain text documents (*plain.txt)</option> - <option value='*.txt'>All text documents (*.txt)</option> - <option value=''>Other - enter pattern below</option> - </param> - - <param name="patterns" type="text" label="File patterns to import" - optional="true" - help="One or more file patterns separated by commas eg. *.wav,*.txt"/> - - <param name="job_name" type="text" size="25" - label="Supply a name for the outputs to remind you what they contain" value="Item List downloaded from Alveo"/> - </inputs> - - <outputs> - <collection type="list" label="$job_name" name="output1"> - <discover_datasets pattern="(?P<designation>[^_]+)_(?P<ext>.+)" directory="ItemListData"/> - </collection> - </outputs> - - <tests> - <test> - <param name="api_key" value="api-key.dat"/> - <param name="import_list" value="item-lists.dat"/> - <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/> - <param name="patterns" value=""/> - <param name="patternselect" value="*plain.txt"/> - <param name="output_path" value="test_out"/> - <output_collection name="output1" type="list" count="6"> - <element name="GCSAusE02"> - <assert_contents> - <has_text_matching expression="background noises"/> - </assert_contents> - </element> - </output_collection> - </test> - <test> - <param name="api_key" value="api-key.dat"/> - <param name="import_list" value="item-lists.dat"/> - <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/> - <param name="patterns" value="*plain.txt"/> - <param name="patternselect" value=""/> - <param name="output_path" value="test_out"/> - <output_collection name="output1" type="list" count="6"> - <element name="GCSAusE02"> - <assert_contents> - <has_text_matching expression="background noises"/> - </assert_contents> - </element> - </output_collection> - </test> - </tests> - - <help>Downloads files from an Alveo Item list. You can download all files or those matching - a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in - your history.</help> - <citations> - <citation type='bibtex'> - @article{cassidy2014alveo, - title={The alveo virtual laboratory: a web based repository API}, - author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others}, - year={2014}, - publisher={Reykjavik, Iceland: European Language Resources Association} - } - </citation> - </citations> -</tool>
--- a/alveo_item_list_importer.py Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -from __future__ import print_function -import json -import argparse -import pyalveo -import sys - -API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module - - -def parser(): - parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists") - parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") - parser.add_argument('--output', required=True, action="store", type=str, help="Path to output file") - return parser.parse_args() - -# TODO: export common function to helper module -def get_item_lists(api_key): - client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False) - return client.get_item_lists() - -def write_table(item_lists, filename): - with open(filename, 'w') as outfile: - for list_set in item_lists.values(): - for item_list in list_set: - outfile.write("%s (%d)\t%s\n" % (item_list['name'], item_list['num_items'], item_list['item_list_url'])) - -def main(): - args = parser() - try: - api_key = open(args.api_key, 'r').read().strip() - item_lists = get_item_lists(api_key) - print(item_lists) - if item_lists: - write_table(item_lists, args.output) - except Exception as e: - print("ERROR: " + str(e), file=sys.stderr) - sys.exit(1) - - -if __name__ == '__main__': - main()
--- a/alveo_item_list_importer.xml Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -<tool id="alveo_item_list_importer" name="Get Alveo Item Lists" version="0.01" force_history_refresh="True"> - <description>Retrieves item list metadata.</description> - - <requirements> - <requirement type="package" version="0.6">pyalveo</requirement> - </requirements> - - <command interpreter="python"> - alveo_item_list_importer.py --api_key $api_key --output $item_list - </command> - - - <inputs> - <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/> - <param name="job_name" type="text" size="25" - label="Supply a name for the outputs to remind you what they contain" value="Alveo Item Lists"/> - </inputs> - - <outputs> - <data format="tabular" name="item_list" label="${job_name}"/> - </outputs> - - <help>Import Item Lists from Alveo. This imports the lists, but does not download the individual items. - That task is performed by the *Get Files from Alveo* tool. - </help> - - <citations> - <citation type='bibtex'> - @article{cassidy2014alveo, - title={The alveo virtual laboratory: a web based repository API}, - author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others}, - year={2014}, - publisher={Reykjavik, Iceland: European Language Resources Association} - } - </citation> - </citations> -</tool>
--- a/austalk-select-hVd-words.py Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -from __future__ import print_function -import argparse -import pyalveo -import sys - -API_URL = 'https://app.alveo.edu.au/' -PREFIXES = """ -PREFIX dc:<http://purl.org/dc/terms/> -PREFIX austalk:<http://ns.austalk.edu.au/> -PREFIX olac:<http://www.language-archives.org/OLAC/1.1/> -PREFIX ausnc:<http://ns.ausnc.org.au/schemas/ausnc_md_model/> -PREFIX foaf:<http://xmlns.com/foaf/0.1/> -PREFIX dbpedia:<http://dbpedia.org/ontology/> -PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> -PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> -PREFIX geo:<http://www.w3.org/2003/01/geo/wgs84_pos#> -PREFIX iso639schema:<http://downlode.org/rdf/iso-639/schema#> -PREFIX austalkid:<http://id.austalk.edu.au/> -PREFIX iso639:<http://downlode.org/rdf/iso-639/languages#> -PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> -PREFIX is: <http://purl.org/ontology/is/core#> -PREFIX iso: <http://purl.org/iso25964/skos-thes#> -PREFIX dada: <http://purl.org/dada/schema/0.2#>""" - -def parser(): - parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists") - parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") - parser.add_argument('--speaker', required=True, action="store", type=str, help="Speaker identifier") - parser.add_argument('--words', required=False, default='all', action="store", type=str, help="Word group (all, monopthongs, dipthongs)") - parser.add_argument('--output', required=True, action="store", type=str, help="output file name") - return parser.parse_args() - -def find_hVd_words(api_key, speakerid, output, words='all'): - """Find words in the Austalk corpus - """ - - client = pyalveo.Client(api_key, API_URL, use_cache=False) - - query = PREFIXES + """ -SELECT distinct ?item ?prompt ?compname -WHERE { - ?item a ausnc:AusNCObject . - ?item olac:speaker ?speaker . - ?speaker austalk:id "%s" . - ?item austalk:prompt ?prompt . - ?item austalk:componentName ?compname . - """ % speakerid - - hVdWords = { - 'monopthongs': ['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod', "whod"], - 'dipthongs': ['herd', 'howd', 'hoyd', 'haired', 'hard', 'heared'] - } - - if words == 'all': - words = hVdWords['monopthongs'] + hVdWords['dipthongs'] - else: - words = hVdWords[words] - - filterclause = 'FILTER regex(?prompt, "^' - filterclause += '$|^'.join(words) - filterclause += '$", "i")\n' - - query += filterclause + "}" - - result = client.sparql_query('austalk', query) - - items = [] - for b in result['results']['bindings']: - items.append((b['prompt']['value'], b['item']['value'])) - - with open(output, 'w') as out: - out.write("Speaker\tPrompt\tItemURL\n") - for item in items: - out.write(speakerid + "\t" + item[0] + "\t" + item[1] + "\n") - - -def main(): - args = parser() - try: - api_key = open(args.api_key, 'r').read().strip() - find_hVd_words(api_key, args.speaker, args.output, args.words) - except Exception as e: - print("ERROR: " + str(e), file=sys.stderr) - sys.exit(1) - -if __name__ == '__main__': - main()
--- a/austalk-select-hVd-words.xml Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ -<tool id="austalk-select-hvd-words" name="Find HVD words in Austalk" version="0.01" force_history_refresh="True"> - <description>for a single speaker</description> - - <requirements> - <requirement type="package" version="0.6">pyalveo</requirement> - </requirements> - - <command interpreter="python"> - austalk-select-hVd-words.py --api_key $api_key --speaker $speaker --words $words --output $output - </command> - - <inputs> - <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/> - <param name="speaker" type="text" format="text" label="Speaker ID" help="e.g. 1_123"/> - <param name="words" type="select" multiple="false" label="Word List" display="radioboxes"> - <option value='all'>All hVd words</option> - <option value='monopthongs'>hVd monopthongs</option> - <option value='dipthongs'>hVd dipthongs</option> - </param> - <param name="job_name" type="text" size="25" - label="Supply a name for the output to remind you what it contains" value="Query Results"/> - </inputs> - - <outputs> - <data format="tabular" name="output" label="$job_name" /> - </outputs> - - <tests> - </tests> - - <help>Find items corresponding to the hVd words for this Austalk speaker.</help> - - <citations> - <citation type='bibtex'> - @inproceedings{Buschmeir2013, - author = {{Hendrik Buschmeier}, Marcin Wlodarczak}, - booktitle = {Tagungsband der 24. Konferenz zur Elektronischen Sprachsignalverarbeitung (ESSV 2013)}, - pages = {152--157}, - title = {{TextGridTools: A TextGrid Processing and Analysis Toolkit for Python}}, - year = {2013} - } - </citation> - </citations> -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phonR_tool.R Wed Aug 31 22:02:30 2016 -0400 @@ -0,0 +1,52 @@ +# +# Galaxy tool that plots Vowels using the phonR package. +# Accepts 8 inputs of the form <tsv data file> <output file name> <column1> <column2> <optPretty> <optEllipse> <optTokens> <optMeans> +# Created by Michael Bauer +# +library(phonR) +library('getopt') + +#create options +option_specification = matrix(c( + 'outdir', 'f', 1, 'character', + 'htmlfile', 'h', 1, 'character', + 'inputfile', 'i', 1, 'character', + 'column1', 'y', 1, 'integer', + 'column2', 'z', 1, 'integer', + 'columnvowels', 'x', 1, 'integer', + 'pretty', 'p', 1, 'logical', + 'ellipse', 'e', 1, 'logical', + 'tokens', 't', 1, 'logical', + 'means', 'm', 1, 'logical', + 'cextokens', 'c', 1, 'numeric', + 'alphatokens', 'a', 1, 'numeric', + 'cexmeans', 'b', 1, 'numeric' +), byrow=TRUE, ncol=4); + +# Parse options +options = getopt(option_specification); + +if (!is.null(options$outdir)) { + # Create the directory + dir.create(options$outdir,FALSE) +} + +pngfile <- gsub("[ ]+", "", paste(options$outdir,"/pngfile.png")) +htmlfile <- gsub("[ ]+", "", paste(options$htmlfile)) + +data = read.table(options$inputfile,sep="\t", header=TRUE); + +png(pngfile); + +plotVowels(data[,options$column1], data[,options$column2], data[,options$columnvowels], plot.tokens = options$tokens, + pch.tokens = data[,options$columnvowels], cex.tokens = options$cextokens, alpha.tokens = options$alphatokens, + plot.means = options$means, pch.means = data[,options$columnvowels], cex.means = options$cexmeans, + var.col.by = data[,options$columnvowels], ellipse.line = options$ellipse, pretty = options$pretty) +dev.off(); + +htmlfile_handle <- file(htmlfile) +html_output = c('<html><body>', + '<h3>Result:</h3><img src="pngfile.png"/>', + '</html></body>'); +writeLines(html_output, htmlfile_handle); +close(htmlfile_handle);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phonR_tool.xml Wed Aug 31 22:02:30 2016 -0400 @@ -0,0 +1,52 @@ +<tool id="vowel_plot" name="Plot Vowels" version="0.01" force_history_refresh="True"> + <description>Using phonR to produce a Vowel Plot</description> + + <requirements> + <requirement type="package" version="3.2.1">R</requirement> + <requirement type="package" version="1.0.3">phonR</requirement> + </requirements> + + <command interpreter="Rscript"> + $__tool_directory__/phonR_tool.R --input="${input}" --outdir="$htmlfile.files_path" --htmlfile="$htmlfile" --column1="${f1}" --column2="${f2}" --columnvowels="${vowel}" --pretty="${pretty}" --ellipse="${ellipse}" --tokens="${tokens}" --means="${means}" --cextokens="${cextokens}" --alphatokens="${alphatokens}" --cexmeans="${cexmeans}" + </command> + + <inputs> + <param name="input" type="data" format="tabular" label="Segment List" help=""/> + <param name="f1" type="data_column" data_ref="input" label="Column for f1" force_select="true" use_header_names="true"/> + <param name="f2" type="data_column" data_ref="input" label="Column for f2" force_select="true" use_header_names="true"/> + <param name="vowel" type="data_column" data_ref="input" label="Column with Vowels" force_select="true" use_header_names="true"/> + <param name="pretty" type="boolean" label="Make Pretty" + truevalue="TRUE" falsevalue="FALSE" checked="True" + help="Will apply various beautification techniques." /> + <param name="ellipse" type="boolean" label="Add Ellipses" + truevalue="TRUE" falsevalue="FALSE" checked="True" + help="Will add an ellipse around the location of each vowel cluster." /> + <param name="tokens" type="boolean" label="Add Tokens" + truevalue="TRUE" falsevalue="FALSE" checked="True" + help="Will add tokens to the plot." /> + <param name="means" type="boolean" label="Add Means" + truevalue="TRUE" falsevalue="FALSE" checked="True" + help="Will add means to the plot." /> + <param name="cextokens" type="float" label="cex.tokens" value="1.2" min="0.0" max="10.0" + help="Size of tokens on the plot." /> + <param name="alphatokens" type="float" label="alpha.tokens" value="0.2" min="0.0" max="1.0" + help="The alpha of the tokens on the plot (transparency). NOTE: Must be a value betweek 0 and 1!" /> + <param name="cexmeans" type="float" label="cex.means" value="2" min="0.0" max="10.0" + help="Size of the means on the plot." /> + </inputs> + + <outputs> + <data format="html" name="htmlfile" label="output.html" /> + </outputs> + + + <tests> + <test> + </test> + </tests> + + <help> + Will make a vowel plot from given data. Best used from data directly run from the "Get Formants at segment midpoint" Tool. + </help> + +</tool>
--- a/test Tue Aug 30 21:06:27 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -Adelaide (6840) https://app.alveo.edu.au/item_lists/478 -austalk_catepillar (309) https://app.alveo.edu.au/item_lists/64 -different (888) https://app.alveo.edu.au/item_lists/132 -gum-tree (58) https://app.alveo.edu.au/item_lists/84 -M&D_Test_140904 (10) https://app.alveo.edu.au/item_lists/168 -rose (245) https://app.alveo.edu.au/item_lists/82 -thistle (16) https://app.alveo.edu.au/item_lists/83 -ace-specialised (122) https://app.alveo.edu.au/item_lists/178 -austalk-adult-csl-child-match (250) https://app.alveo.edu.au/item_lists/904 -austalk-digits (144) https://app.alveo.edu.au/item_lists/517 -austalk_textgrid_sample (10) https://app.alveo.edu.au/item_lists/484 -COOEE ALL (1354) https://app.alveo.edu.au/item_lists/95 -cooee sample (129) https://app.alveo.edu.au/item_lists/53 -dialogue-all (76) https://app.alveo.edu.au/item_lists/116 -dialogue-sample (6) https://app.alveo.edu.au/item_lists/180 -mdsample (20) https://app.alveo.edu.au/item_lists/52