# HG changeset patch
# User stevecassidy
# Date 1472695350 14400
# Node ID c0569deac8d98402ad158670c8f068cd7f373675
# Parent 7b6021997b8e465e924a9105bdafd11e51ee635e
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
diff -r 7b6021997b8e -r c0569deac8d9 __pycache__/pyalveo.cpython-35.pyc
Binary file __pycache__/pyalveo.cpython-35.pyc has changed
diff -r 7b6021997b8e -r c0569deac8d9 alveo_api_key.py
--- a/alveo_api_key.py Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-from __future__ import print_function
-import argparse
-import pyalveo
-import sys
-
-API_URL = 'https://app.alveo.edu.au'
-
-def parser():
- parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists")
- parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
- parser.add_argument('--output_path', required=True, action="store", type=str, help="File to store the API key in")
- return parser.parse_args()
-
-def write_key(api_key, output_path, client_module=pyalveo):
- """Tests whether an API key is valid and writes it to a file.
-
- :type api_key: String
- :param api_key: Alveo API key
-
- :type output_path: String
- :param output_path: Path to the file to store the API key in
-
- :type client_module: pyalveo.Client
- :param client_module: Module providing the client (used for testing purposes),
- defaults to pyalveo
-
- :raises: pyalveo.APIError if the API request is not successful
-
- """
- client = client_module.Client(api_key, API_URL, use_cache=False)
- outfile = open(output_path, 'w')
- outfile.write(api_key)
- outfile.close()
-
-def main():
- args = parser()
- try:
- write_key(args.api_key, args.output_path)
- except Exception as e:
- print("ERROR: " + str(e), file=sys.stderr)
- sys.exit(1)
-
-if __name__ == '__main__':
- main()
diff -r 7b6021997b8e -r c0569deac8d9 alveo_api_key.xml
--- a/alveo_api_key.xml Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-
- for use with Alveo tools
-
-
- pyalveo
-
-
-
- alveo_api_key.py --api_key $api_key --output_path $output
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Stores Alveo API keys for use with the Alveo Galaxy tools.
-
-
-
- @article{cassidy2014alveo,
- title={The alveo virtual laboratory: a web based repository API},
- author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
- year={2014},
- publisher={Reykjavik, Iceland: European Language Resources Association}
- }
-
-
-
diff -r 7b6021997b8e -r c0569deac8d9 alveo_get_item_data.py
--- a/alveo_get_item_data.py Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-from __future__ import print_function
-import json
-import argparse
-import pyalveo
-import sys
-import os
-from fnmatch import fnmatch
-import csv
-
-API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
-
-def parser():
- parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
- parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
- parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs")
- parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download")
- parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
- return parser.parse_args()
-
-def read_item_list(filename, client):
- """Read an item list from a file
- which should be a tabular formatted file
- with one column header ItemURL.
- Return an instance of ItemGroup"""
-
- with open(filename) as fd:
- csvreader = csv.DictReader(fd, dialect='excel-tab')
- if 'ItemURL' not in csvreader.fieldnames:
- return None
- itemurls = []
- for row in csvreader:
- itemurls.append(row['ItemURL'])
-
- print("ITEMS:", itemurls)
- itemlist = pyalveo.ItemGroup(itemurls, client)
-
- return itemlist
-
-# this file name pattern allows galaxy to discover the dataset designation and type
-FNPAT = "%(designation)s#%(ext)s"
-
-def galaxy_name(itemname, fname):
- """construct a filename suitable for Galaxy dataset discovery
- designation - (dataset identifier) is the file basename
- ext - defines the dataset type and is the file extension
- """
-
- root, ext = os.path.splitext(fname)
- ext = ext[1:] # remove initial .
- fname = FNPAT % {'designation': itemname, 'ext': ext}
-
- return fname
-
-def download_documents(item_list, patterns, output_path):
- """
- Downloads a list of documents to the directory specificed by output_path.
-
- :type documents: list of pyalveo.Document
- :param documents: Documents to download
-
- :type output_path: String
- :param output_path: directory to download to the documents to
- """
- if not os.path.exists(output_path):
- os.makedirs(output_path)
-
- downloaded = []
-
- items = item_list.get_all()
- filtered_documents = []
- for item in items:
- documents = item.get_documents()
- for doc in documents:
- for pattern in patterns:
- if not pattern == '' and fnmatch(doc.get_filename(), pattern):
- fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename())
- try:
- doc.download_content(dir_path=output_path, filename=fname)
- downloaded.append(doc.get_filename())
- except:
- # maybe it doesn't exist or we have no access
- # TODO: report this
- pass
- return downloaded
-
-def main():
- args = parser()
- try:
- api_key = open(args.api_key, 'r').read().strip()
-
- client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False)
-
- item_list = read_item_list(args.item_list, client)
- patterns = args.patterns.split(',')
- downloaded = download_documents(item_list, patterns, args.output_path)
- except pyalveo.APIError as e:
- print("ERROR: " + str(e), file=sys.stderr)
- sys.exit(1)
-
-if __name__ == '__main__':
- main()
diff -r 7b6021997b8e -r c0569deac8d9 alveo_get_item_data.xml
--- a/alveo_get_item_data.xml Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-
- Downloads files from the items in an Galaxy list of items
-
-
- pyalveo
-
-
-
- alveo_get_item_data.py --api_key $api_key --item_list $item_list --patterns $patterns,$patternselect --output_path ItemListData
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Downloads files from a local list of Alveo items. You can download all files or those matching
- a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in
- your history.
-
-
- @article{cassidy2014alveo,
- title={The alveo virtual laboratory: a web based repository API},
- author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
- year={2014},
- publisher={Reykjavik, Iceland: European Language Resources Association}
- }
-
-
-
diff -r 7b6021997b8e -r c0569deac8d9 alveo_get_primary_text.py
--- a/alveo_get_primary_text.py Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-from __future__ import print_function
-import json
-import argparse
-import pyalveo
-import sys
-import os
-from fnmatch import fnmatch
-
-API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
-
-def parser():
- parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
- parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
- parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download")
- parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
- return parser.parse_args()
-
-def get_item_list(api_key, item_list_url):
- client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False)
- return client.get_item_list(item_list_url)
-
-# this file name pattern allows galaxy to discover the dataset designation and type
-FNPAT = "%(designation)s_%(ext)s"
-
-def galaxy_name(fname, ext):
- """construct a filename suitable for Galaxy dataset discovery"""
-
- fname = FNPAT % {'designation': fname, 'ext': ext}
-
- return fname
-import pprint
-def download_documents(item_list, output_path):
- """
- Downloads a list of documents to the directory specificed by output_path.
-
- :type documents: list of pyalveo.Document
- :param documents: Documents to download
-
- :type output_path: String
- :param output_path: directory to download to the documents to
- """
- if not os.path.exists(output_path):
- os.makedirs(output_path)
-
- downloaded = []
-
- items = item_list.get_all()
- filtered_documents = []
- for item in items:
- md = item.metadata()
- fname = os.path.join(output_path, galaxy_name(md['alveo:metadata']['dc:identifier'], 'txt'))
- content = item.get_primary_text()
- if not content == None:
- with open(fname, 'w') as out:
- out.write(content)
-
- return downloaded
-
-def main():
- args = parser()
- try:
- api_key = open(args.api_key, 'r').read().strip()
- item_list = get_item_list(api_key, args.item_list_url)
- downloaded = download_documents(item_list, args.output_path)
- # write out a list of downloaded files as a result?
- except pyalveo.APIError as e:
- print("ERROR: " + str(e), file=sys.stderr)
- sys.exit(1)
-
-if __name__ == '__main__':
- main()
diff -r 7b6021997b8e -r c0569deac8d9 alveo_get_primary_text.xml
--- a/alveo_get_primary_text.xml Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-
- Downloads primary text from the items in an Alveo Item List
-
-
- pyalveo
-
-
-
- alveo_get_primary_text.py --api_key $api_key --item_list_url $item_list_url --output_path ItemListData
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Downloads the primary text for each item from an Alveo Item List
-
-
- @article{cassidy2014alveo,
- title={The alveo virtual laboratory: a web based repository API},
- author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
- year={2014},
- publisher={Reykjavik, Iceland: European Language Resources Association}
- }
-
-
-
diff -r 7b6021997b8e -r c0569deac8d9 alveo_item_list_downloader.py
--- a/alveo_item_list_downloader.py Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,85 +0,0 @@
-from __future__ import print_function
-import json
-import argparse
-import pyalveo
-import sys
-import os
-from fnmatch import fnmatch
-
-API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
-
-def parser():
- parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
- parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
- parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download")
- parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download")
- parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
- return parser.parse_args()
-
-def get_item_list(api_key, item_list_url):
- client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False)
- return client.get_item_list(item_list_url)
-
-# this file name pattern allows galaxy to discover the dataset designation and type
-FNPAT = "%(designation)s_%(ext)s"
-
-
-def galaxy_name(itemname, fname):
- """construct a filename suitable for Galaxy dataset discovery
- designation - (dataset identifier) is the file basename
- ext - defines the dataset type and is the file extension
- """
-
- root, ext = os.path.splitext(fname)
- ext = ext[1:] # remove initial .
- fname = FNPAT % {'designation': itemname, 'ext': ext}
-
- return fname
-
-
-def download_documents(item_list, patterns, output_path):
- """
- Downloads a list of documents to the directory specificed by output_path.
-
- :type documents: list of pyalveo.Document
- :param documents: Documents to download
-
- :type output_path: String
- :param output_path: directory to download to the documents to
- """
- if not os.path.exists(output_path):
- os.makedirs(output_path)
-
- downloaded = []
-
- items = item_list.get_all()
- filtered_documents = []
- for item in items:
- documents = item.get_documents()
- for doc in documents:
- for pattern in patterns:
- if not pattern == '' and fnmatch(doc.get_filename(), pattern):
- fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename())
- try:
- doc.download_content(dir_path=output_path, filename=fname)
- downloaded.append(doc.get_filename())
- except:
- # maybe it doesn't exist or we have no access
- # TODO: report this
- pass
- return downloaded
-
-def main():
- args = parser()
- try:
- api_key = open(args.api_key, 'r').read().strip()
- item_list = get_item_list(api_key, args.item_list_url)
- patterns = args.patterns.split(',')
- downloaded = download_documents(item_list, patterns, args.output_path)
- # write out a list of downloaded files as a result?
- except pyalveo.APIError as e:
- print("ERROR: " + str(e), file=sys.stderr)
- sys.exit(1)
-
-if __name__ == '__main__':
- main()
diff -r 7b6021997b8e -r c0569deac8d9 alveo_item_list_downloader.xml
--- a/alveo_item_list_downloader.xml Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,91 +0,0 @@
-
- Downloads files from the items in an Alveo Item List
-
-
- pyalveo
-
-
-
- alveo_item_list_downloader.py --api_key $api_key --item_list_url $item_list_url --patterns $patterns,$patternselect --output_path ItemListData
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Downloads files from an Alveo Item list. You can download all files or those matching
- a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in
- your history.
-
-
- @article{cassidy2014alveo,
- title={The alveo virtual laboratory: a web based repository API},
- author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
- year={2014},
- publisher={Reykjavik, Iceland: European Language Resources Association}
- }
-
-
-
diff -r 7b6021997b8e -r c0569deac8d9 alveo_item_list_importer.py
--- a/alveo_item_list_importer.py Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-from __future__ import print_function
-import json
-import argparse
-import pyalveo
-import sys
-
-API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
-
-
-def parser():
- parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists")
- parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
- parser.add_argument('--output', required=True, action="store", type=str, help="Path to output file")
- return parser.parse_args()
-
-# TODO: export common function to helper module
-def get_item_lists(api_key):
- client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False)
- return client.get_item_lists()
-
-def write_table(item_lists, filename):
- with open(filename, 'w') as outfile:
- for list_set in item_lists.values():
- for item_list in list_set:
- outfile.write("%s (%d)\t%s\n" % (item_list['name'], item_list['num_items'], item_list['item_list_url']))
-
-def main():
- args = parser()
- try:
- api_key = open(args.api_key, 'r').read().strip()
- item_lists = get_item_lists(api_key)
- print(item_lists)
- if item_lists:
- write_table(item_lists, args.output)
- except Exception as e:
- print("ERROR: " + str(e), file=sys.stderr)
- sys.exit(1)
-
-
-if __name__ == '__main__':
- main()
diff -r 7b6021997b8e -r c0569deac8d9 alveo_item_list_importer.xml
--- a/alveo_item_list_importer.xml Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-
- Retrieves item list metadata.
-
-
- pyalveo
-
-
-
- alveo_item_list_importer.py --api_key $api_key --output $item_list
-
-
-
-
-
-
-
-
-
-
-
-
- Import Item Lists from Alveo. This imports the lists, but does not download the individual items.
- That task is performed by the *Get Files from Alveo* tool.
-
-
-
-
- @article{cassidy2014alveo,
- title={The alveo virtual laboratory: a web based repository API},
- author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
- year={2014},
- publisher={Reykjavik, Iceland: European Language Resources Association}
- }
-
-
-
diff -r 7b6021997b8e -r c0569deac8d9 austalk-select-hVd-words.py
--- a/austalk-select-hVd-words.py Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-from __future__ import print_function
-import argparse
-import pyalveo
-import sys
-
-API_URL = 'https://app.alveo.edu.au/'
-PREFIXES = """
-PREFIX dc:
-PREFIX austalk:
-PREFIX olac:
-PREFIX ausnc:
-PREFIX foaf:
-PREFIX dbpedia:
-PREFIX rdf:
-PREFIX rdfs:
-PREFIX geo:
-PREFIX iso639schema:
-PREFIX austalkid:
-PREFIX iso639:
-PREFIX xsd:
-PREFIX is:
-PREFIX iso:
-PREFIX dada: """
-
-def parser():
- parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists")
- parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
- parser.add_argument('--speaker', required=True, action="store", type=str, help="Speaker identifier")
- parser.add_argument('--words', required=False, default='all', action="store", type=str, help="Word group (all, monopthongs, dipthongs)")
- parser.add_argument('--output', required=True, action="store", type=str, help="output file name")
- return parser.parse_args()
-
-def find_hVd_words(api_key, speakerid, output, words='all'):
- """Find words in the Austalk corpus
- """
-
- client = pyalveo.Client(api_key, API_URL, use_cache=False)
-
- query = PREFIXES + """
-SELECT distinct ?item ?prompt ?compname
-WHERE {
- ?item a ausnc:AusNCObject .
- ?item olac:speaker ?speaker .
- ?speaker austalk:id "%s" .
- ?item austalk:prompt ?prompt .
- ?item austalk:componentName ?compname .
- """ % speakerid
-
- hVdWords = {
- 'monopthongs': ['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod', "whod"],
- 'dipthongs': ['herd', 'howd', 'hoyd', 'haired', 'hard', 'heared']
- }
-
- if words == 'all':
- words = hVdWords['monopthongs'] + hVdWords['dipthongs']
- else:
- words = hVdWords[words]
-
- filterclause = 'FILTER regex(?prompt, "^'
- filterclause += '$|^'.join(words)
- filterclause += '$", "i")\n'
-
- query += filterclause + "}"
-
- result = client.sparql_query('austalk', query)
-
- items = []
- for b in result['results']['bindings']:
- items.append((b['prompt']['value'], b['item']['value']))
-
- with open(output, 'w') as out:
- out.write("Speaker\tPrompt\tItemURL\n")
- for item in items:
- out.write(speakerid + "\t" + item[0] + "\t" + item[1] + "\n")
-
-
-def main():
- args = parser()
- try:
- api_key = open(args.api_key, 'r').read().strip()
- find_hVd_words(api_key, args.speaker, args.output, args.words)
- except Exception as e:
- print("ERROR: " + str(e), file=sys.stderr)
- sys.exit(1)
-
-if __name__ == '__main__':
- main()
diff -r 7b6021997b8e -r c0569deac8d9 austalk-select-hVd-words.xml
--- a/austalk-select-hVd-words.xml Tue Aug 30 21:06:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-
- for a single speaker
-
-
- pyalveo
-
-
-
- austalk-select-hVd-words.py --api_key $api_key --speaker $speaker --words $words --output $output
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Find items corresponding to the hVd words for this Austalk speaker.
-
-
-
- @inproceedings{Buschmeir2013,
- author = {{Hendrik Buschmeier}, Marcin Wlodarczak},
- booktitle = {Tagungsband der 24. Konferenz zur Elektronischen Sprachsignalverarbeitung (ESSV 2013)},
- pages = {152--157},
- title = {{TextGridTools: A TextGrid Processing and Analysis Toolkit for Python}},
- year = {2013}
- }
-
-
-
diff -r 7b6021997b8e -r c0569deac8d9 phonR_tool.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phonR_tool.R Wed Aug 31 22:02:30 2016 -0400
@@ -0,0 +1,52 @@
+#
+# Galaxy tool that plots Vowels using the phonR package.
+# Accepts 8 inputs of the form