# HG changeset patch
# User stevecassidy
# Date 1471319146 14400
# Node ID 6fef3489d97c86d628f6d3979b4a942885033c90
# Parent bfe39bd252df89460ee5cb271a3211e04b389525
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
diff -r bfe39bd252df -r 6fef3489d97c alveo_api_key.pyc
Binary file alveo_api_key.pyc has changed
diff -r bfe39bd252df -r 6fef3489d97c alveo_api_key.xml
--- a/alveo_api_key.xml Mon Jul 18 23:49:40 2016 -0400
+++ b/alveo_api_key.xml Mon Aug 15 23:45:46 2016 -0400
@@ -2,7 +2,7 @@
for use with Alveo tools
- pyalveo
+ pyalveo
@@ -19,7 +19,7 @@
-
+
diff -r bfe39bd252df -r 6fef3489d97c alveo_get_item_data.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_get_item_data.py Mon Aug 15 23:45:46 2016 -0400
@@ -0,0 +1,100 @@
+from __future__ import print_function
+import json
+import argparse
+import pyalveo
+import sys
+import os
+from fnmatch import fnmatch
+import csv
+
+API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
+
+def parser():
+    """Return the parsed command-line arguments; all four options are required."""
+    cli = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
+    for flag, text in (('--api_key', "Alveo API key"), ('--item_list', "File containing list of item URLs"),
+                       ('--patterns', "File patterns to download"), ('--output_path', "Path to output file")):
+        cli.add_argument(flag, required=True, action="store", type=str, help=text)
+    return cli.parse_args()
+
+def read_item_list(filename, client):
+    """Read item URLs from a tab-separated file and wrap them in an ItemGroup.
+
+    The first row must be a header containing an ItemURL column.
+    Returns pyalveo.ItemGroup(urls, client), or None when the file is
+    empty or has no ItemURL column.
+    """
+
+    with open(filename) as fd:
+        csvreader = csv.DictReader(fd, dialect='excel-tab')
+        # fieldnames is None for an empty file; treat that like a missing column
+        if 'ItemURL' not in (csvreader.fieldnames or ()):
+            return None
+        # short rows give None and blank cells give '': neither is a usable URL
+        itemurls = [row['ItemURL'] for row in csvreader if row['ItemURL']]
+
+    return pyalveo.ItemGroup(itemurls, client)
+
+# this file name pattern allows galaxy to discover the dataset designation and type
+FNPAT = "%(designation)s#%(ext)s"
+
+def galaxy_name(itemname, fname):
+    """Build the download file name used for Galaxy dataset discovery.
+
+    itemname - becomes the designation (dataset identifier) part
+    fname - only its extension is used; it becomes the dataset type
+    The two parts are combined with the FNPAT template.
+    """
+    extension = os.path.splitext(fname)[1]
+    # drop the leading dot that splitext keeps on the extension
+    parts = {'designation': itemname, 'ext': extension[1:]}
+    return FNPAT % parts
+
+def download_documents(item_list, patterns, output_path):
+    """
+    Downloads every document in item_list whose filename matches a pattern.
+
+    :param item_list: object whose get_all() returns the items to scan
+    :param patterns: iterable of fnmatch patterns; empty strings are ignored
+    :param output_path: directory to download into (created if missing)
+    :returns: list of the original filenames that were downloaded
+    """
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # empty patterns never select anything, so drop them once up front
+    patterns = [pattern for pattern in patterns if pattern != '']
+    downloaded = []
+
+    for item in item_list.get_all():
+        for doc in item.get_documents():
+            docname = doc.get_filename()
+            # any() stops at the first hit, so a document matching several
+            # patterns is only downloaded once
+            if not any(fnmatch(docname, pattern) for pattern in patterns):
+                continue
+            fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], docname)
+            try:
+                doc.download_content(dir_path=output_path, filename=fname)
+                downloaded.append(docname)
+            except Exception as err:
+                # best effort: report on stdout and move on to the next document
+                print("WARNING: could not download %s: %s" % (docname, err))
+    return downloaded
+
+def main():
+    """Read the API key file, load the item list and download matching documents."""
+    args = parser()
+    try:
+        with open(args.api_key, 'r') as keyfile:  # --api_key names a file holding the key
+            api_key = keyfile.read().strip()
+        item_list = read_item_list(args.item_list, pyalveo.Client(api_url=API_URL, api_key=api_key))
+        if item_list is None:  # read_item_list found no ItemURL column
+            raise ValueError("no ItemURL column in " + args.item_list)
+        download_documents(item_list, args.patterns.split(','), args.output_path)
+    except (pyalveo.APIError, IOError, ValueError) as e:
+        print("ERROR: " + str(e), file=sys.stderr)
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main()
diff -r bfe39bd252df -r 6fef3489d97c alveo_get_item_data.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_get_item_data.xml Mon Aug 15 23:45:46 2016 -0400
@@ -0,0 +1,68 @@
+
+ Downloads files from the items in a Galaxy list of items
+
+
+ pyalveo
+
+
+
+ alveo_get_item_data.py --api_key $api_key --item_list $item_list --patterns $patterns,$patternselect --output_path ItemListData
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Downloads files from a local list of Alveo items. You can download all files or those matching
+ a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in
+ your history.
+
+
+ @article{cassidy2014alveo,
+ title={The alveo virtual laboratory: a web based repository API},
+ author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
+ year={2014},
+ publisher={Reykjavik, Iceland: European Language Resources Association}
+ }
+
+
+
diff -r bfe39bd252df -r 6fef3489d97c alveo_get_primary_text.xml
--- a/alveo_get_primary_text.xml Mon Jul 18 23:49:40 2016 -0400
+++ b/alveo_get_primary_text.xml Mon Aug 15 23:45:46 2016 -0400
@@ -2,7 +2,7 @@
Downloads primary text from the items in an Alveo Item List
- pyalveo
+ pyalveo
diff -r bfe39bd252df -r 6fef3489d97c alveo_item_list_downloader.py
--- a/alveo_item_list_downloader.py Mon Jul 18 23:49:40 2016 -0400
+++ b/alveo_item_list_downloader.py Mon Aug 15 23:45:46 2016 -0400
@@ -23,15 +23,20 @@
# this file name pattern allows galaxy to discover the dataset designation and type
FNPAT = "%(designation)s_%(ext)s"
-def galaxy_name(fname):
- """construct a filename suitable for Galaxy dataset discovery"""
+
+def galaxy_name(itemname, fname):
+ """construct a filename suitable for Galaxy dataset discovery
+ designation - (dataset identifier) is the file basename
+ ext - defines the dataset type and is the file extension
+ """
root, ext = os.path.splitext(fname)
ext = ext[1:] # remove initial .
- fname = FNPAT % {'designation': fname, 'ext': ext}
+ fname = FNPAT % {'designation': itemname, 'ext': ext}
return fname
+
def download_documents(item_list, patterns, output_path):
"""
Downloads a list of documents to the directory specificed by output_path.
@@ -54,7 +59,7 @@
for doc in documents:
for pattern in patterns:
if not pattern == '' and fnmatch(doc.get_filename(), pattern):
- fname = galaxy_name(doc.get_filename())
+ fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename())
try:
doc.download_content(dir_path=output_path, filename=fname)
downloaded.append(doc.get_filename())
diff -r bfe39bd252df -r 6fef3489d97c alveo_item_list_downloader.xml
--- a/alveo_item_list_downloader.xml Mon Jul 18 23:49:40 2016 -0400
+++ b/alveo_item_list_downloader.xml Mon Aug 15 23:45:46 2016 -0400
@@ -2,7 +2,7 @@
Downloads files from the items in an Alveo Item List
- pyalveo
+ pyalveo
@@ -51,7 +51,7 @@
-
+
@@ -66,7 +66,7 @@
-
+
diff -r bfe39bd252df -r 6fef3489d97c alveo_item_list_importer.pyc
Binary file alveo_item_list_importer.pyc has changed
diff -r bfe39bd252df -r 6fef3489d97c alveo_item_list_importer.xml
--- a/alveo_item_list_importer.xml Mon Jul 18 23:49:40 2016 -0400
+++ b/alveo_item_list_importer.xml Mon Aug 15 23:45:46 2016 -0400
@@ -2,7 +2,7 @@
Retrieves item list metadata.
- pyalveo
+ pyalveo
diff -r bfe39bd252df -r 6fef3489d97c austalk-select-hVd-words.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/austalk-select-hVd-words.py Mon Aug 15 23:45:46 2016 -0400
@@ -0,0 +1,87 @@
+from __future__ import print_function
+import argparse
+import pyalveo
+import sys
+
+API_URL = 'https://app.alveo.edu.au/'
+PREFIXES = """
+PREFIX dc:
+PREFIX austalk:
+PREFIX olac:
+PREFIX ausnc:
+PREFIX foaf:
+PREFIX dbpedia:
+PREFIX rdf:
+PREFIX rdfs:
+PREFIX geo:
+PREFIX iso639schema:
+PREFIX austalkid:
+PREFIX iso639:
+PREFIX xsd:
+PREFIX is:
+PREFIX iso:
+PREFIX dada: """
+
+def parser():
+    """Return the parsed command-line arguments; only --words is optional (default 'all')."""
+    cli = argparse.ArgumentParser(description="Finds hVd word items for an Austalk speaker")
+    for flag, text in (('--api_key', "Alveo API key"), ('--speaker', "Speaker identifier"), ('--output', "output file name")):
+        cli.add_argument(flag, required=True, action="store", type=str, help=text)
+    cli.add_argument('--words', required=False, default='all', action="store", type=str, help="Word group (all, monopthongs, dipthongs)")
+    return cli.parse_args()
+
+def find_hVd_words(api_key, speakerid, output, words='all'):
+    """Find the hVd word items recorded by one speaker in the Austalk corpus.
+
+    Sends a SPARQL query through client.sparql_query('austalk', ...) and writes
+    a tab-separated table (Speaker, Prompt, ItemURL) to the file named output.
+    words is 'all', 'monopthongs' or 'dipthongs'; anything else raises ValueError.
+    """
+    hVdWords = {
+        'monopthongs': ['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod', "whod"],
+        'dipthongs': ['herd', 'howd', 'hoyd', 'haired', 'hard', 'heared']
+    }
+
+    if words == 'all':
+        wordlist = hVdWords['monopthongs'] + hVdWords['dipthongs']
+    elif words in hVdWords:
+        wordlist = hVdWords[words]
+    else:
+        raise ValueError("unknown word group: %r" % (words,))
+
+    # keyword arguments, matching alveo_get_item_data.py
+    # NOTE(review): confirm pyalveo.Client's positional order; slot two may not be api_url
+    client = pyalveo.Client(api_key=api_key, api_url=API_URL)
+
+    # escape \ and " so the speaker id cannot break out of the string literal
+    speaker = speakerid.replace('\\', '\\\\').replace('"', '\\"')
+    query = PREFIXES + """
+SELECT distinct ?item ?prompt ?compname
+WHERE {
+ ?item a ausnc:AusNCObject .
+ ?item olac:speaker ?speaker .
+ ?speaker austalk:id "%s" .
+ ?item austalk:prompt ?prompt .
+ ?item austalk:componentName ?compname .
+ """ % speaker
+    # case-insensitive whole-prompt match: ^head$|^had$|...
+    query += 'FILTER regex(?prompt, "^' + '$|^'.join(wordlist) + '$", "i")\n}'
+
+    bindings = client.sparql_query('austalk', query)['results']['bindings']
+    with open(output, 'w') as out:
+        out.write("Speaker\tPrompt\tItemURL\n")
+        for b in bindings:
+            out.write(speakerid + "\t" + b['prompt']['value'] + "\t" + b['item']['value'] + "\n")
+
+
+def main():  # script entry point: any failure is reported on stderr with exit status 1
+    args = parser()
+    try:
+        with open(args.api_key, 'r') as keyfile:  # --api_key is a file path, not the key itself
+            api_key = keyfile.read().strip()
+        find_hVd_words(api_key, args.speaker, args.output, args.words)
+    except Exception as e:
+        sys.exit("ERROR: " + str(e))  # a string argument is printed to stderr and exits with status 1
+
+if __name__ == '__main__':
+ main()
diff -r bfe39bd252df -r 6fef3489d97c austalk-select-hVd-words.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/austalk-select-hVd-words.xml Mon Aug 15 23:45:46 2016 -0400
@@ -0,0 +1,44 @@
+
+ for a single speaker
+
+
+ pyalveo
+
+
+
+ austalk-select-hVd-words.py --api_key $api_key --speaker $speaker --words $words --output $output
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Find items corresponding to the hVd words for this Austalk speaker.
+
+
+
+ @inproceedings{Buschmeir2013,
+ author = {{Hendrik Buschmeier}, Marcin Wlodarczak},
+ booktitle = {Tagungsband der 24. Konferenz zur Elektronischen Sprachsignalverarbeitung (ESSV 2013)},
+ pages = {152--157},
+ title = {{TextGridTools: A TextGrid Processing and Analysis Toolkit for Python}},
+ year = {2013}
+ }
+
+
+
diff -r bfe39bd252df -r 6fef3489d97c test-data/api-key.dat
--- a/test-data/api-key.dat Mon Jul 18 23:49:40 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-insert your api key here
diff -r bfe39bd252df -r 6fef3489d97c test-data/item-lists.dat
--- a/test-data/item-lists.dat Mon Jul 18 23:49:40 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-austalk_catepillar (309) https://app.alveo.edu.au/item_lists/64
-different (888) https://app.alveo.edu.au/item_lists/132
-gum-tree (58) https://app.alveo.edu.au/item_lists/84
-M&D_Test_140904 (10) https://app.alveo.edu.au/item_lists/168
-rose (245) https://app.alveo.edu.au/item_lists/82
-thistle (16) https://app.alveo.edu.au/item_lists/83
-ace-specialised (122) https://app.alveo.edu.au/item_lists/178
-austalk_hide (42) https://app.alveo.edu.au/item_lists/251
-austalk-male-digits (144) https://app.alveo.edu.au/item_lists/412
-COOEE ALL (1354) https://app.alveo.edu.au/item_lists/95
-cooee sample (129) https://app.alveo.edu.au/item_lists/53
-dialogue-all (76) https://app.alveo.edu.au/item_lists/116
-dialogue-sample (6) https://app.alveo.edu.au/item_lists/180
-mdsample (20) https://app.alveo.edu.au/item_lists/52
-one austalk sample (1) https://app.alveo.edu.au/item_lists/179
diff -r bfe39bd252df -r 6fef3489d97c test_alveo_api_key.py
--- a/test_alveo_api_key.py Mon Jul 18 23:49:40 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-import unittest
-import os
-import alveo_api_key
-import pyalveo
-from mock import Mock
-
-class TestAlveoAPIKey(unittest.TestCase):
-
- OUTPUT_PATH = 'test.txt'
- API_KEY = 'test123'
- MOCK_CLIENT = Mock(pyalveo)
-
- def test_write_key(self):
- alveo_api_key.write_key(self.API_KEY, self.OUTPUT_PATH, self.MOCK_CLIENT)
- actual = open(self.OUTPUT_PATH, 'r').read()
- self.assertEqual(self.API_KEY, actual)
-
- def tearDown(self):
- try:
- os.remove(self.OUTPUT_PATH)
- except OSError:
- pass
-
-if __name__ == '__main__':
- unittest.main()
\ No newline at end of file
diff -r bfe39bd252df -r 6fef3489d97c test_alveo_item_list_downloader.py
--- a/test_alveo_item_list_downloader.py Mon Jul 18 23:49:40 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-import unittest
-import os
-import json
-import alveo_item_list_importer
-import pyalveo
-from mock import Mock
-
-class TestAlveoItemListImporter(unittest.TestCase):
-
- API_KEY = 'test123'
- OUTPUT_PATH = 'test.csv'
- ITEM_LIST = '{"shared": [{"shared": true, "num_items": 309, "name": "austalk_catepillar", "item_list_url": "https://app.alveo.edu.au/item_lists/64"}]}'
- CSV_CONTENTS = 'austalk_catepillar (309)\thttps://app.alveo.edu.au/item_lists/64\n'
-
- def test_write_table(self):
- api_list = json.loads(self.ITEM_LIST)
- alveo_item_list_importer.write_table(api_list, self.OUTPUT_PATH)
- actual = open(self.OUTPUT_PATH, 'r').read()
- self.assertEqual(self.CSV_CONTENTS, actual)
-
- def tearDown(self):
- try:
- os.remove(self.OUTPUT_PATH)
- except OSError:
- pass
-
-if __name__ == '__main__':
- unittest.main()
\ No newline at end of file
diff -r bfe39bd252df -r 6fef3489d97c test_alveo_item_list_importer.py
--- a/test_alveo_item_list_importer.py Mon Jul 18 23:49:40 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-import unittest
-import os
-import json
-import alveo_item_list_importer
-import pyalveo
-from mock import Mock
-
-class TestAlveoItemListImporter(unittest.TestCase):
-
- API_KEY = 'test123'
- OUTPUT_PATH = 'test.csv'
- ITEM_LIST = '{"shared": [{"shared": true, "num_items": 309, "name": "austalk_catepillar", "item_list_url": "https://app.alveo.edu.au/item_lists/64"}]}'
- CSV_CONTENTS = 'austalk_catepillar (309)\thttps://app.alveo.edu.au/item_lists/64\n'
-
- def test_write_table(self):
- api_list = json.loads(self.ITEM_LIST)
- alveo_item_list_importer.write_table(api_list, self.OUTPUT_PATH)
- actual = open(self.OUTPUT_PATH, 'r').read()
- self.assertEqual(self.CSV_CONTENTS, actual)
-
- def tearDown(self):
- try:
- os.remove(self.OUTPUT_PATH)
- except OSError:
- pass
-
-if __name__ == '__main__':
- unittest.main()
\ No newline at end of file
diff -r bfe39bd252df -r 6fef3489d97c tool_dependencies.xml
--- a/tool_dependencies.xml Mon Jul 18 23:49:40 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-
-
-
-
-
- pyalveo==0.4
-
-
-
-