# HG changeset patch
# User stevecassidy
# Date 1468900180 14400
# Node ID bfe39bd252df89460ee5cb271a3211e04b389525
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
diff -r 000000000000 -r bfe39bd252df alveo_api_key.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_api_key.py Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,55 @@
+"""Galaxy tool script: check an Alveo API key and store it in a file."""
+from __future__ import print_function
+import argparse
+import pyalveo
+import sys
+
+API_URL = 'https://app.alveo.edu.au'
+
+
+def parser():
+    """Parse the command-line arguments and return the resulting namespace."""
+    arg_parser = argparse.ArgumentParser(description="Stores an Alveo API key")
+    arg_parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
+    arg_parser.add_argument('--output_path', required=True, action="store", type=str, help="File to store the API key in")
+    return arg_parser.parse_args()
+
+
+def write_key(api_key, output_path, client_module=pyalveo):
+    """Tests whether an API key is valid and writes it to a file.
+
+    :type api_key: String
+    :param api_key: Alveo API key
+
+    :type output_path: String
+    :param output_path: Path to the file to store the API key in
+
+    :type client_module: module
+    :param client_module: Module providing the Client class (used for testing
+        purposes), defaults to pyalveo
+
+    :raises: pyalveo.APIError if the API request is not successful
+
+    """
+    # Only the side effect of constructing the client matters here; the object
+    # itself is never used.
+    # NOTE(review): presumably Client() contacts the server and raises on a bad
+    # key -- confirm against the pinned pyalveo version.
+    client_module.Client(api_key, API_URL)
+    # The context manager closes the file even if the write fails.
+    with open(output_path, 'w') as outfile:
+        outfile.write(api_key)
+
+
+def main():
+    """Store the key given on the command line; exit with status 1 on any error."""
+    args = parser()
+    try:
+        write_key(args.api_key, args.output_path)
+    except Exception as e:
+        print("ERROR: " + str(e), file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff -r 000000000000 -r bfe39bd252df alveo_api_key.pyc
Binary file alveo_api_key.pyc has changed
diff -r 000000000000 -r bfe39bd252df alveo_api_key.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_api_key.xml Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,39 @@
+
+ for use with Alveo tools
+
+
+ pyalveo
+
+
+
+ alveo_api_key.py --api_key $api_key --output_path $output
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Stores Alveo API keys for use with the Alveo Galaxy tools.
+
+
+
+ @article{cassidy2014alveo,
+ title={The alveo virtual laboratory: a web based repository API},
+ author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
+ year={2014},
+ publisher={Reykjavik, Iceland: European Language Resources Association}
+ }
+
+
+
diff -r 000000000000 -r bfe39bd252df alveo_get_primary_text.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_get_primary_text.py Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,86 @@
+"""Galaxy tool script: save the primary text of each item in an Alveo Item List."""
+from __future__ import print_function
+import json
+import argparse
+import pyalveo
+import sys
+import os
+import pprint
+from fnmatch import fnmatch
+
+API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
+
+
+def parser():
+    """Parse the command-line arguments and return the resulting namespace."""
+    arg_parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
+    arg_parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
+    arg_parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download")
+    arg_parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
+    return arg_parser.parse_args()
+
+
+def get_item_list(api_key, item_list_url):
+    """Return the item list at item_list_url, fetched with a client for API_URL."""
+    client = pyalveo.Client(api_key=api_key, api_url=API_URL)
+    return client.get_item_list(item_list_url)
+
+
+# this file name pattern allows galaxy to discover the dataset designation and type
+FNPAT = "%(designation)s_%(ext)s"
+
+
+def galaxy_name(fname, ext):
+    """construct a filename suitable for Galaxy dataset discovery"""
+    return FNPAT % {'designation': fname, 'ext': ext}
+
+
+def download_documents(item_list, output_path):
+    """
+    Writes the primary text of each item in item_list to a file in output_path.
+
+    Items whose primary text is None are skipped.
+
+    :type item_list: item list object, as returned by get_item_list()
+    :param item_list: Item list whose items are to be saved
+
+    :type output_path: String
+    :param output_path: directory to write the files to, created if it does not exist
+
+    :rtype: list of String
+    :returns: paths of the files that were written
+    """
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    downloaded = []
+    for item in item_list.get_all():
+        md = item.metadata()
+        fname = os.path.join(output_path, galaxy_name(md['alveo:metadata']['dc:identifier'], 'txt'))
+        content = item.get_primary_text()
+        if content is not None:
+            with open(fname, 'w') as out:
+                out.write(content)
+            # record every file actually written so the caller can list them
+            downloaded.append(fname)
+
+    return downloaded
+
+
+def main():
+    """Read the API key file, fetch the item list and save its primary texts."""
+    args = parser()
+    try:
+        # --api_key names a file that holds the key, not the key itself
+        with open(args.api_key, 'r') as key_file:
+            api_key = key_file.read().strip()
+        item_list = get_item_list(api_key, args.item_list_url)
+        downloaded = download_documents(item_list, args.output_path)
+        # write out a list of downloaded files as a result?
+    except pyalveo.APIError as e:
+        print("ERROR: " + str(e), file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff -r 000000000000 -r bfe39bd252df alveo_get_primary_text.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_get_primary_text.xml Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,68 @@
+
+ Downloads primary text from the items in an Alveo Item List
+
+
+ pyalveo
+
+
+
+ alveo_get_primary_text.py --api_key $api_key --item_list_url $item_list_url --output_path ItemListData
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Downloads the primary text for each item from an Alveo Item List
+
+
+ @article{cassidy2014alveo,
+ title={The alveo virtual laboratory: a web based repository API},
+ author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
+ year={2014},
+ publisher={Reykjavik, Iceland: European Language Resources Association}
+ }
+
+
+
diff -r 000000000000 -r bfe39bd252df alveo_item_list_downloader.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_item_list_downloader.py Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,100 @@
+"""Galaxy tool script: download the documents of an Alveo Item List that match file patterns."""
+from __future__ import print_function
+import json
+import argparse
+import pyalveo
+import sys
+import os
+from fnmatch import fnmatch
+
+API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
+
+
+def parser():
+    """Parse the command-line arguments and return the resulting namespace."""
+    arg_parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
+    arg_parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
+    arg_parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download")
+    arg_parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download")
+    arg_parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
+    return arg_parser.parse_args()
+
+
+def get_item_list(api_key, item_list_url):
+    """Return the item list at item_list_url, fetched with a client for API_URL."""
+    client = pyalveo.Client(api_key=api_key, api_url=API_URL)
+    return client.get_item_list(item_list_url)
+
+
+# this file name pattern allows galaxy to discover the dataset designation and type
+FNPAT = "%(designation)s_%(ext)s"
+
+
+def galaxy_name(fname):
+    """construct a filename suitable for Galaxy dataset discovery
+
+    The designation is the complete file name (extension included) and ext is
+    the extension without its leading dot, e.g. "a.txt" gives "a.txt_txt".
+    """
+    ext = os.path.splitext(fname)[1][1:] # remove initial .
+    return FNPAT % {'designation': fname, 'ext': ext}
+
+
+def download_documents(item_list, patterns, output_path):
+    """
+    Downloads the documents that match any of patterns to the directory specified by output_path.
+
+    :type item_list: item list object, as returned by get_item_list()
+    :param item_list: Item list whose documents are to be downloaded
+
+    :type patterns: list of String
+    :param patterns: fnmatch-style file name patterns, empty strings are ignored
+
+    :type output_path: String
+    :param output_path: directory to download the documents to, created if it does not exist
+
+    :rtype: list of String
+    :returns: original file names of the documents that were downloaded
+    """
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # an empty pattern (e.g. from a trailing comma) must never select a file
+    patterns = [pattern for pattern in patterns if pattern != '']
+
+    downloaded = []
+    for item in item_list.get_all():
+        for doc in item.get_documents():
+            filename = doc.get_filename()
+            # any() fetches a document once even when several patterns match it
+            if not any(fnmatch(filename, pattern) for pattern in patterns):
+                continue
+            fname = galaxy_name(filename)
+            try:
+                doc.download_content(dir_path=output_path, filename=fname)
+                downloaded.append(filename)
+            except Exception:
+                # maybe it doesn't exist or we have no access
+                # TODO: report this
+                pass
+    return downloaded
+
+
+def main():
+    """Read the API key file, fetch the item list and download the matching documents."""
+    args = parser()
+    try:
+        # --api_key names a file that holds the key, not the key itself
+        with open(args.api_key, 'r') as key_file:
+            api_key = key_file.read().strip()
+        item_list = get_item_list(api_key, args.item_list_url)
+        patterns = args.patterns.split(',')
+        downloaded = download_documents(item_list, patterns, args.output_path)
+        # write out a list of downloaded files as a result?
+    except pyalveo.APIError as e:
+        print("ERROR: " + str(e), file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff -r 000000000000 -r bfe39bd252df alveo_item_list_downloader.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_item_list_downloader.xml Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,91 @@
+
+ Downloads files from the items in an Alveo Item List
+
+
+ pyalveo
+
+
+
+ alveo_item_list_downloader.py --api_key $api_key --item_list_url $item_list_url --patterns $patterns,$patternselect --output_path ItemListData
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Downloads files from an Alveo Item list. You can download all files or those matching
+ a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in
+ your history.
+
+
+ @article{cassidy2014alveo,
+ title={The alveo virtual laboratory: a web based repository API},
+ author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
+ year={2014},
+ publisher={Reykjavik, Iceland: European Language Resources Association}
+ }
+
+
+
diff -r 000000000000 -r bfe39bd252df alveo_item_list_importer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_item_list_importer.py Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,55 @@
+"""Galaxy tool script: write the Alveo Item Lists visible to an API key to a table."""
+from __future__ import print_function
+import json
+import argparse
+import pyalveo
+import sys
+
+API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
+
+
+def parser():
+    """Parse the command-line arguments and return the resulting namespace."""
+    arg_parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists")
+    arg_parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
+    arg_parser.add_argument('--output', required=True, action="store", type=str, help="Path to output file")
+    return arg_parser.parse_args()
+
+
+# TODO: export common function to helper module
+def get_item_lists(api_key):
+    """Return the result of get_item_lists() on a client created for API_URL."""
+    client = pyalveo.Client(api_key=api_key, api_url=API_URL)
+    return client.get_item_lists()
+
+
+def write_table(item_lists, filename):
+    """Write one tab-separated line per item list to filename.
+
+    Each line reads "name (num_items)<TAB>item_list_url". item_lists is a dict
+    whose values are lists of item-list dicts; the keys are not written.
+    """
+    with open(filename, 'w') as outfile:
+        # values() rather than itervalues(): the latter exists only on Python 2
+        for list_set in item_lists.values():
+            for item_list in list_set:
+                outfile.write("%s (%d)\t%s\n" % (item_list['name'], item_list['num_items'], item_list['item_list_url']))
+
+
+def main():
+    """Read the API key file and write the item list table; exit with status 1 on any error."""
+    args = parser()
+    try:
+        # --api_key names a file that holds the key, not the key itself
+        with open(args.api_key, 'r') as key_file:
+            api_key = key_file.read().strip()
+        item_lists = get_item_lists(api_key)
+        if item_lists:
+            write_table(item_lists, args.output)
+    except Exception as e:
+        print("ERROR: " + str(e), file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff -r 000000000000 -r bfe39bd252df alveo_item_list_importer.pyc
Binary file alveo_item_list_importer.pyc has changed
diff -r 000000000000 -r bfe39bd252df alveo_item_list_importer.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_item_list_importer.xml Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,37 @@
+
+ Retrieves item list metadata.
+
+
+ pyalveo
+
+
+
+ alveo_item_list_importer.py --api_key $api_key --output $item_list
+
+
+
+
+
+
+
+
+
+
+
+
+ Import Item Lists from Alveo. This imports the lists, but does not download the individual items.
+ That task is performed by the *Get Files from Alveo* tool.
+
+
+
+
+ @article{cassidy2014alveo,
+ title={The alveo virtual laboratory: a web based repository API},
+ author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
+ year={2014},
+ publisher={Reykjavik, Iceland: European Language Resources Association}
+ }
+
+
+
diff -r 000000000000 -r bfe39bd252df test-data/api-key.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/api-key.dat Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,1 @@
+insert your api key here
diff -r 000000000000 -r bfe39bd252df test-data/item-lists.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/item-lists.dat Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,15 @@
+austalk_catepillar (309) https://app.alveo.edu.au/item_lists/64
+different (888) https://app.alveo.edu.au/item_lists/132
+gum-tree (58) https://app.alveo.edu.au/item_lists/84
+M&D_Test_140904 (10) https://app.alveo.edu.au/item_lists/168
+rose (245) https://app.alveo.edu.au/item_lists/82
+thistle (16) https://app.alveo.edu.au/item_lists/83
+ace-specialised (122) https://app.alveo.edu.au/item_lists/178
+austalk_hide (42) https://app.alveo.edu.au/item_lists/251
+austalk-male-digits (144) https://app.alveo.edu.au/item_lists/412
+COOEE ALL (1354) https://app.alveo.edu.au/item_lists/95
+cooee sample (129) https://app.alveo.edu.au/item_lists/53
+dialogue-all (76) https://app.alveo.edu.au/item_lists/116
+dialogue-sample (6) https://app.alveo.edu.au/item_lists/180
+mdsample (20) https://app.alveo.edu.au/item_lists/52
+one austalk sample (1) https://app.alveo.edu.au/item_lists/179
diff -r 000000000000 -r bfe39bd252df test_alveo_api_key.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_alveo_api_key.py Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,32 @@
+import unittest
+import os
+import alveo_api_key
+import pyalveo
+from mock import Mock
+
+
+class TestAlveoAPIKey(unittest.TestCase):
+    """Checks write_key() against a mocked pyalveo module, so no network is needed."""
+
+    OUTPUT_PATH = 'test.txt'
+    API_KEY = 'test123'
+    MOCK_CLIENT = Mock(pyalveo)
+
+    def test_write_key(self):
+        alveo_api_key.write_key(self.API_KEY, self.OUTPUT_PATH, self.MOCK_CLIENT)
+        # the key must have been handed to a client for the configured server
+        self.MOCK_CLIENT.Client.assert_called_with(self.API_KEY, alveo_api_key.API_URL)
+        with open(self.OUTPUT_PATH, 'r') as key_file:
+            actual = key_file.read()
+        self.assertEqual(self.API_KEY, actual)
+
+    def tearDown(self):
+        # the output file is missing when write_key() failed before writing
+        try:
+            os.remove(self.OUTPUT_PATH)
+        except OSError:
+            pass
+
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff -r 000000000000 -r bfe39bd252df test_alveo_item_list_downloader.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_alveo_item_list_downloader.py Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,50 @@
+import unittest
+import os
+import json
+import shutil
+import tempfile
+import alveo_item_list_downloader
+import pyalveo
+from mock import Mock
+
+
+class TestAlveoItemListDownloader(unittest.TestCase):
+    """Exercises the downloader helpers with mocked items and documents."""
+
+    FILENAME = 'sample.txt'
+    GALAXY_NAME = 'sample.txt_txt'
+
+    def setUp(self):
+        self.output_path = tempfile.mkdtemp()
+
+    def make_item_list(self):
+        """Build a mock item list holding one item with one document."""
+        doc = Mock()
+        doc.get_filename.return_value = self.FILENAME
+        item = Mock()
+        item.get_documents.return_value = [doc]
+        item_list = Mock()
+        item_list.get_all.return_value = [item]
+        return item_list, doc
+
+    def test_galaxy_name(self):
+        self.assertEqual(self.GALAXY_NAME, alveo_item_list_downloader.galaxy_name(self.FILENAME))
+
+    def test_download_matching_document(self):
+        item_list, doc = self.make_item_list()
+        downloaded = alveo_item_list_downloader.download_documents(item_list, ['*.txt'], self.output_path)
+        self.assertEqual([self.FILENAME], downloaded)
+        doc.download_content.assert_called_once_with(dir_path=self.output_path, filename=self.GALAXY_NAME)
+
+    def test_skip_non_matching_document(self):
+        item_list, doc = self.make_item_list()
+        downloaded = alveo_item_list_downloader.download_documents(item_list, ['*.wav', ''], self.output_path)
+        self.assertEqual([], downloaded)
+        self.assertFalse(doc.download_content.called)
+
+    def tearDown(self):
+        shutil.rmtree(self.output_path, ignore_errors=True)
+
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff -r 000000000000 -r bfe39bd252df test_alveo_item_list_importer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_alveo_item_list_importer.py Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,33 @@
+import unittest
+import os
+import json
+import alveo_item_list_importer
+import pyalveo
+from mock import Mock
+
+
+class TestAlveoItemListImporter(unittest.TestCase):
+    """Checks the table that write_table() produces for a one-entry item list dict."""
+
+    API_KEY = 'test123'
+    OUTPUT_PATH = 'test.csv'
+    ITEM_LIST = '{"shared": [{"shared": true, "num_items": 309, "name": "austalk_catepillar", "item_list_url": "https://app.alveo.edu.au/item_lists/64"}]}'
+    CSV_CONTENTS = 'austalk_catepillar (309)\thttps://app.alveo.edu.au/item_lists/64\n'
+
+    def test_write_table(self):
+        api_list = json.loads(self.ITEM_LIST)
+        alveo_item_list_importer.write_table(api_list, self.OUTPUT_PATH)
+        with open(self.OUTPUT_PATH, 'r') as table_file:
+            actual = table_file.read()
+        self.assertEqual(self.CSV_CONTENTS, actual)
+
+    def tearDown(self):
+        # the output file is missing when write_table() failed before writing
+        try:
+            os.remove(self.OUTPUT_PATH)
+        except OSError:
+            pass
+
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff -r 000000000000 -r bfe39bd252df tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,10 @@
+
+
+
+
+
+ pyalveo==0.4
+
+
+
+