# HG changeset patch # User stevecassidy # Date 1475766879 14400 # Node ID 56fda3d161f02ff57d6b284bf45eaf4604ba0afd # Parent e28c0258a09e364829ff40548e4c21bffcba75fe planemo upload commit 0203cb3a0b40d9348674b2b098af805e2986abca-dirty diff -r e28c0258a09e -r 56fda3d161f0 alveo_api_key.cwl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_api_key.cwl Thu Oct 06 11:14:39 2016 -0400 @@ -0,0 +1,11 @@ +#!/usr/bin/env cwl-runner +cwlVersion: 'cwl:draft-3' +class: CommandLineTool +id: "alveo_api_key" +label: "Get Alveo API Key" +inputs: [] # TODO +outputs: [] # TODO +baseCommand: [] +arguments: [] +description: | + TODO: Fill in description. \ No newline at end of file diff -r e28c0258a09e -r 56fda3d161f0 alveo_api_key.xml --- a/alveo_api_key.xml Sat Sep 03 02:54:47 2016 -0400 +++ b/alveo_api_key.xml Thu Oct 06 11:14:39 2016 -0400 @@ -6,7 +6,7 @@ - alveo_api_key.py --api_key $api_key --output_path $output + alveo_api_key.py --api_key "$api_key" --output_path $output @@ -19,6 +19,8 @@ + diff -r e28c0258a09e -r 56fda3d161f0 alveo_get_item_data.xml --- a/alveo_get_item_data.xml Sat Sep 03 02:54:47 2016 -0400 +++ b/alveo_get_item_data.xml Thu Oct 06 11:14:39 2016 -0400 @@ -52,9 +52,7 @@ - Downloads files from a local list of Alveo items. You can download all files or those matching - a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in - your history. + Downloads files from a local list of Alveo items. You can download all files or those matching a wildcard pattern (e.g. \*.txt). Results will be stored as a dataset collection in your history. @article{cassidy2014alveo, diff -r e28c0258a09e -r 56fda3d161f0 alveo_get_item_list.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_get_item_list.py Thu Oct 06 11:14:39 2016 -0400 @@ -0,0 +1,37 @@ +from __future__ import print_function +import json +import argparse +import pyalveo +import sys +import os +from fnmatch import fnmatch + +API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module + +def parser(): + parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") + parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") + parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download") + parser.add_argument('--output', required=True, action="store", type=str, help="output file name") + return parser.parse_args() + +def main(): + args = parser() + try: + api_key = open(args.api_key, 'r').read().strip() + + client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False) + item_list = client.get_item_list(args.item_list_url) + + with open(args.output, 'w') as out: + out.write("ItemURL\n") + for item in item_list: + out.write(item + "\n") + print(item) + + except pyalveo.APIError as e: + print("ERROR: " + str(e), file=sys.stderr) + sys.exit(1) + +if __name__ == '__main__': + main() diff -r e28c0258a09e -r 56fda3d161f0 alveo_get_item_list.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_get_item_list.xml Thu Oct 06 11:14:39 2016 -0400 @@ -0,0 +1,52 @@ + + Retrieves Item URLs from an Alveo Item List + + + pyalveo + + + + alveo_get_item_list.py --api_key $api_key --item_list_url $item_list_url --output $output + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Get the URLs of all of the items from an Alveo item list. + + + @article{cassidy2014alveo, + title={The alveo virtual laboratory: a web based repository API}, + author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others}, + year={2014}, + publisher={Reykjavik, Iceland: European Language Resources Association} + } + + + diff -r e28c0258a09e -r 56fda3d161f0 alveo_item_list_downloader.py --- a/alveo_item_list_downloader.py Sat Sep 03 02:54:47 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ -from __future__ import print_function -import json -import argparse -import pyalveo -import sys -import os -from fnmatch import fnmatch - -API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module - -def parser(): - parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") - parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") - parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download") - parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") - parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") - return parser.parse_args() - -def get_item_list(api_key, item_list_url): - client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False) - return client.get_item_list(item_list_url) - -# this file name pattern allows galaxy to discover the dataset designation and type -FNPAT = "%(designation)s_%(ext)s" - - -def galaxy_name(itemname, fname): - """construct a filename suitable for Galaxy dataset discovery - designation - (dataset identifier) is the file basename - ext - defines the dataset type and is the file extension - """ - - root, ext = os.path.splitext(fname) - ext = ext[1:] # remove initial . - fname = FNPAT % {'designation': itemname, 'ext': ext} - - return fname - - -def download_documents(item_list, patterns, output_path): - """ - Downloads a list of documents to the directory specificed by output_path. - - :type documents: list of pyalveo.Document - :param documents: Documents to download - - :type output_path: String - :param output_path: directory to download to the documents to - """ - if not os.path.exists(output_path): - os.makedirs(output_path) - - downloaded = [] - - items = item_list.get_all() - filtered_documents = [] - for item in items: - documents = item.get_documents() - for doc in documents: - for pattern in patterns: - if not pattern == '' and fnmatch(doc.get_filename(), pattern): - fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename()) - try: - doc.download_content(dir_path=output_path, filename=fname) - downloaded.append(doc.get_filename()) - except: - # maybe it doesn't exist or we have no access - # TODO: report this - pass - return downloaded - -def main(): - args = parser() - try: - api_key = open(args.api_key, 'r').read().strip() - item_list = get_item_list(api_key, args.item_list_url) - patterns = args.patterns.split(',') - downloaded = download_documents(item_list, patterns, args.output_path) - # write out a list of downloaded files as a result? - except pyalveo.APIError as e: - print("ERROR: " + str(e), file=sys.stderr) - sys.exit(1) - -if __name__ == '__main__': - main() diff -r e28c0258a09e -r 56fda3d161f0 alveo_item_list_downloader.xml --- a/alveo_item_list_downloader.xml Sat Sep 03 02:54:47 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,91 +0,0 @@ - - Downloads files from the items in an Alveo Item List - - - pyalveo - - - - alveo_item_list_downloader.py --api_key $api_key --item_list_url $item_list_url --patterns $patterns,$patternselect --output_path ItemListData - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Downloads files from an Alveo Item list. You can download all files or those matching - a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in - your history. - - - @article{cassidy2014alveo, - title={The alveo virtual laboratory: a web based repository API}, - author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others}, - year={2014}, - publisher={Reykjavik, Iceland: European Language Resources Association} - } - - - diff -r e28c0258a09e -r 56fda3d161f0 alveo_item_list_importer.py --- a/alveo_item_list_importer.py Sat Sep 03 02:54:47 2016 -0400 +++ b/alveo_item_list_importer.py Thu Oct 06 11:14:39 2016 -0400 @@ -29,7 +29,6 @@ try: api_key = open(args.api_key, 'r').read().strip() item_lists = get_item_lists(api_key) - print(item_lists) if item_lists: write_table(item_lists, args.output) except Exception as e: