Mercurial > repos > stevecassidy > alveoimport
changeset 6:56fda3d161f0 draft
planemo upload commit 0203cb3a0b40d9348674b2b098af805e2986abca-dirty
author | stevecassidy |
---|---|
date | Thu, 06 Oct 2016 11:14:39 -0400 |
parents | e28c0258a09e |
children | 5a8d9ddabec4 |
files | alveo_api_key.cwl alveo_api_key.xml alveo_get_item_data.xml alveo_get_item_list.py alveo_get_item_list.xml alveo_item_list_downloader.py alveo_item_list_downloader.xml alveo_item_list_importer.py |
diffstat | 8 files changed, 104 insertions(+), 181 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_api_key.cwl Thu Oct 06 11:14:39 2016 -0400 @@ -0,0 +1,11 @@ +#!/usr/bin/env cwl-runner +cwlVersion: 'cwl:draft-3' +class: CommandLineTool +id: "alveo_api_key" +label: "Get Alveo API Key" +inputs: [] # TODO +outputs: [] # TODO +baseCommand: [] +arguments: [] +description: | + TODO: Fill in description. \ No newline at end of file
--- a/alveo_api_key.xml Sat Sep 03 02:54:47 2016 -0400 +++ b/alveo_api_key.xml Thu Oct 06 11:14:39 2016 -0400 @@ -6,7 +6,7 @@ </requirements> <command interpreter="python"> - alveo_api_key.py --api_key $api_key --output_path $output + alveo_api_key.py --api_key "$api_key" --output_path $output </command> <inputs> @@ -19,6 +19,8 @@ <tests> <test> + <!-- expect this to fail unless you enter a valid API key here and copy it to + the result file --> <param name="api_key" value="your api key here" /> <output name="output" file="api-key.dat" compare="contains" /> </test>
--- a/alveo_get_item_data.xml Sat Sep 03 02:54:47 2016 -0400 +++ b/alveo_get_item_data.xml Thu Oct 06 11:14:39 2016 -0400 @@ -52,9 +52,7 @@ </test> </tests> - <help>Downloads files from a local list of Alveo items. You can download all files or those matching - a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in - your history.</help> + <help>Downloads files from a local list of Alveo items. You can download all files or those matching a wildcard pattern (e.g. \*.txt). Results will be stored as a dataset collection in your history.</help> <citations> <citation type='bibtex'> @article{cassidy2014alveo,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_get_item_list.py Thu Oct 06 11:14:39 2016 -0400 @@ -0,0 +1,37 @@ +from __future__ import print_function +import json +import argparse +import pyalveo +import sys +import os +from fnmatch import fnmatch + +API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module + +def parser(): + parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") + parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") + parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download") + parser.add_argument('--output', required=True, action="store", type=str, help="output file name") + return parser.parse_args() + +def main(): + args = parser() + try: + api_key = open(args.api_key, 'r').read().strip() + + client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False) + item_list = client.get_item_list(args.item_list_url) + + with open(args.output, 'w') as out: + out.write("ItemURL\n") + for item in item_list: + out.write(item + "\n") + print(item) + + except pyalveo.APIError as e: + print("ERROR: " + str(e), file=sys.stderr) + sys.exit(1) + +if __name__ == '__main__': + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_get_item_list.xml Thu Oct 06 11:14:39 2016 -0400 @@ -0,0 +1,52 @@ +<tool id="alveo_get_item_list" name="Get Item List from Alveo" version="0.01" force_history_refresh="True"> + <description>Retrieves Item URLs from an Alveo Item List</description> + + <requirements> + <requirement type="package" version="0.6">pyalveo</requirement> + </requirements> + + <command interpreter="python"> + alveo_get_item_list.py --api_key $api_key --item_list_url $item_list_url --output $output + </command> + + <inputs> + <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/> + <param name="import_list" type="data" format="tabular" label="Imported Alveo Item List" help=""/> + + <param name="item_list_url" type="select" label="Alveo Item List" help="The Alveo Item List you wish to import"> + <options from_dataset="import_list"> + <column name="name" index="0"/> + <column name="value" index="1"/> + </options> + </param> + + <param name="job_name" type="text" size="25" + label="Supply a name for the outputs to remind you what they contain" value="Item List"/> + </inputs> + + <outputs> + <data format="tabular" name="output" label="${job_name}"/> + </outputs> + + <tests> + <test> + <param name="api_key" value="api-key.dat"/> + <param name="import_list" value="item-lists.dat"/> + <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/> + <param name="job_name" value="test_output_180.dat"/> + <output name="output" file="item_list_180.dat"/> + </test> + </tests> + + <help>Get the URLs of all of the items from an Alveo item list.</help> + <citations> + <citation type='bibtex'> + @article{cassidy2014alveo, + title={The alveo virtual laboratory: a web based repository API}, + author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others}, + year={2014}, + publisher={Reykjavik, Iceland: European Language Resources Association} + } + </citation> + </citations> +</tool>
--- a/alveo_item_list_downloader.py Sat Sep 03 02:54:47 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ -from __future__ import print_function -import json -import argparse -import pyalveo -import sys -import os -from fnmatch import fnmatch - -API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module - -def parser(): - parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") - parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") - parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download") - parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") - parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") - return parser.parse_args() - -def get_item_list(api_key, item_list_url): - client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False) - return client.get_item_list(item_list_url) - -# this file name pattern allows galaxy to discover the dataset designation and type -FNPAT = "%(designation)s_%(ext)s" - - -def galaxy_name(itemname, fname): - """construct a filename suitable for Galaxy dataset discovery - designation - (dataset identifier) is the file basename - ext - defines the dataset type and is the file extension - """ - - root, ext = os.path.splitext(fname) - ext = ext[1:] # remove initial . - fname = FNPAT % {'designation': itemname, 'ext': ext} - - return fname - - -def download_documents(item_list, patterns, output_path): - """ - Downloads a list of documents to the directory specificed by output_path. - - :type documents: list of pyalveo.Document - :param documents: Documents to download - - :type output_path: String - :param output_path: directory to download to the documents to - """ - if not os.path.exists(output_path): - os.makedirs(output_path) - - downloaded = [] - - items = item_list.get_all() - filtered_documents = [] - for item in items: - documents = item.get_documents() - for doc in documents: - for pattern in patterns: - if not pattern == '' and fnmatch(doc.get_filename(), pattern): - fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename()) - try: - doc.download_content(dir_path=output_path, filename=fname) - downloaded.append(doc.get_filename()) - except: - # maybe it doesn't exist or we have no access - # TODO: report this - pass - return downloaded - -def main(): - args = parser() - try: - api_key = open(args.api_key, 'r').read().strip() - item_list = get_item_list(api_key, args.item_list_url) - patterns = args.patterns.split(',') - downloaded = download_documents(item_list, patterns, args.output_path) - # write out a list of downloaded files as a result? - except pyalveo.APIError as e: - print("ERROR: " + str(e), file=sys.stderr) - sys.exit(1) - -if __name__ == '__main__': - main()
--- a/alveo_item_list_downloader.xml Sat Sep 03 02:54:47 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,91 +0,0 @@ -<tool id="alveo_item_list_downloader" name="Get Files from Alveo" version="0.01" force_history_refresh="True"> - <description>Downloads files from the items in an Alveo Item List</description> - - <requirements> - <requirement type="package" version="0.6">pyalveo</requirement> - </requirements> - - <command interpreter="python"> - alveo_item_list_downloader.py --api_key $api_key --item_list_url $item_list_url --patterns $patterns,$patternselect --output_path ItemListData - </command> - - <inputs> - <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/> - <param name="import_list" type="data" format="tabular" label="Imported Alveo Item List" help=""/> - - <param name="item_list_url" type="select" label="Alveo Item List" help="The Alveo Item List you wish to import"> - <options from_dataset="import_list"> - <column name="name" index="0"/> - <column name="value" index="1"/> - </options> - </param> - - <param name="patternselect" type="select" multiple="true" label="Predefined imports" display="checkboxes"> - <option value='*'>All Files</option> - <option value='*speaker16.wav'>Austalk 16bit/16kHz Speaker Headset WAV (*speaker16.wav)</option> - <option value='*plain.txt'>Plain text documents (*plain.txt)</option> - <option value='*.txt'>All text documents (*.txt)</option> - <option value=''>Other - enter pattern below</option> - </param> - - <param name="patterns" type="text" label="File patterns to import" - optional="true" - help="One or more file patterns separated by commas eg. *.wav,*.txt"/> - - <param name="job_name" type="text" size="25" - label="Supply a name for the outputs to remind you what they contain" value="Item List downloaded from Alveo"/> - </inputs> - - <outputs> - <collection type="list" label="$job_name" name="output1"> - <discover_datasets pattern="(?P<designation>[^_]+)_(?P<ext>.+)" directory="ItemListData"/> - </collection> - </outputs> - - <tests> - <test> - <param name="api_key" value="api-key.dat"/> - <param name="import_list" value="item-lists.dat"/> - <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/> - <param name="patterns" value=""/> - <param name="patternselect" value="*plain.txt"/> - <param name="output_path" value="test_out"/> - <output_collection name="output1" type="list" count="6"> - <element name="GCSAusE02"> - <assert_contents> - <has_text_matching expression="background noises"/> - </assert_contents> - </element> - </output_collection> - </test> - <test> - <param name="api_key" value="api-key.dat"/> - <param name="import_list" value="item-lists.dat"/> - <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/> - <param name="patterns" value="*plain.txt"/> - <param name="patternselect" value=""/> - <param name="output_path" value="test_out"/> - <output_collection name="output1" type="list" count="6"> - <element name="GCSAusE02"> - <assert_contents> - <has_text_matching expression="background noises"/> - </assert_contents> - </element> - </output_collection> - </test> - </tests> - - <help>Downloads files from an Alveo Item list. You can download all files or those matching - a wildcard pattern (e.g. *.txt). Results will be stored as a dataset collection in - your history.</help> - <citations> - <citation type='bibtex'> - @article{cassidy2014alveo, - title={The alveo virtual laboratory: a web based repository API}, - author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others}, - year={2014}, - publisher={Reykjavik, Iceland: European Language Resources Association} - } - </citation> - </citations> -</tool>
--- a/alveo_item_list_importer.py Sat Sep 03 02:54:47 2016 -0400 +++ b/alveo_item_list_importer.py Thu Oct 06 11:14:39 2016 -0400 @@ -29,7 +29,6 @@ try: api_key = open(args.api_key, 'r').read().strip() item_lists = get_item_lists(api_key) - print(item_lists) if item_lists: write_table(item_lists, args.output) except Exception as e: