Mercurial > repos > stevecassidy > alveoimport
diff alveo_item_list_downloader.py @ 0:bfe39bd252df draft
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
author | stevecassidy |
---|---|
date | Mon, 18 Jul 2016 23:49:40 -0400 |
parents | |
children | 6fef3489d97c |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alveo_item_list_downloader.py Mon Jul 18 23:49:40 2016 -0400 @@ -0,0 +1,80 @@ +from __future__ import print_function +import json +import argparse +import pyalveo +import sys +import os +from fnmatch import fnmatch + +API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module + +def parser(): + parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") + parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") + parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download") + parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") + parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") + return parser.parse_args() + +def get_item_list(api_key, item_list_url): + client = pyalveo.Client(api_key=api_key, api_url=API_URL) + return client.get_item_list(item_list_url) + +# this file name pattern allows galaxy to discover the dataset designation and type +FNPAT = "%(designation)s_%(ext)s" + +def galaxy_name(fname): + """construct a filename suitable for Galaxy dataset discovery""" + + root, ext = os.path.splitext(fname) + ext = ext[1:] # remove initial . + fname = FNPAT % {'designation': fname, 'ext': ext} + + return fname + +def download_documents(item_list, patterns, output_path): + """ + Downloads a list of documents to the directory specificed by output_path. + + :type documents: list of pyalveo.Document + :param documents: Documents to download + + :type output_path: String + :param output_path: directory to download to the documents to + """ + if not os.path.exists(output_path): + os.makedirs(output_path) + + downloaded = [] + + items = item_list.get_all() + filtered_documents = [] + for item in items: + documents = item.get_documents() + for doc in documents: + for pattern in patterns: + if not pattern == '' and fnmatch(doc.get_filename(), pattern): + fname = galaxy_name(doc.get_filename()) + try: + doc.download_content(dir_path=output_path, filename=fname) + downloaded.append(doc.get_filename()) + except: + # maybe it doesn't exist or we have no access + # TODO: report this + pass + return downloaded + +def main(): + args = parser() + try: + api_key = open(args.api_key, 'r').read().strip() + item_list = get_item_list(api_key, args.item_list_url) + patterns = args.patterns.split(',') + downloaded = download_documents(item_list, patterns, args.output_path) + # write out a list of downloaded files as a result? + except pyalveo.APIError as e: + print("ERROR: " + str(e), file=sys.stderr) + sys.exit(1) + +if __name__ == '__main__': + main()