diff alveo_item_list_downloader.py @ 0:bfe39bd252df draft

planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
author stevecassidy
date Mon, 18 Jul 2016 23:49:40 -0400
parents
children 6fef3489d97c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_item_list_downloader.py	Mon Jul 18 23:49:40 2016 -0400
@@ -0,0 +1,80 @@
+from __future__ import print_function
+import json
+import argparse
+import pyalveo
+import sys
+import os
+from fnmatch import fnmatch
+
+# Base URL handed to pyalveo.Client as api_url by get_item_list().
+API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
+
+def parser():
+    """
+    Parses the command line of the downloader.
+
+    All four options are required; argparse exits with a usage message
+    when one is missing.
+
+    :rtype: argparse.Namespace
+    :returns: namespace with api_key, item_list_url, patterns and output_path
+    """
+    # named arg_parser so the local does not shadow this function's own name
+    arg_parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
+    # main() opens --api_key as a file, so the help has to ask for a path
+    arg_parser.add_argument('--api_key', required=True, action="store", type=str, help="Path to a file containing the Alveo API key")
+    arg_parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download")
+    # main() splits --patterns on ','
+    arg_parser.add_argument('--patterns', required=True, action="store", type=str, help="Comma-separated file patterns to download")
+    # download_documents() treats --output_path as a directory and creates it
+    arg_parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to the output directory (created if missing)")
+    return arg_parser.parse_args()
+
+def get_item_list(api_key, item_list_url):
+    """
+    Fetches an item list through a pyalveo client.
+
+    :type api_key: String
+    :param api_key: key the pyalveo.Client is created with
+
+    :type item_list_url: String
+    :param item_list_url: passed unchanged to Client.get_item_list
+
+    :returns: whatever Client.get_item_list returns for item_list_url
+    """
+    alveo = pyalveo.Client(api_url=API_URL, api_key=api_key)
+    item_list = alveo.get_item_list(item_list_url)
+    return item_list
+
+# this file name pattern allows galaxy to discover the dataset designation and type
+FNPAT = "%(designation)s_%(ext)s"
+
+def galaxy_name(fname):
+    """
+    Builds the "<designation>_<ext>" file name used for Galaxy dataset discovery.
+
+    The designation is the whole of fname, extension included, and ext is
+    that extension without its leading dot (empty when fname has none),
+    so "a.wav" becomes "a.wav_wav".
+    """
+    # NOTE(review): the extension-less root is never used, so the extension
+    # appears twice in the result; confirm this is intended
+    extension = os.path.splitext(fname)[1][1:]
+    return FNPAT % {'ext': extension, 'designation': fname}
+
+def download_documents(item_list, patterns, output_path):
+    """
+    Downloads the documents of an item list whose file names match a pattern.
+
+    Every matching document is saved into output_path under the name built
+    by galaxy_name(). A document is fetched at most once, even when several
+    patterns match it. A document that cannot be downloaded is reported and
+    skipped; the remaining documents are still processed.
+
+    :type item_list: item list as returned by get_item_list
+    :param item_list: source of the items whose documents are considered
+
+    :type patterns: list of String
+    :param patterns: fnmatch patterns; empty strings are ignored
+
+    :type output_path: String
+    :param output_path: directory to download the documents to, created
+                        when it does not exist
+
+    :rtype: list of String
+    :returns: original file names of the documents that were downloaded
+    """
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # blank entries (e.g. from a trailing comma) must never select anything
+    patterns = [pattern for pattern in patterns if pattern != '']
+    downloaded = []
+
+    for item in item_list.get_all():
+        for doc in item.get_documents():
+            filename = doc.get_filename()
+            # any() stops at the first hit, so a document matched by several
+            # patterns is not downloaded once per pattern
+            if not any(fnmatch(filename, pattern) for pattern in patterns):
+                continue
+            try:
+                doc.download_content(dir_path=output_path, filename=galaxy_name(filename))
+            except Exception as e:
+                # maybe it doesn't exist or we have no access: skip it, but
+                # let KeyboardInterrupt/SystemExit through. Reported on
+                # stdout because main() uses stderr for its fatal errors.
+                print("WARNING: could not download %s: %s" % (filename, e))
+            else:
+                downloaded.append(filename)
+    return downloaded
+
+def main():
+    """
+    Command-line entry point of the downloader.
+
+    Reads the API key from the file named by --api_key, fetches the item
+    list at --item_list_url and downloads the documents matching the
+    comma-separated --patterns into --output_path. On an API error or an
+    I/O error the message is printed to stderr and the process exits with
+    status 1.
+    """
+    args = parser()
+    try:
+        # with-block closes the key file instead of leaking the handle
+        with open(args.api_key, 'r') as key_file:
+            api_key = key_file.read().strip()
+        item_list = get_item_list(api_key, args.item_list_url)
+        patterns = args.patterns.split(',')
+        download_documents(item_list, patterns, args.output_path)
+        # write out a list of downloaded files as a result?
+    except (pyalveo.APIError, IOError) as e:
+        # IOError: e.g. the key file is missing or unreadable
+        print("ERROR: " + str(e), file=sys.stderr)
+        sys.exit(1)
+
+# run the downloader only when this file is executed as a script
+if __name__ == '__main__':
+    main()