diff alveo_get_item_data.py @ 14:a38315ecf593 draft

planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author stevecassidy
date Wed, 01 Nov 2017 01:18:15 -0400
parents be3fd14899a1
children 5e1b7d922ea3
line wrap: on
line diff
--- a/alveo_get_item_data.py	Wed Feb 01 22:34:24 2017 -0500
+++ b/alveo_get_item_data.py	Wed Nov 01 01:18:15 2017 -0400
@@ -1,43 +1,25 @@
 from __future__ import print_function
-import json
 import argparse
 import pyalveo
 import sys
 import os
 from fnmatch import fnmatch
-import csv
+from util import API_URL, read_item_list
 
-API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
 
 def parser():
-    parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
-    parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
-    parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs")
-    parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download")
-    parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
-    return parser.parse_args()
+    p = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
+    p.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
+    p.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs")
+    p.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download")
+    p.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
+    return p.parse_args()
 
-def read_item_list(filename, client):
-    """Read an item list from a file
-    which should be a tabular formatted file
-    with one column header ItemURL.
-    Return an instance of ItemGroup"""
-
-    with open(filename) as fd:
-        csvreader = csv.DictReader(fd, dialect='excel-tab')
-        if 'ItemURL' not in csvreader.fieldnames:
-            return None
-        itemurls = []
-        for row in csvreader:
-            itemurls.append(row['ItemURL'])
-
-    itemlist = pyalveo.ItemGroup(itemurls, client)
-
-    return itemlist
 
 # this file name pattern allows galaxy to discover the dataset designation and type
 FNPAT = "%(designation)s#%(ext)s"
 
+
 def galaxy_name(itemname, fname):
     """construct a filename suitable for Galaxy dataset discovery
     designation - (dataset identifier) is the file basename
@@ -45,11 +27,12 @@
     """
 
     root, ext = os.path.splitext(fname)
-    ext = ext[1:] # remove initial .
+    ext = ext[1:]  # remove initial .
     fname = FNPAT % {'designation': root, 'ext': ext}
 
     return fname
 
+
 def download_documents(item_list, patterns, output_path):
     """
     Downloads a list of documents to the directory specified by output_path.
@@ -66,22 +49,21 @@
     downloaded = []
 
     items = item_list.get_all()
-    filtered_documents = []
     for item in items:
         documents = item.get_documents()
         for doc in documents:
             for pattern in patterns:
                 if not pattern == '' and fnmatch(doc.get_filename(), pattern):
-                    fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename())
+                    fname = galaxy_name(item.metadata()['alveo:metadata']['dcterms:identifier'], doc.get_filename())
                     try:
                         doc.download_content(dir_path=output_path, filename=fname)
                         downloaded.append(doc.get_filename())
-                    except:
-                        # maybe it doesn't exist or we have no access
-                        # TODO: report this
-                        pass
+                    except pyalveo.APIError as e:
+                        print("ERROR: " + str(e), file=sys.stderr)
+                        sys.exit(1)
     return downloaded
 
+
 def main():
     args = parser()
     try:
@@ -91,10 +73,11 @@
 
         item_list = read_item_list(args.item_list, client)
         patterns = args.patterns.split(',')
-        downloaded = download_documents(item_list, patterns, args.output_path)
+        download_documents(item_list, patterns, args.output_path)
     except pyalveo.APIError as e:
         print("ERROR: " + str(e), file=sys.stderr)
         sys.exit(1)
 
+
 if __name__ == '__main__':
     main()