Mercurial > repos > stevecassidy > alveoimport
diff alveo_get_item_data.py @ 14:a38315ecf593 draft
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author | stevecassidy |
---|---|
date | Wed, 01 Nov 2017 01:18:15 -0400 |
parents | be3fd14899a1 |
children | 5e1b7d922ea3 |
line wrap: on
line diff
--- a/alveo_get_item_data.py Wed Feb 01 22:34:24 2017 -0500 +++ b/alveo_get_item_data.py Wed Nov 01 01:18:15 2017 -0400 @@ -1,43 +1,25 @@ from __future__ import print_function -import json import argparse import pyalveo import sys import os from fnmatch import fnmatch -import csv +from util import API_URL, read_item_list -API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module def parser(): - parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") - parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") - parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs") - parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") - parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") - return parser.parse_args() + p = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") + p.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") + p.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs") + p.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") + p.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") + return p.parse_args() -def read_item_list(filename, client): - """Read an item list from a file - which should be a tabular formatted file - with one column header ItemURL. - Return an instance of ItemGroup""" - - with open(filename) as fd: - csvreader = csv.DictReader(fd, dialect='excel-tab') - if 'ItemURL' not in csvreader.fieldnames: - return None - itemurls = [] - for row in csvreader: - itemurls.append(row['ItemURL']) - - itemlist = pyalveo.ItemGroup(itemurls, client) - - return itemlist # this file name pattern allows galaxy to discover the dataset designation and type FNPAT = "%(designation)s#%(ext)s" + def galaxy_name(itemname, fname): """construct a filename suitable for Galaxy dataset discovery designation - (dataset identifier) is the file basename @@ -45,11 +27,12 @@ """ root, ext = os.path.splitext(fname) - ext = ext[1:] # remove initial . + ext = ext[1:] # remove initial . fname = FNPAT % {'designation': root, 'ext': ext} return fname + def download_documents(item_list, patterns, output_path): """ Downloads a list of documents to the directory specificed by output_path. @@ -66,22 +49,21 @@ downloaded = [] items = item_list.get_all() - filtered_documents = [] for item in items: documents = item.get_documents() for doc in documents: for pattern in patterns: if not pattern == '' and fnmatch(doc.get_filename(), pattern): - fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename()) + fname = galaxy_name(item.metadata()['alveo:metadata']['dcterms:identifier'], doc.get_filename()) try: doc.download_content(dir_path=output_path, filename=fname) downloaded.append(doc.get_filename()) - except: - # maybe it doesn't exist or we have no access - # TODO: report this - pass + except pyalveo.APIError as e: + print("ERROR: " + str(e), file=sys.stderr) + sys.exit(1) return downloaded + def main(): args = parser() try: @@ -91,10 +73,11 @@ item_list = read_item_list(args.item_list, client) patterns = args.patterns.split(',') - downloaded = download_documents(item_list, patterns, args.output_path) + download_documents(item_list, patterns, args.output_path) except pyalveo.APIError as e: print("ERROR: " + str(e), file=sys.stderr) sys.exit(1) + if __name__ == '__main__': main()