Mercurial > repos > stevecassidy > alveoimport
diff alveo_get_primary_text.py @ 9:2f4907372748 draft
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
author | stevecassidy |
---|---|
date | Wed, 16 Nov 2016 15:01:03 -0500 |
parents | 3a9f20428cff |
children | a38315ecf593 |
line wrap: on
line diff
--- a/alveo_get_primary_text.py Wed Oct 12 22:08:02 2016 -0400
+++ b/alveo_get_primary_text.py Wed Nov 16 15:01:03 2016 -0500
@@ -5,13 +5,15 @@
 import sys
 import os
 from fnmatch import fnmatch
+import csv
+
 
 API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
 
 def parser():
     parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
     parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
-    parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download")
+    parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs")
     parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
     return parser.parse_args()
 
@@ -28,6 +30,7 @@
     fname = FNPAT % {'designation': fname, 'ext': ext}
     return fname
 
+
 import pprint
 def download_documents(item_list, output_path):
     """
@@ -56,13 +59,31 @@
     return downloaded
 
 
+def read_item_list(filename, client):
+    """Read an item list from a file
+    which should be a tabular formatted file
+    with one column header ItemURL.
+    Return an instance of ItemGroup"""
+
+    with open(filename) as fd:
+        csvreader = csv.DictReader(fd, dialect='excel-tab')
+        if 'ItemURL' not in csvreader.fieldnames:
+            return None
+        itemurls = []
+        for row in csvreader:
+            itemurls.append(row['ItemURL'])
+
+    itemlist = pyalveo.ItemGroup(itemurls, client)
+
+    return itemlist
+
 def main():
     args = parser()
     try:
         api_key = open(args.api_key, 'r').read().strip()
-        item_list = get_item_list(api_key, args.item_list_url)
+        client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False)
+        item_list = read_item_list(args.item_list, client)
         downloaded = download_documents(item_list, args.output_path)
-        # write out a list of downloaded files as a result?
     except pyalveo.APIError as e:
         print("ERROR: " + str(e), file=sys.stderr)
         sys.exit(1)