Mercurial > repos > stevecassidy > alveoimport
comparison alveo_get_item_data.py @ 14:a38315ecf593 draft
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author | stevecassidy |
---|---|
date | Wed, 01 Nov 2017 01:18:15 -0400 |
parents | be3fd14899a1 |
children | 5e1b7d922ea3 |
comparison
equal
deleted
inserted
replaced
13:be3fd14899a1 | 14:a38315ecf593 |
---|---|
1 from __future__ import print_function | 1 from __future__ import print_function |
2 import json | |
3 import argparse | 2 import argparse |
4 import pyalveo | 3 import pyalveo |
5 import sys | 4 import sys |
6 import os | 5 import os |
7 from fnmatch import fnmatch | 6 from fnmatch import fnmatch |
8 import csv | 7 from util import API_URL, read_item_list |
9 | 8 |
10 API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module | |
11 | 9 |
12 def parser(): | 10 def parser(): |
13 parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") | 11 p = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") |
14 parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") | 12 p.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") |
15 parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs") | 13 p.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs") |
16 parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") | 14 p.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") |
17 parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") | 15 p.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") |
18 return parser.parse_args() | 16 return p.parse_args() |
19 | 17 |
20 def read_item_list(filename, client): | |
21 """Read an item list from a file | |
22 which should be a tabular formatted file | |
23 with one column header ItemURL. | |
24 Return an instance of ItemGroup""" | |
25 | |
26 with open(filename) as fd: | |
27 csvreader = csv.DictReader(fd, dialect='excel-tab') | |
28 if 'ItemURL' not in csvreader.fieldnames: | |
29 return None | |
30 itemurls = [] | |
31 for row in csvreader: | |
32 itemurls.append(row['ItemURL']) | |
33 | |
34 itemlist = pyalveo.ItemGroup(itemurls, client) | |
35 | |
36 return itemlist | |
37 | 18 |
38 # this file name pattern allows galaxy to discover the dataset designation and type | 19 # this file name pattern allows galaxy to discover the dataset designation and type |
39 FNPAT = "%(designation)s#%(ext)s" | 20 FNPAT = "%(designation)s#%(ext)s" |
21 | |
40 | 22 |
41 def galaxy_name(itemname, fname): | 23 def galaxy_name(itemname, fname): |
42 """construct a filename suitable for Galaxy dataset discovery | 24 """construct a filename suitable for Galaxy dataset discovery |
43 designation - (dataset identifier) is the file basename | 25 designation - (dataset identifier) is the file basename |
44 ext - defines the dataset type and is the file extension | 26 ext - defines the dataset type and is the file extension |
45 """ | 27 """ |
46 | 28 |
47 root, ext = os.path.splitext(fname) | 29 root, ext = os.path.splitext(fname) |
48 ext = ext[1:] # remove initial . | 30 ext = ext[1:] # remove initial . |
49 fname = FNPAT % {'designation': root, 'ext': ext} | 31 fname = FNPAT % {'designation': root, 'ext': ext} |
50 | 32 |
51 return fname | 33 return fname |
34 | |
52 | 35 |
53 def download_documents(item_list, patterns, output_path): | 36 def download_documents(item_list, patterns, output_path): |
54 """ | 37 """ |
55 Downloads a list of documents to the directory specified by output_path. | 38 Downloads a list of documents to the directory specified by output_path. |
56 | 39 |
64 os.makedirs(output_path) | 47 os.makedirs(output_path) |
65 | 48 |
66 downloaded = [] | 49 downloaded = [] |
67 | 50 |
68 items = item_list.get_all() | 51 items = item_list.get_all() |
69 filtered_documents = [] | |
70 for item in items: | 52 for item in items: |
71 documents = item.get_documents() | 53 documents = item.get_documents() |
72 for doc in documents: | 54 for doc in documents: |
73 for pattern in patterns: | 55 for pattern in patterns: |
74 if not pattern == '' and fnmatch(doc.get_filename(), pattern): | 56 if not pattern == '' and fnmatch(doc.get_filename(), pattern): |
75 fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename()) | 57 fname = galaxy_name(item.metadata()['alveo:metadata']['dcterms:identifier'], doc.get_filename()) |
76 try: | 58 try: |
77 doc.download_content(dir_path=output_path, filename=fname) | 59 doc.download_content(dir_path=output_path, filename=fname) |
78 downloaded.append(doc.get_filename()) | 60 downloaded.append(doc.get_filename()) |
79 except: | 61 except pyalveo.APIError as e: |
80 # maybe it doesn't exist or we have no access | 62 print("ERROR: " + str(e), file=sys.stderr) |
81 # TODO: report this | 63 sys.exit(1) |
82 pass | |
83 return downloaded | 64 return downloaded |
65 | |
84 | 66 |
85 def main(): | 67 def main(): |
86 args = parser() | 68 args = parser() |
87 try: | 69 try: |
88 api_key = open(args.api_key, 'r').read().strip() | 70 api_key = open(args.api_key, 'r').read().strip() |
89 | 71 |
90 client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False) | 72 client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False) |
91 | 73 |
92 item_list = read_item_list(args.item_list, client) | 74 item_list = read_item_list(args.item_list, client) |
93 patterns = args.patterns.split(',') | 75 patterns = args.patterns.split(',') |
94 downloaded = download_documents(item_list, patterns, args.output_path) | 76 download_documents(item_list, patterns, args.output_path) |
95 except pyalveo.APIError as e: | 77 except pyalveo.APIError as e: |
96 print("ERROR: " + str(e), file=sys.stderr) | 78 print("ERROR: " + str(e), file=sys.stderr) |
97 sys.exit(1) | 79 sys.exit(1) |
98 | 80 |
81 | |
99 if __name__ == '__main__': | 82 if __name__ == '__main__': |
100 main() | 83 main() |