comparison alveo_get_item_data.py @ 14:a38315ecf593 draft

planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author stevecassidy
date Wed, 01 Nov 2017 01:18:15 -0400
parents be3fd14899a1
children 5e1b7d922ea3
comparison
equal deleted inserted replaced
13:be3fd14899a1 14:a38315ecf593
1 from __future__ import print_function 1 from __future__ import print_function
2 import json
3 import argparse 2 import argparse
4 import pyalveo 3 import pyalveo
5 import sys 4 import sys
6 import os 5 import os
7 from fnmatch import fnmatch 6 from fnmatch import fnmatch
8 import csv 7 from util import API_URL, read_item_list
9 8
10 API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
11 9
12 def parser(): 10 def parser():
13 parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") 11 p = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
14 parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") 12 p.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
15 parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs") 13 p.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs")
16 parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download") 14 p.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download")
17 parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") 15 p.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
18 return parser.parse_args() 16 return p.parse_args()
19 17
20 def read_item_list(filename, client):
21 """Read an item list from a file
22 which should be a tabular formatted file
23 with one column header ItemURL.
24 Return an instance of ItemGroup"""
25
26 with open(filename) as fd:
27 csvreader = csv.DictReader(fd, dialect='excel-tab')
28 if 'ItemURL' not in csvreader.fieldnames:
29 return None
30 itemurls = []
31 for row in csvreader:
32 itemurls.append(row['ItemURL'])
33
34 itemlist = pyalveo.ItemGroup(itemurls, client)
35
36 return itemlist
37 18
38 # this file name pattern allows galaxy to discover the dataset designation and type 19 # this file name pattern allows galaxy to discover the dataset designation and type
39 FNPAT = "%(designation)s#%(ext)s" 20 FNPAT = "%(designation)s#%(ext)s"
21
40 22
41 def galaxy_name(itemname, fname): 23 def galaxy_name(itemname, fname):
42 """construct a filename suitable for Galaxy dataset discovery 24 """construct a filename suitable for Galaxy dataset discovery
43 designation - (dataset identifier) is the file basename 25 designation - (dataset identifier) is the file basename
44 ext - defines the dataset type and is the file extension 26 ext - defines the dataset type and is the file extension
45 """ 27 """
46 28
47 root, ext = os.path.splitext(fname) 29 root, ext = os.path.splitext(fname)
48 ext = ext[1:] # remove initial . 30 ext = ext[1:] # remove initial .
49 fname = FNPAT % {'designation': root, 'ext': ext} 31 fname = FNPAT % {'designation': root, 'ext': ext}
50 32
51 return fname 33 return fname
34
52 35
53 def download_documents(item_list, patterns, output_path): 36 def download_documents(item_list, patterns, output_path):
54 """ 37 """
55 Downloads a list of documents to the directory specified by output_path. 38 Downloads a list of documents to the directory specified by output_path.
56 39
64 os.makedirs(output_path) 47 os.makedirs(output_path)
65 48
66 downloaded = [] 49 downloaded = []
67 50
68 items = item_list.get_all() 51 items = item_list.get_all()
69 filtered_documents = []
70 for item in items: 52 for item in items:
71 documents = item.get_documents() 53 documents = item.get_documents()
72 for doc in documents: 54 for doc in documents:
73 for pattern in patterns: 55 for pattern in patterns:
74 if not pattern == '' and fnmatch(doc.get_filename(), pattern): 56 if not pattern == '' and fnmatch(doc.get_filename(), pattern):
75 fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename()) 57 fname = galaxy_name(item.metadata()['alveo:metadata']['dcterms:identifier'], doc.get_filename())
76 try: 58 try:
77 doc.download_content(dir_path=output_path, filename=fname) 59 doc.download_content(dir_path=output_path, filename=fname)
78 downloaded.append(doc.get_filename()) 60 downloaded.append(doc.get_filename())
79 except: 61 except pyalveo.APIError as e:
80 # maybe it doesn't exist or we have no access 62 print("ERROR: " + str(e), file=sys.stderr)
81 # TODO: report this 63 sys.exit(1)
82 pass
83 return downloaded 64 return downloaded
65
84 66
85 def main(): 67 def main():
86 args = parser() 68 args = parser()
87 try: 69 try:
88 api_key = open(args.api_key, 'r').read().strip() 70 api_key = open(args.api_key, 'r').read().strip()
89 71
90 client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False) 72 client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False)
91 73
92 item_list = read_item_list(args.item_list, client) 74 item_list = read_item_list(args.item_list, client)
93 patterns = args.patterns.split(',') 75 patterns = args.patterns.split(',')
94 downloaded = download_documents(item_list, patterns, args.output_path) 76 download_documents(item_list, patterns, args.output_path)
95 except pyalveo.APIError as e: 77 except pyalveo.APIError as e:
96 print("ERROR: " + str(e), file=sys.stderr) 78 print("ERROR: " + str(e), file=sys.stderr)
97 sys.exit(1) 79 sys.exit(1)
98 80
81
99 if __name__ == '__main__': 82 if __name__ == '__main__':
100 main() 83 main()