annotate alveo_item_list_downloader.py @ 1:6fef3489d97c draft

planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
author stevecassidy
date Mon, 15 Aug 2016 23:45:46 -0400
parents bfe39bd252df
children 7b6021997b8e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
1 from __future__ import print_function
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
2 import json
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
3 import argparse
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
4 import pyalveo
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
5 import sys
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
6 import os
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
7 from fnmatch import fnmatch
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
8
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
9 API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
10
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
def parser():
    """Parse the command line of the item list downloader.

    All four options are required string values:
    --api_key, --item_list_url, --patterns and --output_path.

    :returns: the argparse namespace built from sys.argv
    """
    # (flag, help text) pairs; every option is registered the same way
    required_options = (
        ('--api_key', "Alveo API key"),
        ('--item_list_url', "Item List to download"),
        ('--patterns', "File patterns to download"),
        ('--output_path', "Path to output file"),
    )

    arg_parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
    for flag, help_text in required_options:
        arg_parser.add_argument(flag, required=True, action="store", type=str, help=help_text)

    return arg_parser.parse_args()
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
18
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
def get_item_list(api_key, item_list_url):
    """Fetch an item list through a pyalveo client.

    :param api_key: API key handed to pyalveo.Client
    :param item_list_url: URL of the item list to retrieve
    :returns: whatever the client's get_item_list() returns for that URL
    """
    alveo_client = pyalveo.Client(api_url=API_URL, api_key=api_key)
    item_list = alveo_client.get_item_list(item_list_url)
    return item_list
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
22
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
# Output files are named "<designation>_<ext>" so that Galaxy can discover
# both the dataset designation and the dataset type from the file name.
FNPAT = "%(designation)s_%(ext)s"


def galaxy_name(itemname, fname):
    """Build a file name suitable for Galaxy dataset discovery.

    :param itemname: used as the designation (dataset identifier)
    :param fname: file name whose extension (without the leading dot)
        becomes the dataset type; the rest of fname is ignored
    :returns: FNPAT filled in with the designation and the extension
    """
    _, dotted_ext = os.path.splitext(fname)
    fields = dict(designation=itemname, ext=dotted_ext[1:])  # [1:] drops the '.'
    return FNPAT % fields
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
38
1
6fef3489d97c planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents: 0
diff changeset
39
0
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
def download_documents(item_list, patterns, output_path):
    """
    Downloads the documents of an item list whose file names match a pattern.

    Every document of every item is tested against the patterns; a matching
    document is saved into output_path under the name built by galaxy_name()
    from the item's 'dc:identifier' metadata and the document's file name.
    A document that cannot be downloaded is reported and skipped.

    :type item_list: object providing get_all() (as returned by get_item_list)
    :param item_list: item list whose items' documents are examined

    :type patterns: list of String
    :param patterns: fnmatch-style file name patterns; empty strings are ignored

    :type output_path: String
    :param output_path: directory to download the documents to (created if missing)

    :rtype: list of String
    :returns: original file names of the documents that were downloaded
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # drop empty patterns once instead of re-testing them for every document
    patterns = [pattern for pattern in patterns if pattern != '']

    downloaded = []
    for item in item_list.get_all():
        for doc in item.get_documents():
            filename = doc.get_filename()
            # any() stops at the first hit, so a document matching several
            # patterns is downloaded (and recorded) only once
            if not any(fnmatch(filename, pattern) for pattern in patterns):
                continue

            fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], filename)
            try:
                doc.download_content(dir_path=output_path, filename=fname)
            except Exception as e:
                # maybe it doesn't exist or we have no access: skip it, but
                # say so.  Exception (not a bare except) lets Ctrl-C and
                # sys.exit() through.
                # NOTE(review): reported on stdout because some Galaxy setups
                # treat any stderr output as a job failure - TODO confirm
                # against the tool wrapper.
                print("WARNING: could not download %s: %s" % (filename, e))
            else:
                downloaded.append(filename)
    return downloaded
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
71
bfe39bd252df planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff changeset
def main():
    """Command line entry point.

    Reads the API key from the file named by --api_key, fetches the item
    list at --item_list_url and downloads the documents matching the
    comma-separated --patterns into --output_path.  A pyalveo.APIError is
    reported on stderr and ends the process with exit status 1.
    """
    args = parser()
    try:
        # --api_key names a file holding the key; 'with' closes it promptly
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()
        item_list = get_item_list(api_key, args.item_list_url)
        patterns = args.patterns.split(',')
        # TODO: write out the returned list of downloaded files as a result?
        download_documents(item_list, patterns, args.output_path)
    except pyalveo.APIError as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()