Mercurial > repos > stevecassidy > alveoimport
annotate alveo_item_list_downloader.py @ 1:6fef3489d97c draft
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
author | stevecassidy |
---|---|
date | Mon, 15 Aug 2016 23:45:46 -0400 |
parents | bfe39bd252df |
children | 7b6021997b8e |
rev | line source |
---|---|
0
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
1 from __future__ import print_function |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
2 import json |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
3 import argparse |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
4 import pyalveo |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
5 import sys |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
6 import os |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
7 from fnmatch import fnmatch |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
8 |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
# Base URL of the Alveo API; passed to pyalveo.Client as api_url.
API_URL = 'https://app.alveo.edu.au'  # TODO: export constants to a separate module
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
10 |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
def parser(argv=None):
    """Parse the command-line options of the item list downloader.

    :type argv: list of String, or None
    :param argv: arguments to parse; when None argparse reads sys.argv[1:],
        which is what a call without arguments has always done

    :rtype: argparse.Namespace
    :returns: namespace with the attributes api_key, item_list_url,
        patterns and output_path (all required, all strings)
    """
    # named arg_parser so the local does not shadow this function's own name
    arg_parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
    # main() opens this value as a file and reads the key from it, so the
    # help text describes a path rather than the key itself
    arg_parser.add_argument('--api_key', required=True, action="store", type=str,
                            help="Path to a file containing the Alveo API key")
    arg_parser.add_argument('--item_list_url', required=True, action="store", type=str,
                            help="Item List to download")
    arg_parser.add_argument('--patterns', required=True, action="store", type=str,
                            help="File patterns to download")
    arg_parser.add_argument('--output_path', required=True, action="store", type=str,
                            help="Path to output file")
    return arg_parser.parse_args(argv)
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
18 |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
def get_item_list(api_key, item_list_url):
    """Fetch an item list from the Alveo server.

    Builds a pyalveo client for API_URL authenticated with api_key and
    returns whatever its get_item_list() call yields for item_list_url.
    """
    alveo = pyalveo.Client(api_url=API_URL, api_key=api_key)
    item_list = alveo.get_item_list(item_list_url)
    return item_list
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
22 |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
# this file name pattern allows galaxy to discover the dataset designation and type
FNPAT = "%(designation)s_%(ext)s"


def galaxy_name(itemname, fname):
    """Construct a filename suitable for Galaxy dataset discovery.

    The result is FNPAT filled in with:
        designation - (dataset identifier) the item name given as itemname
        ext - defines the dataset type; the extension of fname without
              its leading dot

    :type itemname: String
    :param itemname: name used as the dataset designation

    :type fname: String
    :param fname: original document file name; only its extension is used

    :rtype: String
    :returns: "<itemname>_<ext>"; the ext part is empty when fname has
        no extension
    """
    # only the extension matters here; the root of fname is discarded
    ext = os.path.splitext(fname)[1]
    ext = ext[1:]  # remove initial .
    return FNPAT % {'designation': itemname, 'ext': ext}
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
38 |
1
6fef3489d97c
planemo upload commit 0fca33c3b7285bd31f6c7380393d08bbdad4e4d6
stevecassidy
parents:
0
diff
changeset
|
39 |
0
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
def download_documents(item_list, patterns, output_path):
    """
    Downloads the documents of an item list to the directory specified by
    output_path.

    Every document whose file name matches at least one of the patterns is
    downloaded once, under the name produced by galaxy_name() from the
    item's 'dc:identifier' metadata and the document's file name.
    Documents that fail to download are reported and skipped.

    :type item_list: object returned by get_item_list()
    :param item_list: item list whose get_all() yields the items to scan

    :type patterns: list of String
    :param patterns: fnmatch-style file name patterns; empty strings are ignored

    :type output_path: String
    :param output_path: directory to download the documents to; created
        if it does not exist

    :rtype: list of String
    :returns: original file names of the documents that were downloaded
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # empty patterns (e.g. from a trailing comma) never select anything
    wanted = [pattern for pattern in patterns if not pattern == '']

    downloaded = []
    for item in item_list.get_all():
        for doc in item.get_documents():
            docname = doc.get_filename()
            # any() stops at the first hit, so a document matching several
            # patterns is downloaded (and recorded) only once
            if not any(fnmatch(docname, pattern) for pattern in wanted):
                continue
            fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], docname)
            try:
                doc.download_content(dir_path=output_path, filename=fname)
            except Exception as err:
                # maybe it doesn't exist or we have no access: report it and
                # carry on with the remaining documents.
                # NOTE(review): written to stdout so that a skipped document
                # does not by itself put output on stderr -- confirm how the
                # Galaxy tool wrapper treats stderr before changing this.
                print("WARNING: could not download %s: %s" % (docname, err))
            else:
                downloaded.append(docname)
    return downloaded
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
71 |
bfe39bd252df
planemo upload commit 5de43e6a614de2a1b2065bc63823ecc9854ebb32-dirty
stevecassidy
parents:
diff
changeset
|
def main():
    """Command-line entry point of the item list downloader.

    Reads the API key from the file named by --api_key, fetches the item
    list at --item_list_url and downloads every document matching one of
    the comma-separated --patterns into --output_path.

    If pyalveo raises an APIError the message is printed to stderr and the
    process exits with status 1.
    """
    args = parser()
    try:
        # context manager so the key file is closed as soon as it is read
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()
        item_list = get_item_list(api_key, args.item_list_url)
        patterns = args.patterns.split(',')
        download_documents(item_list, patterns, args.output_path)
        # write out a list of downloaded files as a result?
    except pyalveo.APIError as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()