annotate alveo_get_item_data.py @ 14:a38315ecf593 draft

planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author stevecassidy
date Wed, 01 Nov 2017 01:18:15 -0400
parents be3fd14899a1
children 5e1b7d922ea3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
1 from __future__ import print_function
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
2 import argparse
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
3 import pyalveo
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
4 import sys
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
5 import os
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
6 from fnmatch import fnmatch
14
a38315ecf593 planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents: 13
diff changeset
7 from util import API_URL, read_item_list
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
8
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
9
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
def parser(argv=None):
    """Parse the command line options of the item data download tool.

    All four options are required.

    :type argv: list of String or None
    :param argv: argument strings to parse; when None (the default)
        argparse reads them from ``sys.argv[1:]``

    :rtype: argparse.Namespace
    :returns: namespace with the attributes ``api_key``, ``item_list``,
        ``patterns`` and ``output_path``
    """
    p = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
    # main() opens this value as a file and reads the key from it, so the
    # help text describes a file rather than the key itself
    p.add_argument('--api_key', required=True, action="store", type=str,
                   help="File containing the Alveo API key")
    p.add_argument('--item_list', required=True, action="store", type=str,
                   help="File containing list of item URLs")
    p.add_argument('--patterns', required=True, action="store", type=str,
                   help="File patterns to download")
    # download_documents() creates this path as a directory and saves into it
    p.add_argument('--output_path', required=True, action="store", type=str,
                   help="Directory to download documents to")
    return p.parse_args(argv)
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
17
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
18
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
# Galaxy works out the dataset designation and type from this file name pattern
FNPAT = "%(designation)s#%(ext)s"


def galaxy_name(itemname, fname):
    """Build a file name that Galaxy dataset discovery can interpret.

    The result has the form ``<designation>#<ext>``:
    designation - (dataset identifier) is ``fname`` without its last extension
    ext - defines the dataset type; it is the last extension of ``fname``
          without the leading dot (empty when ``fname`` has no extension)

    ``itemname`` is accepted but does not take part in the result.
    """
    designation, dotted_ext = os.path.splitext(fname)
    # slicing drops the leading "." and is a no-op on an empty extension
    return FNPAT % {'designation': designation, 'ext': dotted_ext[1:]}
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
34
14
a38315ecf593 planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents: 13
diff changeset
35
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
def download_documents(item_list, patterns, output_path):
    """
    Downloads the documents of an item list whose file names match one of
    the given patterns to the directory specified by output_path.

    Each matching document is downloaded exactly once, even when its name
    matches several patterns, and is saved under the name produced by
    ``galaxy_name``. If pyalveo reports an API error during a download the
    error is printed to stderr and the process exits with status 1.

    :type item_list: object providing ``get_all()``
    :param item_list: source of the items whose documents are examined
        (presumably a pyalveo item group -- confirm against read_item_list)

    :type patterns: iterable of String
    :param patterns: fnmatch-style file name patterns; surrounding
        whitespace is stripped and blank entries are ignored

    :type output_path: String
    :param output_path: directory to download the documents to; it is
        created when it does not exist

    :rtype: list of String
    :returns: the original file names of the documents that were downloaded
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # Clean the patterns once instead of re-testing for '' on every document;
    # stripping lets "*.wav, *.txt" behave the same as "*.wav,*.txt"
    wanted = [p.strip() for p in patterns if p.strip()]

    downloaded = []
    for item in item_list.get_all():
        for doc in item.get_documents():
            docname = doc.get_filename()
            # any() stops at the first match, so a document that matches
            # several patterns is fetched and recorded only once
            if not any(fnmatch(docname, pattern) for pattern in wanted):
                continue
            # metadata is looked up only for items that have a matching
            # document, as before
            identifier = item.metadata()['alveo:metadata']['dcterms:identifier']
            fname = galaxy_name(identifier, docname)
            try:
                doc.download_content(dir_path=output_path, filename=fname)
                downloaded.append(docname)
            except pyalveo.APIError as e:
                print("ERROR: " + str(e), file=sys.stderr)
                sys.exit(1)
    return downloaded
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
65
14
a38315ecf593 planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents: 13
diff changeset
66
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
def main():
    """Command line entry point.

    Parses the command line, reads the API key from the file named by
    ``--api_key``, creates a pyalveo client, loads the item list named by
    ``--item_list`` and downloads every document matching one of the
    comma separated ``--patterns`` into ``--output_path``.

    A pyalveo API error is printed to stderr and ends the process with
    exit status 1.
    """
    args = parser()
    try:
        # the with-block closes the key file as soon as it has been read
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()

        client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False)

        item_list = read_item_list(args.item_list, client)
        patterns = args.patterns.split(',')
        download_documents(item_list, patterns, args.output_path)
    except pyalveo.APIError as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()