annotate alveo_get_primary_text.py @ 14:a38315ecf593 draft

planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author stevecassidy
date Wed, 01 Nov 2017 01:18:15 -0400
parents 2f4907372748
children 3fd0f8f1f3ce
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
1 from __future__ import print_function
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
2 import argparse
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
3 import pyalveo
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
4 import sys
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
5 import os
9
2f4907372748 planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents: 4
diff changeset
6
14
a38315ecf593 planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents: 9
diff changeset
7 from util import API_URL, read_item_list
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
8
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
9
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
def parser():
    """Parse the command-line arguments of this tool.

    Three required string options are defined: ``--api_key``,
    ``--item_list`` and ``--output_path``. The arguments are read from
    ``sys.argv``; argparse exits the process if any of them is missing.

    :rtype: argparse.Namespace
    :returns: the parsed arguments
    """
    # Named arg_parser so the local does not shadow this function's own name.
    arg_parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
    # main() opens this value as a file and reads the key from it, so the
    # help text describes a file rather than the key itself.
    arg_parser.add_argument('--api_key', required=True, action="store", type=str,
                            help="File containing the Alveo API key")
    arg_parser.add_argument('--item_list', required=True, action="store", type=str,
                            help="File containing list of item URLs")
    arg_parser.add_argument('--output_path', required=True, action="store", type=str,
                            help="Path to output file")
    return arg_parser.parse_args()
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
16
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
17
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
# This file name pattern allows Galaxy to discover the dataset designation and type.
FNPAT = "%(designation)s_%(ext)s"


def galaxy_name(fname, ext):
    """Construct a filename suitable for Galaxy dataset discovery.

    :param fname: value substituted for the ``designation`` field of FNPAT
    :param ext: value substituted for the ``ext`` field of FNPAT

    :returns: FNPAT filled in, i.e. ``"<fname>_<ext>"``
    """
    return FNPAT % {'designation': fname, 'ext': ext}
9
2f4907372748 planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents: 4
diff changeset
28
14
a38315ecf593 planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents: 9
diff changeset
29
a38315ecf593 planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents: 9
diff changeset
def download_text(item_list, output_path):
    """
    Downloads primary text from a list of items to the directory specified by output_path.

    The output directory is created if it does not exist. For every item
    whose primary text is not None, one file is written, named by
    galaxy_name() from the item's 'dcterms:identifier' metadata value with
    the 'txt' extension.

    :type item_list: ItemGroup
    :param item_list: item list to download

    :type output_path: String
    :param output_path: directory to download the documents to

    :rtype: list
    :returns: paths of the files that were written, in item order
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    downloaded = []

    for item in item_list.get_all():
        md = item.metadata()
        fname = os.path.join(output_path, galaxy_name(md['alveo:metadata']['dcterms:identifier'], 'txt'))
        content = item.get_primary_text()
        # Items for which get_primary_text() returns None are skipped.
        if content is not None:
            with open(fname, 'w') as out:
                out.write(content)
            # Record the file so the returned list reflects what was written
            # (previously the list was returned without ever being filled).
            downloaded.append(fname)

    return downloaded
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
55
9
2f4907372748 planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents: 4
diff changeset
56
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
def main():
    """Command-line entry point.

    Reads the API key from the file named by ``--api_key``, builds a
    pyalveo client against API_URL with caching disabled, loads the item
    list from ``--item_list`` and writes the items' primary texts under
    ``--output_path``.

    If a ``pyalveo.APIError`` is raised, the error is printed to stderr and
    the process exits with status 1. Other exceptions propagate.
    """
    args = parser()
    try:
        # Context manager so the key file handle is closed promptly.
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()
        client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False)
        item_list = read_item_list(args.item_list, client)
        download_text(item_list, args.output_path)
    except pyalveo.APIError as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
67
14
a38315ecf593 planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents: 9
diff changeset
68
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    main()