comparison alveo_get_primary_text.py @ 18:5e1b7d922ea3 draft default tip

planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit 53cde5cc9b8c1adcccdc3cfa52d8ca82079aeda7
author stevecassidy
date Mon, 15 Jan 2018 18:34:57 -0500
parents b69f6d41d17c
children
comparison
equal deleted inserted replaced
17:b69f6d41d17c 18:5e1b7d922ea3
1 from __future__ import print_function
2 import argparse
3 import pyalveo
4 import sys
5 import os
6
7 from util import API_URL, read_item_list
8
9
def parser():
    """Parse this tool's command line and return the resulting namespace.

    All three options are mandatory string options: ``--api_key``,
    ``--item_list`` and ``--output_path``.
    """
    arg_parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")

    # (flag, help text) pairs; every option shares the same settings.
    option_specs = (
        ('--api_key', "Alveo API key"),
        ('--item_list', "File containing list of item URLs"),
        ('--output_path', "Path to output file"),
    )
    for flag, help_text in option_specs:
        arg_parser.add_argument(flag, required=True, action="store", type=str, help=help_text)

    return arg_parser.parse_args()
16
17
# Galaxy's dataset discovery reads the designation and the type (extension)
# back out of file names that follow this pattern.
FNPAT = "%(designation)s_%(ext)s"


def galaxy_name(fname, ext):
    """Return a file name built from FNPAT for Galaxy dataset discovery.

    ``fname`` is used as the dataset designation and ``ext`` as its type.
    """
    return FNPAT % dict(designation=fname, ext=ext)
28
29
def download_text(item_list, output_path):
    """
    Downloads primary text from a list of items to the directory specified by output_path.

    Each item that has primary text is written to a file named after the
    item's ``dcterms:identifier`` metadata value (see ``galaxy_name``).
    Items whose primary text is ``None`` are skipped.

    :type item_list: ItemGroup
    :param item_list: item list to download

    :type output_path: String
    :param output_path: directory to download the documents to

    :rtype: list
    :returns: paths of the files that were written, in item order
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    downloaded = []

    for item in item_list.get_all():
        md = item.metadata()
        fname = os.path.join(output_path, galaxy_name(md['alveo:metadata']['dcterms:identifier'], 'txt'))
        content = item.get_primary_text()
        if content is None:
            continue

        # Decoding validates that byte content really is UTF-8; content that
        # is already text is used as is.
        text = content.decode('utf-8') if isinstance(content, bytes) else content

        # Write explicit UTF-8 bytes so the result does not depend on the
        # interpreter's default text encoding (ASCII on Python 2, the locale
        # encoding on Python 3), which can fail on non-ASCII text.
        with open(fname, 'wb') as out:
            out.write(text.encode('utf-8'))

        # Record the file so the returned list reflects what was written.
        downloaded.append(fname)

    return downloaded
55
56
def main():
    """Command-line entry point: download the primary text of every item in an item list.

    The value of ``--api_key`` is opened as a file and its stripped contents
    are used as the API key. Any ``pyalveo.APIError`` is reported on stderr
    and the process exits with status 1.
    """
    args = parser()
    try:
        # Use a context manager so the key file is closed promptly instead of
        # being left to the garbage collector.
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()
        client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False)
        item_list = read_item_list(args.item_list, client)
        download_text(item_list, args.output_path)
    except pyalveo.APIError as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()