Mercurial > repos > stevecassidy > alveoimport
comparison alveo_get_primary_text.py @ 18:5e1b7d922ea3 draft default tip
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit 53cde5cc9b8c1adcccdc3cfa52d8ca82079aeda7
author | stevecassidy |
---|---|
date | Mon, 15 Jan 2018 18:34:57 -0500 |
parents | b69f6d41d17c |
children |
comparison
equal
deleted
inserted
replaced
17:b69f6d41d17c | 18:5e1b7d922ea3 |
---|---|
1 from __future__ import print_function | |
2 import argparse | |
3 import pyalveo | |
4 import sys | |
5 import os | |
6 | |
7 from util import API_URL, read_item_list | |
8 | |
9 | |
def parser():
    """Parse the command-line arguments of the script.

    All three options are required:

    * ``--api_key``: Alveo API key
    * ``--item_list``: file containing the list of item URLs
    * ``--output_path``: path to the output location

    :rtype: argparse.Namespace
    :returns: the parsed arguments (``api_key``, ``item_list``, ``output_path``)
    """
    # Named ``arg_parser`` so the local does not shadow this function's own name.
    arg_parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
    arg_parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
    arg_parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs")
    arg_parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
    return arg_parser.parse_args()
16 | |
17 | |
# This file name pattern allows Galaxy to discover the dataset designation
# and type from the name of each output file.
FNPAT = "%(designation)s_%(ext)s"


def galaxy_name(fname, ext):
    """Construct a filename suitable for Galaxy dataset discovery.

    :type fname: String
    :param fname: dataset designation, substituted for ``designation`` in FNPAT

    :type ext: String
    :param ext: dataset type/extension, substituted for ``ext`` in FNPAT

    :rtype: String
    :returns: the name ``<fname>_<ext>``
    """
    return FNPAT % {'designation': fname, 'ext': ext}
28 | |
29 | |
def download_text(item_list, output_path):
    """
    Downloads primary text from a list of items to the directory specified by output_path.

    One file is written for every item whose primary text is not None. The
    file name is built with galaxy_name() from the item's
    ``dcterms:identifier`` metadata value and the extension ``txt``.

    :type item_list: ItemGroup
    :param item_list: item list to download

    :type output_path: String
    :param output_path: directory to download the documents to; it is created
        if it does not exist yet

    :rtype: list
    :returns: paths of the files that were written
    """
    # io.open takes an explicit encoding on both Python 2 and Python 3, so the
    # output no longer depends on the platform's default encoding.
    import io

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    downloaded = []

    for item in item_list.get_all():
        md = item.metadata()
        fname = os.path.join(output_path, galaxy_name(md['alveo:metadata']['dcterms:identifier'], 'txt'))
        content = item.get_primary_text()
        if content is None:
            # nothing to write for this item
            continue
        # content is decoded as UTF-8 and written back out as UTF-8
        with io.open(fname, 'w', encoding='utf-8') as out:
            out.write(content.decode('utf-8'))
        # record the file so the returned list reflects what was written
        downloaded.append(fname)

    return downloaded
55 | |
56 | |
def main():
    """Command-line entry point.

    Reads the API key from the file named by ``--api_key``, creates a pyalveo
    client, loads the item list named by ``--item_list`` and downloads the
    primary text of its items to ``--output_path``.

    A pyalveo.APIError is reported on stderr and the process exits with
    status 1.
    """
    args = parser()
    try:
        # --api_key names a file holding the key; the context manager closes
        # the file as soon as the key has been read.
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()
        client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False)
        item_list = read_item_list(args.item_list, client)
        download_text(item_list, args.output_path)
    except pyalveo.APIError as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)
67 | |
68 | |
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()