Mercurial > repos > stevecassidy > alveoimport
annotate alveo_get_primary_text.py @ 16:fd22df0c6d9b draft
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit f2432aaedd36ae7662873623d8861d0982dffdd2-dirty
| author | stevecassidy |
|---|---|
| date | Sun, 03 Dec 2017 18:21:20 -0500 |
| parents | 3fd0f8f1f3ce |
| children |
| rev | line source |
|---|---|
|
4
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
1 from __future__ import print_function |
|
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
2 import argparse |
|
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
3 import pyalveo |
|
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
4 import sys |
|
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
5 import os |
|
9
2f4907372748
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
4
diff
changeset
|
6 |
|
14
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents:
9
diff
changeset
|
7 from util import API_URL, read_item_list |
|
4
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
8 |
|
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
9 |
|
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
def parser():
    """Parse the command line arguments of this tool.

    All three options are required; argparse exits with a usage message
    when one is missing.

    :rtype: argparse.Namespace
    :returns: namespace with the attributes ``api_key``, ``item_list``
        and ``output_path``
    """
    # named arg_parser so the local does not shadow this function's own name
    arg_parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
    # main() opens this value as a file and reads the key from it, so the
    # help text must ask for a file path rather than the key itself
    arg_parser.add_argument('--api_key', required=True, action="store", type=str, help="File containing the Alveo API key")
    arg_parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs")
    arg_parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
    return arg_parser.parse_args()
|
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
16 |
|
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
17 |
|
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
# Galaxy discovers the dataset designation and type from file names
# that follow this pattern
FNPAT = "%(designation)s_%(ext)s"


def galaxy_name(fname, ext):
    """Build a file name that Galaxy dataset discovery can parse.

    The result is ``fname`` and ``ext`` filled into FNPAT, i.e. the two
    values joined by an underscore.
    """
    return FNPAT % dict(designation=fname, ext=ext)
|
9
2f4907372748
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
4
diff
changeset
|
28 |
|
14
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents:
9
diff
changeset
|
29 |
|
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents:
9
diff
changeset
|
def download_text(item_list, output_path):
    """
    Downloads primary text from a list of items to the directory specified by output_path.

    One UTF-8 encoded file is written per item, named from the item's
    ``dcterms:identifier`` via galaxy_name(). Items whose primary text is
    None are skipped. The output directory is created if it does not exist.

    :type item_list: ItemGroup
    :param item_list: item list to download

    :type output_path: String
    :param output_path: directory to download the documents to

    :rtype: list
    :returns: paths of the files that were written
    """
    # local import: io.open takes an explicit encoding on both Python 2 and 3
    import io

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    downloaded = []

    for item in item_list.get_all():
        md = item.metadata()
        fname = os.path.join(output_path, galaxy_name(md['alveo:metadata']['dcterms:identifier'], 'txt'))
        content = item.get_primary_text()
        if content is None:
            continue
        # the text may arrive as raw bytes; decode only in that case so
        # already-decoded text is accepted too
        if isinstance(content, bytes):
            content = content.decode('utf-8')
        # write with an explicit encoding instead of the platform default,
        # which can fail on non-ASCII text
        with io.open(fname, 'w', encoding='utf-8') as out:
            out.write(content)
        # record the file so the return value reflects what was written
        downloaded.append(fname)

    return downloaded
|
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
55 |
|
9
2f4907372748
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
4
diff
changeset
|
56 |
|
4
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
def main():
    """Command line entry point: download the primary text of an item list.

    Reads the API key from the file named by ``--api_key``, creates a
    pyalveo client for API_URL, loads the item list named by
    ``--item_list`` and writes the texts to ``--output_path``.

    A pyalveo.APIError is reported on stderr and ends the process with
    exit status 1; other exceptions propagate.
    """
    args = parser()
    try:
        # the context manager closes the key file promptly instead of
        # leaving the handle to the garbage collector
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()
        client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False)
        item_list = read_item_list(args.item_list, client)
        download_text(item_list, args.output_path)
    except pyalveo.APIError as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)
|
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
67 |
|
14
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents:
9
diff
changeset
|
68 |
|
4
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
# run the tool only when this file is executed as a script, not on import
if __name__ == '__main__':
    main()
