Mercurial > repos > stevecassidy > alveoimport
annotate alveo_get_primary_text.py @ 14:a38315ecf593 draft
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
| author | stevecassidy | 
|---|---|
| date | Wed, 01 Nov 2017 01:18:15 -0400 | 
| parents | 2f4907372748 | 
| children | 3fd0f8f1f3ce | 
| rev | line source | 
|---|---|
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 1 from __future__ import print_function | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 2 import argparse | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 3 import pyalveo | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 4 import sys | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 5 import os | 
| 9 
2f4907372748
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 stevecassidy parents: 
4diff
changeset | 6 | 
| 14 
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
 stevecassidy parents: 
9diff
changeset | 7 from util import API_URL, read_item_list | 
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 8 | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 9 | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 10 def parser(): | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 11 parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List") | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 12 parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") | 
| 9 
2f4907372748
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 stevecassidy parents: 
4diff
changeset | 13 parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs") | 
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 14 parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 15 return parser.parse_args() | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 16 | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 17 | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 18 # this file name pattern allows galaxy to discover the dataset designation and type | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 19 FNPAT = "%(designation)s_%(ext)s" | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 20 | 
| 14 
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
 stevecassidy parents: 
9diff
changeset | 21 | 
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 22 def galaxy_name(fname, ext): | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 23 """construct a filename suitable for Galaxy dataset discovery""" | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 24 | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 25 fname = FNPAT % {'designation': fname, 'ext': ext} | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 26 | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 27 return fname | 
| 9 
2f4907372748
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 stevecassidy parents: 
4diff
changeset | 28 | 
| 14 
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
 stevecassidy parents: 
9diff
changeset | 29 | 
| 
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
 stevecassidy parents: 
9diff
changeset | 30 def download_text(item_list, output_path): | 
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 31 """ | 
| 14 
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
 stevecassidy parents: 
9diff
changeset | 32 Downloads primary text from a list of items to the directory specified by output_path. | 
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 33 | 
| 14 
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
 stevecassidy parents: 
9diff
changeset | 34 :type item_list: ItemGroup | 
| 
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
 stevecassidy parents: 
9diff
changeset | 35 :param item_list: item list to download | 
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 36 | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 37 :type output_path: String | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 38 :param output_path: directory to download the documents to | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 39 """ | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 40 if not os.path.exists(output_path): | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 41 os.makedirs(output_path) | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 42 | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 43 downloaded = [] | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 44 | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 45 items = item_list.get_all() | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 46 for item in items: | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 47 md = item.metadata() | 
| 14 
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
 stevecassidy parents: 
9diff
changeset | 48 fname = os.path.join(output_path, galaxy_name(md['alveo:metadata']['dcterms:identifier'], 'txt')) | 
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 49 content = item.get_primary_text() | 
| 14 
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
 stevecassidy parents: 
9diff
changeset | 50 if content is not None: | 
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 51 with open(fname, 'w') as out: | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 52 out.write(content) | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 53 | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 54 return downloaded | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 55 | 
| 9 
2f4907372748
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 stevecassidy parents: 
4diff
changeset | 56 | 
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 57 def main(): | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 58 args = parser() | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 59 try: | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 60 api_key = open(args.api_key, 'r').read().strip() | 
| 9 
2f4907372748
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 stevecassidy parents: 
4diff
changeset | 61 client = pyalveo.Client(api_url=API_URL, api_key=api_key, use_cache=False) | 
| 
2f4907372748
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 stevecassidy parents: 
4diff
changeset | 62 item_list = read_item_list(args.item_list, client) | 
| 14 
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
 stevecassidy parents: 
9diff
changeset | 63 download_text(item_list, args.output_path) | 
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 64 except pyalveo.APIError as e: | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 65 print("ERROR: " + str(e), file=sys.stderr) | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 66 sys.exit(1) | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 67 | 
| 14 
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
 stevecassidy parents: 
9diff
changeset | 68 | 
| 4 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 69 if __name__ == '__main__': | 
| 
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
 stevecassidy parents: diff
changeset | 70 main() | 
