Mercurial > repos > stevecassidy > alveoimport
comparison austalk-select-hVd-words.py @ 14:a38315ecf593 draft
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author | stevecassidy |
---|---|
date | Wed, 01 Nov 2017 01:18:15 -0400 |
parents | c99e7f09ce12 |
children |
comparison
equal
deleted
inserted
replaced
13:be3fd14899a1 | 14:a38315ecf593 |
---|---|
1 from __future__ import print_function | 1 from __future__ import print_function |
2 import argparse | 2 import argparse |
3 import pyalveo | 3 import pyalveo |
4 import sys | 4 import sys |
5 from util import API_URL | |
5 | 6 |
6 API_URL = 'https://app.alveo.edu.au/' | |
7 PREFIXES = """ | 7 PREFIXES = """ |
8 PREFIX dc:<http://purl.org/dc/terms/> | 8 PREFIX dcterms:<http://purl.org/dc/terms/> |
9 PREFIX austalk:<http://ns.austalk.edu.au/> | 9 PREFIX austalk:<http://ns.austalk.edu.au/> |
10 PREFIX olac:<http://www.language-archives.org/OLAC/1.1/> | 10 PREFIX olac:<http://www.language-archives.org/OLAC/1.1/> |
11 PREFIX ausnc:<http://ns.ausnc.org.au/schemas/ausnc_md_model/> | 11 PREFIX ausnc:<http://ns.ausnc.org.au/schemas/ausnc_md_model/> |
12 PREFIX foaf:<http://xmlns.com/foaf/0.1/> | 12 PREFIX foaf:<http://xmlns.com/foaf/0.1/> |
13 PREFIX dbpedia:<http://dbpedia.org/ontology/> | 13 PREFIX dbpedia:<http://dbpedia.org/ontology/> |
20 PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> | 20 PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> |
21 PREFIX is: <http://purl.org/ontology/is/core#> | 21 PREFIX is: <http://purl.org/ontology/is/core#> |
22 PREFIX iso: <http://purl.org/iso25964/skos-thes#> | 22 PREFIX iso: <http://purl.org/iso25964/skos-thes#> |
23 PREFIX dada: <http://purl.org/dada/schema/0.2#>""" | 23 PREFIX dada: <http://purl.org/dada/schema/0.2#>""" |
24 | 24 |
25 | |
25 def parser(): | 26 def parser(): |
26 parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists") | 27 parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists") |
27 parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") | 28 parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") |
28 parser.add_argument('--speaker', required=True, action="store", type=str, help="Speaker identifier") | 29 parser.add_argument('--speaker', required=True, action="store", type=str, help="Speaker identifier") |
29 parser.add_argument('--words', required=False, default='all', action="store", type=str, help="Word group (all, monopthongs, dipthongs)") | 30 parser.add_argument('--words', required=False, default='all', action="store", type=str, |
31 help="Word group (all, monopthongs, dipthongs)") | |
30 parser.add_argument('--output', required=True, action="store", type=str, help="output file name") | 32 parser.add_argument('--output', required=True, action="store", type=str, help="output file name") |
31 return parser.parse_args() | 33 return parser.parse_args() |
34 | |
32 | 35 |
33 def find_hVd_words(api_key, speakerid, output, words='all'): | 36 def find_hVd_words(api_key, speakerid, output, words='all'): |
34 """Find words in the Austalk corpus | 37 """Find words in the Austalk corpus |
35 """ | 38 """ |
36 | 39 |
40 SELECT distinct ?item ?prompt ?compname | 43 SELECT distinct ?item ?prompt ?compname |
41 WHERE { | 44 WHERE { |
42 ?item a ausnc:AusNCObject . | 45 ?item a ausnc:AusNCObject . |
43 ?item olac:speaker ?speaker . | 46 ?item olac:speaker ?speaker . |
44 ?speaker austalk:id "%s" . | 47 ?speaker austalk:id "%s" . |
45 ?item austalk:prompt ?prompt . | 48 ?item austalk:prototype ?prot . |
49 ?prot austalk:prompt ?prompt . | |
46 ?item austalk:componentName ?compname . | 50 ?item austalk:componentName ?compname . |
47 """ % speakerid | 51 """ % speakerid |
48 | 52 |
49 hVdWords = { | 53 hVdWords = dict(monopthongs=['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod', |
50 'monopthongs': ['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod', 'whod', 'herd', 'haired', 'hard', 'horde'], | 54 'whod', 'herd', 'haired', 'hard', 'horde'], |
51 'dipthongs': ['howd', 'hoyd', 'hide', 'hode', 'hade', 'heared'] | 55 dipthongs=['howd', 'hoyd', 'hide', 'hode', 'hade', 'heared']) |
52 } | |
53 | 56 |
54 if words == 'all': | 57 if words == 'all': |
55 words = hVdWords['monopthongs'] + hVdWords['dipthongs'] | 58 words = hVdWords['monopthongs'] + hVdWords['dipthongs'] |
56 else: | 59 else: |
57 words = hVdWords[words] | 60 words = hVdWords[words] |
60 filterclause += '$|^'.join(words) | 63 filterclause += '$|^'.join(words) |
61 filterclause += '$", "i")\n' | 64 filterclause += '$", "i")\n' |
62 | 65 |
63 query += filterclause + "}" | 66 query += filterclause + "}" |
64 | 67 |
68 print(query) | |
65 result = client.sparql_query('austalk', query) | 69 result = client.sparql_query('austalk', query) |
66 | 70 |
67 items = [] | 71 items = [] |
68 for b in result['results']['bindings']: | 72 for b in result['results']['bindings']: |
69 items.append((b['prompt']['value'], b['item']['value'])) | 73 items.append((b['prompt']['value'], b['item']['value'])) |
70 | 74 |
71 with open(output, 'w') as out: | 75 with open(output, 'w') as out: |
72 out.write("Speaker\tPrompt\tItemURL\n") | 76 out.write("Speaker\tPrompt\tItemURL\n") |
73 for item in items: | 77 for item in items: |
74 # TODO: fix this once the RDF data is fixed in alveo | 78 out.write(speakerid + "\t" + item[0] + "\t" + item[1] + "\n") |
75 # need to modify the item URL | |
76 itemurl = item[1].replace('http://id.austalk.edu.au/item/', 'https://app.alveo.edu.au/catalog/austalk/') | |
77 | |
78 out.write(speakerid + "\t" + item[0] + "\t" + itemurl + "\n") | |
79 | 79 |
80 | 80 |
81 def main(): | 81 def main(): |
82 args = parser() | 82 args = parser() |
83 try: | 83 try: |
85 find_hVd_words(api_key, args.speaker, args.output, args.words) | 85 find_hVd_words(api_key, args.speaker, args.output, args.words) |
86 except Exception as e: | 86 except Exception as e: |
87 print("ERROR: " + str(e), file=sys.stderr) | 87 print("ERROR: " + str(e), file=sys.stderr) |
88 sys.exit(1) | 88 sys.exit(1) |
89 | 89 |
90 | |
90 if __name__ == '__main__': | 91 if __name__ == '__main__': |
91 main() | 92 main() |