comparison austalk-select-hVd-words.py @ 14:a38315ecf593 draft

planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author stevecassidy
date Wed, 01 Nov 2017 01:18:15 -0400
parents c99e7f09ce12
children
comparison
equal deleted inserted replaced
13:be3fd14899a1 14:a38315ecf593
1 from __future__ import print_function 1 from __future__ import print_function
2 import argparse 2 import argparse
3 import pyalveo 3 import pyalveo
4 import sys 4 import sys
5 from util import API_URL
5 6
6 API_URL = 'https://app.alveo.edu.au/'
7 PREFIXES = """ 7 PREFIXES = """
8 PREFIX dc:<http://purl.org/dc/terms/> 8 PREFIX dcterms:<http://purl.org/dc/terms/>
9 PREFIX austalk:<http://ns.austalk.edu.au/> 9 PREFIX austalk:<http://ns.austalk.edu.au/>
10 PREFIX olac:<http://www.language-archives.org/OLAC/1.1/> 10 PREFIX olac:<http://www.language-archives.org/OLAC/1.1/>
11 PREFIX ausnc:<http://ns.ausnc.org.au/schemas/ausnc_md_model/> 11 PREFIX ausnc:<http://ns.ausnc.org.au/schemas/ausnc_md_model/>
12 PREFIX foaf:<http://xmlns.com/foaf/0.1/> 12 PREFIX foaf:<http://xmlns.com/foaf/0.1/>
13 PREFIX dbpedia:<http://dbpedia.org/ontology/> 13 PREFIX dbpedia:<http://dbpedia.org/ontology/>
20 PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> 20 PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
21 PREFIX is: <http://purl.org/ontology/is/core#> 21 PREFIX is: <http://purl.org/ontology/is/core#>
22 PREFIX iso: <http://purl.org/iso25964/skos-thes#> 22 PREFIX iso: <http://purl.org/iso25964/skos-thes#>
23 PREFIX dada: <http://purl.org/dada/schema/0.2#>""" 23 PREFIX dada: <http://purl.org/dada/schema/0.2#>"""
24 24
25
25 def parser(): 26 def parser():
26 parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists") 27 parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists")
27 parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") 28 parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
28 parser.add_argument('--speaker', required=True, action="store", type=str, help="Speaker identifier") 29 parser.add_argument('--speaker', required=True, action="store", type=str, help="Speaker identifier")
29 parser.add_argument('--words', required=False, default='all', action="store", type=str, help="Word group (all, monopthongs, dipthongs)") 30 parser.add_argument('--words', required=False, default='all', action="store", type=str,
31 help="Word group (all, monopthongs, dipthongs)")
30 parser.add_argument('--output', required=True, action="store", type=str, help="output file name") 32 parser.add_argument('--output', required=True, action="store", type=str, help="output file name")
31 return parser.parse_args() 33 return parser.parse_args()
34
32 35
33 def find_hVd_words(api_key, speakerid, output, words='all'): 36 def find_hVd_words(api_key, speakerid, output, words='all'):
34 """Find words in the Austalk corpus 37 """Find words in the Austalk corpus
35 """ 38 """
36 39
40 SELECT distinct ?item ?prompt ?compname 43 SELECT distinct ?item ?prompt ?compname
41 WHERE { 44 WHERE {
42 ?item a ausnc:AusNCObject . 45 ?item a ausnc:AusNCObject .
43 ?item olac:speaker ?speaker . 46 ?item olac:speaker ?speaker .
44 ?speaker austalk:id "%s" . 47 ?speaker austalk:id "%s" .
45 ?item austalk:prompt ?prompt . 48 ?item austalk:prototype ?prot .
49 ?prot austalk:prompt ?prompt .
46 ?item austalk:componentName ?compname . 50 ?item austalk:componentName ?compname .
47 """ % speakerid 51 """ % speakerid
48 52
49 hVdWords = { 53 hVdWords = dict(monopthongs=['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod',
50 'monopthongs': ['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod', 'whod', 'herd', 'haired', 'hard', 'horde'], 54 'whod', 'herd', 'haired', 'hard', 'horde'],
51 'dipthongs': ['howd', 'hoyd', 'hide', 'hode', 'hade', 'heared'] 55 dipthongs=['howd', 'hoyd', 'hide', 'hode', 'hade', 'heared'])
52 }
53 56
54 if words == 'all': 57 if words == 'all':
55 words = hVdWords['monopthongs'] + hVdWords['dipthongs'] 58 words = hVdWords['monopthongs'] + hVdWords['dipthongs']
56 else: 59 else:
57 words = hVdWords[words] 60 words = hVdWords[words]
60 filterclause += '$|^'.join(words) 63 filterclause += '$|^'.join(words)
61 filterclause += '$", "i")\n' 64 filterclause += '$", "i")\n'
62 65
63 query += filterclause + "}" 66 query += filterclause + "}"
64 67
68 print(query)
65 result = client.sparql_query('austalk', query) 69 result = client.sparql_query('austalk', query)
66 70
67 items = [] 71 items = []
68 for b in result['results']['bindings']: 72 for b in result['results']['bindings']:
69 items.append((b['prompt']['value'], b['item']['value'])) 73 items.append((b['prompt']['value'], b['item']['value']))
70 74
71 with open(output, 'w') as out: 75 with open(output, 'w') as out:
72 out.write("Speaker\tPrompt\tItemURL\n") 76 out.write("Speaker\tPrompt\tItemURL\n")
73 for item in items: 77 for item in items:
74 # TODO: fix this once the RDF data is fixed in alveo 78 out.write(speakerid + "\t" + item[0] + "\t" + item[1] + "\n")
75 # need to modify the item URL
76 itemurl = item[1].replace('http://id.austalk.edu.au/item/', 'https://app.alveo.edu.au/catalog/austalk/')
77
78 out.write(speakerid + "\t" + item[0] + "\t" + itemurl + "\n")
79 79
80 80
81 def main(): 81 def main():
82 args = parser() 82 args = parser()
83 try: 83 try:
85 find_hVd_words(api_key, args.speaker, args.output, args.words) 85 find_hVd_words(api_key, args.speaker, args.output, args.words)
86 except Exception as e: 86 except Exception as e:
87 print("ERROR: " + str(e), file=sys.stderr) 87 print("ERROR: " + str(e), file=sys.stderr)
88 sys.exit(1) 88 sys.exit(1)
89 89
90
90 if __name__ == '__main__': 91 if __name__ == '__main__':
91 main() 92 main()