annotate austalk-select-hVd-words.py @ 14:a38315ecf593 draft

planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author stevecassidy
date Wed, 01 Nov 2017 01:18:15 -0400
parents c99e7f09ce12
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
1 from __future__ import print_function
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
2 import argparse
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
3 import pyalveo
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
4 import sys
14
a38315ecf593 planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents: 12
diff changeset
5 from util import API_URL
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
6
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
# SPARQL namespace declarations; prepended verbatim to the query text built
# in this file so the short prefixes (austalk:, olac:, ausnc:, ...) resolve.
PREFIXES = (
    "\n"
    "PREFIX dcterms:<http://purl.org/dc/terms/>\n"
    "PREFIX austalk:<http://ns.austalk.edu.au/>\n"
    "PREFIX olac:<http://www.language-archives.org/OLAC/1.1/>\n"
    "PREFIX ausnc:<http://ns.ausnc.org.au/schemas/ausnc_md_model/>\n"
    "PREFIX foaf:<http://xmlns.com/foaf/0.1/>\n"
    "PREFIX dbpedia:<http://dbpedia.org/ontology/>\n"
    "PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
    "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>\n"
    "PREFIX geo:<http://www.w3.org/2003/01/geo/wgs84_pos#>\n"
    "PREFIX iso639schema:<http://downlode.org/rdf/iso-639/schema#>\n"
    "PREFIX austalkid:<http://id.austalk.edu.au/>\n"
    "PREFIX iso639:<http://downlode.org/rdf/iso-639/languages#>\n"
    "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n"
    "PREFIX is: <http://purl.org/ontology/is/core#>\n"
    "PREFIX iso: <http://purl.org/iso25964/skos-thes#>\n"
    "PREFIX dada: <http://purl.org/dada/schema/0.2#>"
)
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
24
14
a38315ecf593 planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents: 12
diff changeset
25
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
def parser():
    """Parse the command-line arguments of the hVd word selection tool.

    Reads ``sys.argv`` and returns an ``argparse.Namespace`` with the
    attributes ``api_key``, ``speaker``, ``words`` and ``output``.

    ``--words`` is restricted to the known word groups so that a typo is
    reported by argparse as a usage error instead of surfacing later as a
    lookup failure.
    """
    arg_parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists")
    # The value is opened and read as a file by main(), so describe it as a path.
    arg_parser.add_argument('--api_key', required=True, action="store", type=str,
                            help="Path to a file containing the Alveo API key")
    arg_parser.add_argument('--speaker', required=True, action="store", type=str,
                            help="Speaker identifier")
    arg_parser.add_argument('--words', required=False, default='all', action="store", type=str,
                            choices=['all', 'monopthongs', 'dipthongs'],
                            help="Word group (all, monopthongs, dipthongs)")
    arg_parser.add_argument('--output', required=True, action="store", type=str,
                            help="output file name")
    return arg_parser.parse_args()
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
34
14
a38315ecf593 planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents: 12
diff changeset
35
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
def _sparql_literal(text):
    """Escape *text* for embedding inside a double-quoted SPARQL string literal."""
    escaped = text.replace('\\', '\\\\').replace('"', '\\"')
    return escaped.replace('\n', '\\n').replace('\r', '\\r')


def _hvd_word_list(words):
    """Return the list of hVd prompt words for the group name *words*."""
    groups = dict(monopthongs=['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod',
                               'whod', 'herd', 'haired', 'hard', 'horde'],
                  dipthongs=['howd', 'hoyd', 'hide', 'hode', 'hade', 'heared'])

    if words == 'all':
        return groups['monopthongs'] + groups['dipthongs']
    try:
        return groups[words]
    except KeyError:
        # Same exception type as the plain dict lookup, but with a message
        # that tells the user which values are accepted.
        raise KeyError("unknown word group %r; expected 'all', 'monopthongs' "
                       "or 'dipthongs'" % (words,))


def find_hVd_words(api_key, speakerid, output, words='all'):
    """Find hVd words spoken by one speaker in the Austalk corpus.

    Builds a SPARQL query selecting the items of speaker *speakerid* whose
    prompt is one of the hVd words in the requested group, runs it against
    the 'austalk' collection through pyalveo, and writes one tab-separated
    row (speaker, prompt, item URL) per result to the file *output*,
    preceded by a header row.

    Args:
        api_key: Alveo API key passed to ``pyalveo.Client``.
        speakerid: speaker identifier matched against ``austalk:id``.
        output: name of the file to write (overwritten if it exists).
        words: 'all', 'monopthongs' or 'dipthongs'.

    Raises:
        KeyError: if *words* is not one of the known group names.
    """
    # Resolve the word group first so an invalid name fails before any
    # network client is created.
    word_list = _hvd_word_list(words)

    client = pyalveo.Client(api_key, API_URL, use_cache=False)

    # speakerid comes from the command line; escape it so quotes or
    # backslashes cannot terminate the literal and alter the query.
    query = PREFIXES + """
    SELECT distinct ?item ?prompt ?compname
    WHERE {
        ?item a ausnc:AusNCObject .
        ?item olac:speaker ?speaker .
        ?speaker austalk:id "%s" .
        ?item austalk:prototype ?prot .
        ?prot austalk:prompt ?prompt .
        ?item austalk:componentName ?compname .
    """ % _sparql_literal(speakerid)

    # Case-insensitive whole-string match against any of the selected words.
    filterclause = 'FILTER regex(?prompt, "^' + '$|^'.join(word_list) + '$", "i")\n'
    query += filterclause + "}"

    print(query)
    result = client.sparql_query('austalk', query)

    items = [(b['prompt']['value'], b['item']['value'])
             for b in result['results']['bindings']]

    with open(output, 'w') as out:
        out.write("Speaker\tPrompt\tItemURL\n")
        for prompt, item_url in items:
            out.write(speakerid + "\t" + prompt + "\t" + item_url + "\n")
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
79
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
80
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
def main():
    """Command-line entry point.

    Parses the arguments, reads the API key from the file named by
    ``--api_key`` and runs the hVd word search. Any exception is reported
    on stderr as ``ERROR: <message>`` and the process exits with status 1.
    """
    args = parser()
    try:
        # --api_key names a file holding the key; the context manager makes
        # sure the handle is closed instead of being left to the garbage
        # collector.
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()
        find_hVd_words(api_key, args.speaker, args.output, args.words)
    except Exception as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
89
14
a38315ecf593 planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents: 12
diff changeset
90
4
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
91 if __name__ == '__main__':
3a9f20428cff planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff changeset
92 main()