Mercurial > repos > stevecassidy > alveoimport
annotate austalk-select-hVd-words.py @ 14:a38315ecf593 draft
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author | stevecassidy |
---|---|
date | Wed, 01 Nov 2017 01:18:15 -0400 |
parents | c99e7f09ce12 |
children |
rev | line source |
---|---|
4
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
1 from __future__ import print_function |
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
2 import argparse |
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
3 import pyalveo |
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
4 import sys |
14
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents:
12
diff
changeset
|
5 from util import API_URL |
4
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
6 |
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
# SPARQL namespace declarations that are prepended to the query text.
# The value starts with a newline and has one PREFIX declaration per line.
PREFIXES = "\n" + "\n".join((
    "PREFIX dcterms:<http://purl.org/dc/terms/>",
    "PREFIX austalk:<http://ns.austalk.edu.au/>",
    "PREFIX olac:<http://www.language-archives.org/OLAC/1.1/>",
    "PREFIX ausnc:<http://ns.ausnc.org.au/schemas/ausnc_md_model/>",
    "PREFIX foaf:<http://xmlns.com/foaf/0.1/>",
    "PREFIX dbpedia:<http://dbpedia.org/ontology/>",
    "PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
    "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>",
    "PREFIX geo:<http://www.w3.org/2003/01/geo/wgs84_pos#>",
    "PREFIX iso639schema:<http://downlode.org/rdf/iso-639/schema#>",
    "PREFIX austalkid:<http://id.austalk.edu.au/>",
    "PREFIX iso639:<http://downlode.org/rdf/iso-639/languages#>",
    "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
    "PREFIX is: <http://purl.org/ontology/is/core#>",
    "PREFIX iso: <http://purl.org/iso25964/skos-thes#>",
    "PREFIX dada: <http://purl.org/dada/schema/0.2#>",
))
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
24 |
14
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents:
12
diff
changeset
|
25 |
4
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
def parser(argv=None):
    """Parse the command-line options of the hVd word selection tool.

    Arguments:
        argv: optional list of argument strings to parse.  When it is None
            (the default) argparse reads the arguments from ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with the attributes ``api_key``,
        ``speaker``, ``words`` and ``output``.

    A missing required option or an unknown ``--words`` value makes argparse
    print a usage message and exit with status 2.
    """
    arg_parser = argparse.ArgumentParser(
        description="Find hVd words for a speaker in the Austalk corpus")
    # The value is used as a file name by main(), which reads the key from it.
    arg_parser.add_argument('--api_key', required=True,
                            help="File containing the Alveo API key")
    arg_parser.add_argument('--speaker', required=True,
                            help="Speaker identifier")
    # Restricting the choices here rejects a mistyped group name up front
    # instead of letting it surface later as a bare KeyError.
    arg_parser.add_argument('--words', required=False, default='all',
                            choices=['all', 'monopthongs', 'dipthongs'],
                            help="Word group (all, monopthongs, dipthongs)")
    arg_parser.add_argument('--output', required=True,
                            help="output file name")
    return arg_parser.parse_args(argv)
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
34 |
14
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents:
12
diff
changeset
|
35 |
4
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
# Prompts belonging to each hVd word group.  The group names (with this
# spelling) are the values accepted for the ``words`` argument.
_HVD_WORDS = {
    'monopthongs': ['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod',
                    'whod', 'herd', 'haired', 'hard', 'horde'],
    'dipthongs': ['howd', 'hoyd', 'hide', 'hode', 'hade', 'heared'],
}


def _select_hvd_words(words):
    """Return the list of prompts for the word group named by *words*."""
    if words == 'all':
        return _HVD_WORDS['monopthongs'] + _HVD_WORDS['dipthongs']
    if words not in _HVD_WORDS:
        raise KeyError("unknown word group %r (expected one of: all, %s)"
                       % (words, ', '.join(sorted(_HVD_WORDS))))
    return _HVD_WORDS[words]


def _sparql_string(value):
    """Escape *value* for use inside a double-quoted SPARQL string literal."""
    # Backslash must be handled first so the escapes added below are kept.
    return (value.replace('\\', '\\\\')
                 .replace('"', '\\"')
                 .replace('\n', '\\n')
                 .replace('\r', '\\r'))


def _build_hvd_query(speakerid, prompts):
    """Return the SPARQL query selecting *speakerid*'s items for *prompts*."""
    query = PREFIXES + """
SELECT distinct ?item ?prompt ?compname
WHERE {
  ?item a ausnc:AusNCObject .
  ?item olac:speaker ?speaker .
  ?speaker austalk:id "%s" .
  ?item austalk:prototype ?prot .
  ?prot austalk:prompt ?prompt .
  ?item austalk:componentName ?compname .
""" % _sparql_string(speakerid)

    # Anchor every alternative so only whole-word prompts match; the "i"
    # flag makes the match case-insensitive.
    filterclause = 'FILTER regex(?prompt, "^'
    filterclause += '$|^'.join(prompts)
    filterclause += '$", "i")\n'

    return query + filterclause + "}"


def find_hVd_words(api_key, speakerid, output, words='all'):
    """Find hVd words for one speaker in the Austalk corpus.

    Runs a SPARQL query against the 'austalk' collection and writes the
    matching items to *output* as tab-separated text with the header
    ``Speaker``, ``Prompt``, ``ItemURL``.

    Arguments:
        api_key: the Alveo API key handed to ``pyalveo.Client``.
        speakerid: speaker identifier matched against ``austalk:id``; it is
            escaped before being placed in the query.
        output: name of the file to write.
        words: word group to select: 'all' (default), 'monopthongs' or
            'dipthongs'.

    Raises:
        KeyError: if *words* is not a known word group.  This is checked
            before the client is created.
    """
    prompts = _select_hvd_words(words)
    query = _build_hvd_query(speakerid, prompts)

    client = pyalveo.Client(api_key, API_URL, use_cache=False)

    print(query)
    result = client.sparql_query('austalk', query)

    items = [(b['prompt']['value'], b['item']['value'])
             for b in result['results']['bindings']]

    with open(output, 'w') as out:
        out.write("Speaker\tPrompt\tItemURL\n")
        for prompt, item_url in items:
            out.write(speakerid + "\t" + prompt + "\t" + item_url + "\n")
4
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
79 |
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
80 |
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
def main():
    """Command-line entry point.

    Parses the command-line options, reads the API key from the file named
    by ``--api_key`` and calls find_hVd_words() with the remaining options.
    Any exception raised while doing so is reported on stderr as
    ``ERROR: <message>`` and the process exits with status 1.
    """
    args = parser()
    try:
        # Use a with-block so the key file is closed as soon as it is read.
        with open(args.api_key, 'r') as key_file:
            api_key = key_file.read().strip()
        find_hVd_words(api_key, args.speaker, args.output, args.words)
    except Exception as e:
        print("ERROR: " + str(e), file=sys.stderr)
        sys.exit(1)
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
89 |
14
a38315ecf593
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
stevecassidy
parents:
12
diff
changeset
|
90 |
4
3a9f20428cff
planemo upload commit f36456464c692ed9d39a9cf654d09fe793113cce-dirty
stevecassidy
parents:
diff
changeset
|
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()