Mercurial > repos > stevecassidy > alveoimport
diff austalk-select-hVd-words.py @ 14:a38315ecf593 draft
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author | stevecassidy |
---|---|
date | Wed, 01 Nov 2017 01:18:15 -0400 |
parents | c99e7f09ce12 |
children |
line wrap: on
line diff
--- a/austalk-select-hVd-words.py Wed Feb 01 22:34:24 2017 -0500 +++ b/austalk-select-hVd-words.py Wed Nov 01 01:18:15 2017 -0400 @@ -2,10 +2,10 @@ import argparse import pyalveo import sys +from util import API_URL -API_URL = 'https://app.alveo.edu.au/' PREFIXES = """ -PREFIX dc:<http://purl.org/dc/terms/> +PREFIX dcterms:<http://purl.org/dc/terms/> PREFIX austalk:<http://ns.austalk.edu.au/> PREFIX olac:<http://www.language-archives.org/OLAC/1.1/> PREFIX ausnc:<http://ns.ausnc.org.au/schemas/ausnc_md_model/> @@ -22,14 +22,17 @@ PREFIX iso: <http://purl.org/iso25964/skos-thes#> PREFIX dada: <http://purl.org/dada/schema/0.2#>""" + def parser(): parser = argparse.ArgumentParser(description="Retrieves Alveo Item Lists") parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key") parser.add_argument('--speaker', required=True, action="store", type=str, help="Speaker identifier") - parser.add_argument('--words', required=False, default='all', action="store", type=str, help="Word group (all, monopthongs, dipthongs)") + parser.add_argument('--words', required=False, default='all', action="store", type=str, + help="Word group (all, monopthongs, dipthongs)") parser.add_argument('--output', required=True, action="store", type=str, help="output file name") return parser.parse_args() + def find_hVd_words(api_key, speakerid, output, words='all'): """Find words in the Austalk corpus """ @@ -42,14 +45,14 @@ ?item a ausnc:AusNCObject . ?item olac:speaker ?speaker . ?speaker austalk:id "%s" . - ?item austalk:prompt ?prompt . + ?item austalk:prototype ?prot . + ?prot austalk:prompt ?prompt . ?item austalk:componentName ?compname . """ % speakerid - hVdWords = { - 'monopthongs': ['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod', 'whod', 'herd', 'haired', 'hard', 'horde'], - 'dipthongs': ['howd', 'hoyd', 'hide', 'hode', 'hade', 'heared'] - } + hVdWords = dict(monopthongs=['head', 'had', 'hud', 'heed', 'hid', 'hood', 'hod', + 'whod', 'herd', 'haired', 'hard', 'horde'], + dipthongs=['howd', 'hoyd', 'hide', 'hode', 'hade', 'heared']) if words == 'all': words = hVdWords['monopthongs'] + hVdWords['dipthongs'] @@ -62,6 +65,7 @@ query += filterclause + "}" + print(query) result = client.sparql_query('austalk', query) items = [] @@ -71,11 +75,7 @@ with open(output, 'w') as out: out.write("Speaker\tPrompt\tItemURL\n") for item in items: - # TODO: fix this once the RDF data is fixed in alveo - # need to modify the item URL - itemurl = item[1].replace('http://id.austalk.edu.au/item/', 'https://app.alveo.edu.au/catalog/austalk/') - - out.write(speakerid + "\t" + item[0] + "\t" + itemurl + "\n") + out.write(speakerid + "\t" + item[0] + "\t" + item[1] + "\n") def main(): @@ -87,5 +87,6 @@ print("ERROR: " + str(e), file=sys.stderr) sys.exit(1) + if __name__ == '__main__': main()