Mercurial > repos > stevecassidy > maus
annotate items_to_bpf.py @ 1:4162c1e2ad5f draft
planemo upload commit e7b3a927f5f798f0a9c9a51b62bc8afa5f2c1dab-dirty
| author | stevecassidy | 
|---|---|
| date | Wed, 16 Nov 2016 17:33:15 -0500 | 
| parents | d4c27fdc928b | 
| children | c87ee3aec57b | 
| rev | line source | 
|---|---|
| 
0
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
1 from __future__ import print_function | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
2 import argparse | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
3 import os | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
4 import csv | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
5 import re | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
6 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
7 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
def parser(argv=None):
    """Parse the command-line arguments for this tool.

    @type argv: C{list} of C{String}, or C{None}
    @param argv: the argument strings to parse; when None (the default)
        argparse falls back to the process command line

    @rtype: C{argparse.Namespace}
    @returns: namespace with the string attributes item_list, lexicon
        and output_path, all of which are required options
    """
    # Named arg_parser so the local does not shadow this function's own name.
    arg_parser = argparse.ArgumentParser(description="Generate BPF Orthographic Transcription from Item List")
    arg_parser.add_argument('--item_list', required=True, action="store", type=str, help="File containing list of item URLs")
    arg_parser.add_argument('--lexicon', required=True, action="store", type=str, help="File containing lexicon (tsv)")
    arg_parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
    return arg_parser.parse_args(argv)
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
14 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
def read_item_list(filename):
    """Read an item list from a tab-separated file.

    The file must start with a header row that contains at least the
    columns Prompt and ItemURL.

    @type filename: C{String}
    @param filename: path of the tabular item list file

    @rtype: C{list} of C{tuple}, or C{None}
    @returns: one (prompt, item URL) pair per data row, in file order;
        None if the file is empty or lacks one of the required columns
    """
    with open(filename) as fd:
        csvreader = csv.DictReader(fd, dialect='excel-tab')
        # Diagnostic output kept from the original implementation.
        print("CSV", csvreader.fieldnames)
        # fieldnames is None when the file has no header row at all;
        # treat that the same as a header without the required columns.
        fieldnames = csvreader.fieldnames or []
        if 'ItemURL' not in fieldnames or 'Prompt' not in fieldnames:
            return None
        return [(row['Prompt'], row['ItemURL']) for row in csvreader]
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
33 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
# this file name pattern allows galaxy to discover the dataset designation and type
FNPAT = "%(designation)s#%(ext)s"


def galaxy_name(itemurl, ext):
    """Construct a filename suitable for dataset discovery
    by Galaxy.

    The designation is the last path segment of the item URL.

    @type itemurl: C{String}
    @param itemurl: the item URL from Alveo

    @type ext: C{String}
    @param ext: the datatype extension for the resulting file

    @rtype: C{String}
    @returns: the file name, formatted according to FNPAT
    """
    # Drop trailing slashes first so ".../item1/" still yields "item1"
    # instead of an empty designation.
    itemname = itemurl.rstrip('/').split('/')[-1]
    return FNPAT % {'designation': itemname, 'ext': ext}
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
52 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
53 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
class IncompleteLexiconError(Exception):
    """Raised when a word of the transcript has no entry in the lexicon."""


def build_bpf(ortho_trans, lexicon):
    """ Given an orthographic transcript, generate a BPF-format phonetic
    transcription for passing to MAUS, using the specified lexicon.

    The transcript is split on whitespace and on the punctuation
    characters . , ! ? " and -, and every word is lower-cased before it
    is looked up in the lexicon.

    @type ortho_trans: C{String}
    @param ortho_trans: the (space-separated) orthographic transcript
    @type lexicon: C{Dict}
    @param lexicon: the lexicon to use to translate words to phonetic symbols

    @rtype: C{String}
    @returns: the BPF-formatted transcript: all ORT lines followed by
        all KAN lines, newline separated, words numbered from 0

    @raises IncompleteLexiconError: if there is a word appearing in the
        orthographic transcript that is not covered by the lexicon

    """

    spl = re.compile(r'[\s.,!?"\-]')
    # Splitting leaves empty strings between adjacent separators; drop them.
    words = [w.lower() for w in spl.split(ortho_trans) if w]
    ort = []
    kan = []

    for n, word in enumerate(words):
        # Only the lexicon lookup can raise KeyError, so keep the try minimal.
        try:
            pron = lexicon[word]
        except KeyError:
            raise IncompleteLexiconError("'" + word +
                                         "' not present in lexicon")
        ort.append("ORT: %d %s" % (n, word))
        kan.append("KAN: %d %s" % (n, pron))

    nl = u"\n"
    return nl.join(ort) + nl + nl.join(kan)
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
86 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
87 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
def load_lexicon(lexiconfile):
    """ Load the given file as a lexicon dictionary.
    Should be a tsv file with two columns, first column
    is orthography, second is phonetic transcription.
    Blank lines are ignored.

    @type lexiconfile: C{String}
    @param lexiconfile: the filename of the lexicon file

    @rtype: C{Dict}
    @returns: the lexicon, as a dictionary with orthographic entries as keys
        and phonetic transcriptions (without the line terminator) as values

    @raises ValueError: if a non-blank line does not consist of exactly
        two tab-separated columns

    """
    lex = {}

    with open(lexiconfile) as f:
        for lineno, line in enumerate(f, 1):
            # Strip the line terminator so it does not become part of the
            # pronunciation and leak into the generated BPF text.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            fields = line.split('\t')
            if len(fields) != 2:
                raise ValueError(
                    "%s, line %d: expected 2 tab-separated columns, found %d"
                    % (lexiconfile, lineno, len(fields)))
            orth, pron = fields
            lex[orth] = pron

    return lex
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
108 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
109 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
def list_to_bpf(item_list, lexicon, output_path):
    """
    Generate a BPF file for each item in this item list.
    Items consist of (prompt, ItemURL). URL is used to generate output
    file name.

    @type item_list: C{list} of C{tuple}
    @param item_list: the (prompt, ItemURL) pairs to convert

    @type lexicon: C{Dict}
    @param lexicon: the lexicon passed on to build_bpf for each prompt

    @type output_path: C{String}
    @param output_path: directory to write the BPF files to; created
        (including parents) if it does not exist yet

    @raises ValueError: if item_list is None
    """
    if item_list is None:
        # Fail with a clear message instead of "'NoneType' is not iterable".
        raise ValueError("no item list given: item_list is None")

    # Create the directory unconditionally and tolerate "already exists";
    # this avoids the race between an existence check and the creation.
    try:
        os.makedirs(output_path)
    except OSError:
        if not os.path.isdir(output_path):
            raise

    for prompt, itemURL in item_list:

        fname = galaxy_name(itemURL, 'par')
        bpftext = build_bpf(prompt, lexicon)
        with open(os.path.join(output_path, fname), 'w') as out:
            out.write(bpftext)
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
131 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
132 | 
| 
 
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
 
stevecassidy 
parents:  
diff
changeset
 | 
def main():
    """Command-line entry point.

    Parses the arguments, reads the item list and the lexicon, and writes
    one BPF file per item into the output directory. Exits with an error
    message if the item list could not be read.
    """
    # Local import: sys is needed only for the error exit below.
    import sys

    args = parser()
    item_list = read_item_list(args.item_list)
    if item_list is None:
        # read_item_list signals a missing header column with None; stop
        # here with a readable message rather than a traceback later on.
        sys.exit("Item list '%s' must be a tab-separated file with "
                 "Prompt and ItemURL columns" % args.item_list)
    lexicon = load_lexicon(args.lexicon)
    list_to_bpf(item_list, lexicon, args.output_path)


if __name__ == '__main__':
    main()
