Mercurial > repos > stevecassidy > maus
annotate items_to_bpf.py @ 6:28df10f22b51 draft default tip
planemo upload commit 72cee9103c0ae4acb5794afaed179bea2c729f2c-dirty
| author | stevecassidy |
|---|---|
| date | Sat, 11 Mar 2017 21:38:41 -0500 |
| parents | c87ee3aec57b |
| children |
| rev | line source |
|---|---|
|
0
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
1 from __future__ import print_function |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
2 import argparse |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
3 import os |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
4 import csv |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
5 import re |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
6 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
7 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
def parser(argv=None):
    """Parse the command-line options for this tool.

    @type argv: C{list} of C{String}, or C{None}
    @param argv: the argument strings to parse; when C{None} (the default)
        argparse falls back to C{sys.argv[1:]}, which is what the
        command-line entry point relies on

    @rtype: C{argparse.Namespace}
    @returns: a namespace with the string attributes C{item_list},
        C{lexicon} and C{output_path}; all three options are required, and
        argparse exits the process with a usage message if one is missing
    """
    # The local is deliberately not called "parser" so that it does not
    # shadow this function's own name.
    arg_parser = argparse.ArgumentParser(
        description="Generate BPF Orthographic Transcription from Item List")
    arg_parser.add_argument('--item_list', required=True, action="store",
                            type=str, help="File containing list of item URLs")
    arg_parser.add_argument('--lexicon', required=True, action="store",
                            type=str, help="File containing lexicon (tsv)")
    arg_parser.add_argument('--output_path', required=True, action="store",
                            type=str, help="Path to output file")
    return arg_parser.parse_args(argv)
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
14 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
def read_item_list(filename):
    """Read an item list from a tab-separated file.

    The file must start with a header row that contains at least the
    columns C{ItemURL} and C{Prompt}; any other columns are ignored.

    @type filename: C{String}
    @param filename: path of the tabular item list file

    @rtype: C{list} of C{(String, String)} tuples, or C{None}
    @returns: one C{(prompt, item_url)} pair per data row, in file order;
        C{None} if the file is empty or either required column is missing
    """
    with open(filename) as fd:
        csvreader = csv.DictReader(fd, dialect='excel-tab')
        # fieldnames is None when the file has no header row at all (an
        # empty file); treat that the same as "required columns missing"
        # instead of failing on a membership test against None.
        fieldnames = csvreader.fieldnames or []
        if 'ItemURL' not in fieldnames or 'Prompt' not in fieldnames:
            return None
        return [(row['Prompt'], row['ItemURL']) for row in csvreader]
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
32 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
# this file name pattern allows galaxy to discover the dataset designation and type
FNPAT = "%(designation)s#%(ext)s"


def galaxy_name(itemurl, ext):
    """Construct a filename suitable for dataset discovery by Galaxy.

    The designation is the last path segment of the item URL; it is
    combined with the extension according to C{FNPAT}.

    @type itemurl: C{String}
    @param itemurl: the item URL from Alveo

    @type ext: C{String}
    @param ext: the datatype extension for the resulting file

    @rtype: C{String}
    @returns: the file name, of the form C{<designation>#<ext>}
    """
    # Drop trailing slashes first: otherwise the last segment of a URL such
    # as ".../item1/" is empty and every such item would be named "#<ext>".
    itemname = itemurl.rstrip('/').split('/')[-1]
    return FNPAT % {'designation': itemname, 'ext': ext}
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
51 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
52 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
class IncompleteLexiconError(Exception):
    """Raised when a transcript contains a word that the lexicon lacks."""


def build_bpf(ortho_trans, lexicon):
    """ Given an orthographic transcript, generate a BPF-format phonetic
    transcription for passing to MAUS, using the specified lexicon.

    The transcript is split on whitespace and on the punctuation
    characters C{. , ! ? " -}; the resulting words are lower-cased and
    numbered from 0.  The output holds one C{ORT:} line per word followed
    by one C{KAN:} line per word, and always ends with a newline.

    @type ortho_trans: C{String}
    @param ortho_trans: the (space-separated) orthographic transcript
    @type lexicon: C{Dict}
    @param lexicon: the lexicon to use to translate words to phonetic
        symbols; keys are lower-case orthographic words

    @rtype: C{String}
    @returns: the BPF-formatted transcript

    @raises IncompleteLexiconError: if there is a word appearing in the
        orthographic transcript that is not covered by the lexicon

    """

    spl = re.compile(r'[\s.,!?"\-]')
    # Splitting on adjacent separators yields empty strings; drop them.
    words = [w.lower() for w in spl.split(ortho_trans) if w]
    ort = []
    kan = []

    for n, word in enumerate(words):
        try:
            pron = lexicon[word]
        except KeyError:
            raise IncompleteLexiconError("'" + word +
                                         "' not present in lexicon")
        ort.append("ORT: %d %s" % (n, word))
        # Lexicon values read straight from a file may still carry their
        # line terminator; strip it so that no blank lines end up between
        # the KAN entries.
        kan.append("KAN: %d %s" % (n, pron.strip()))

    nl = u"\n"
    return nl.join(ort + kan) + nl
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
85 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
86 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
def load_lexicon(lexiconfile):
    """ Load the given file as a lexicon dictionary.
    Should be a tsv file with two columns, first column
    is orthography, second is phonetic transcription.
    Blank lines are ignored.

    @type lexiconfile: C{String}
    @param lexiconfile: the filename of the lexicon file

    @rtype: C{Dict}
    @returns: the lexicon, as a dictionary with orthographic entries as keys
        and the phonetic transcription (without line terminator) as values

    @raises ValueError: if a non-blank line does not consist of exactly
        two tab-separated columns

    """
    lex = {}

    with open(lexiconfile) as f:
        for line in f:
            # Remove the line terminator so that it does not become part of
            # the pronunciation stored in the dictionary.
            line = line.rstrip('\r\n')
            if not line.strip():
                # e.g. a trailing empty line at the end of the file
                continue
            orth, pron = line.split('\t')
            lex[orth] = pron

    return lex
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
107 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
108 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
def list_to_bpf(item_list, lexicon, output_path):
    """
    Generate a BPF file for each item in this item list.
    Items consist of (prompt, ItemURL). URL is used to generate output
    file name.

    :type item_list: iterable of (String, String) tuples
    :param item_list: (prompt, ItemURL) pairs to generate BPF files for

    :type lexicon: Dict
    :param lexicon: maps orthographic words to phonetic transcriptions

    :type output_path: String
    :param output_path: directory to write the BPF files to; it is created
        if it does not exist yet
    """
    # isdir rather than exists: if the path exists but is not a directory,
    # makedirs reports that right away instead of every open() failing later.
    if not os.path.isdir(output_path):
        os.makedirs(output_path)

    for prompt, itemURL in item_list:

        fname = galaxy_name(itemURL, 'par')
        bpftext = build_bpf(prompt, lexicon)
        with open(os.path.join(output_path, fname), 'w') as out:
            out.write(bpftext)
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
130 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
131 |
|
d4c27fdc928b
planemo upload commit 7b5663b41b2dc11f9e375b8f386bc31855800bcf-dirty
stevecassidy
parents:
diff
changeset
|
def main():
    """Command-line entry point.

    Parses the options, reads the item list and the lexicon, and writes
    one BPF file per item into the output directory.  Exits with an error
    message if the item list cannot be used.
    """
    args = parser()
    item_list = read_item_list(args.item_list)
    if item_list is None:
        # read_item_list returns None when a required column is missing;
        # stop here with a readable message instead of failing later with
        # a TypeError when the result is iterated.
        raise SystemExit("Item list '%s' must be a tab-separated file with "
                         "ItemURL and Prompt columns" % args.item_list)
    lexicon = load_lexicon(args.lexicon)
    list_to_bpf(item_list, lexicon, args.output_path)


if __name__ == '__main__':
    main()
