annotate transterm.py @ 0:d5c3354c166d draft default tip

Uploaded
author cpt_testbed
date Fri, 29 Apr 2022 10:33:36 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
1 #!/usr/bin/env python
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
2 import os
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
3 import sys
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
4 import argparse
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
5 import subprocess
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
6 import tempfile
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
7 import re
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
8 import logging
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
9 from Bio.Seq import Seq
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
10 from Bio.SeqRecord import SeqRecord
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
11 from Bio.SeqFeature import SeqFeature, FeatureLocation
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
12 from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
13 from gff3 import feature_lambda, feature_test_type
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
14
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
15 logging.basicConfig(level=logging.INFO)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
16 log = logging.getLogger(__name__)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
17 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
18
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
19 REGEX_TERM = re.compile(
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
20 " TERM \d+ \s* (\d+) - (\d+)\s*(\+|-) [^ ]* \s* (\d+)\s* ([0-9.-]+)\s* -([0-9.]+)\s*\|\s*(.*)"
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
21 )
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
22 PARTS_TERM = re.compile(" ([^ ]*)\s+([^ ]*)\s+([^ ]*)\s+([^ ]*)\s+([^ ]*)")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
23 COLS = (
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
24 "start",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
25 "end",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
26 "strand",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
27 "confidence",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
28 "hp score",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
29 "tail score",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
30 "notes",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
31 "5' tail",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
32 "5' stem",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
33 "loop",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
34 "3' stem",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
35 "3' tail",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
36 )
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
37
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
38
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
39 def build_expterm():
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
40 pass
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
41
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
42
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
43 def generate_annotation_file(gff3):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
44 # TODO: cleanup
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
45 t = tempfile.NamedTemporaryFile(mode="w",delete=False, suffix=".coords")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
46 for rec in gffParse(gff3):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
47 features = feature_lambda(
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
48 rec.features, feature_test_type, {"type": "CDS"}, subfeatures=False
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
49 )
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
50 for feature in sorted(features, key=lambda x: x.location.start):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
51 t.write(
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
52 "\t".join(
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
53 map(
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
54 str,
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
55 [
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
56 feature.id,
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
57 feature.location.start + 1,
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
58 feature.location.end,
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
59 rec.id,
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
60 ],
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
61 )
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
62 )
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
63 + "\n"
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
64 )
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
65 name = t.name
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
66 t.close()
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
67 return name
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
68
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
69
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
70 def run_transterm(expterm, fasta, annotations):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
71 output = subprocess.check_output(
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
72 ["transterm", "-p", expterm, "--all-context", fasta.name, annotations]
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
73 )
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
74 return output
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
75
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
76
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
77 def pairwise(it):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
78 it = iter(it)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
79 while True:
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
80 try:
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
81 yield next(it), next(it)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
82 except StopIteration:
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
83 return
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
84
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
85 def parse_transterm(data):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
86 data = data.decode("utf-8")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
87 data = data.split("SEQUENCE")[1:]
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
88 for datum in data:
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
89 lines = datum.split("\n")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
90
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
91 seq_name = lines[0][1:]
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
92 seq_name = seq_name[0 : seq_name.index(" ")]
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
93
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
94 record = SeqRecord(Seq("ACTG"), id=seq_name)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
95
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
96 important_lines = [
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
97 x for x in lines if len(x.strip()) > 0 and x.startswith(" ")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
98 ]
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
99 # Must have an even #
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
100 assert len(important_lines) % 2 == 0
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
101 for idx, (a, b) in enumerate(pairwise(important_lines)):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
102 parsed_data = REGEX_TERM.match(a).groups() + PARTS_TERM.match(b).groups()
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
103
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
104 pd = {k: v for (k, v) in zip(COLS, parsed_data)}
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
105 # start , end , strand , confidence , hp score , tail score , notes
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
106 # , 5' tail , 5'stem , loop , 3' stem , 3'loop
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
107 start = int(pd["start"])
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
108 end = int(pd["end"])
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
109
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
110 notes = {k: pd[k] for k in COLS[6:]}
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
111 notes2 = {}
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
112 # However, if on - strand, we need to swap 5', 3':
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
113 if pd["strand"] == "-":
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
114 # Also need to flip the data
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
115 notes2 = {
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
116 "[1] 5' stem": str(Seq(notes["3' stem"]).reverse_complement()),
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
117 "[2] loop": str(Seq(notes["loop"]).reverse_complement()),
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
118 "[3] 3' stem": str(Seq(notes["5' stem"]).reverse_complement()),
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
119 "[4] 3' tail": str(Seq(notes["5' tail"]).reverse_complement()),
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
120 }
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
121 else:
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
122 notes2 = {
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
123 "[1] 5' stem": notes["5' stem"],
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
124 "[2] loop": notes["loop"],
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
125 "[3] 3' stem": notes["3' stem"],
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
126 "[4] 3' tail": notes["3' tail"],
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
127 }
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
128
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
129 qualifiers = {
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
130 "score": [pd["confidence"]],
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
131 "source": ["TranstermHP"],
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
132 "ID": ["terminator_%s" % idx],
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
133 }
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
134 current_start = min(start, end) - 1
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
135 current_end = max(start, end)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
136
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
137 if pd["strand"] == "+":
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
138 # Let's extend the current_end to include any Ts we find.
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
139 # Take the 3' tail, and check to see how many Ts we can strip:
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
140 #
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
141 # Updated algo: take as many chars as possible until >1 is non-T
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
142 # If the non-T is last, strip.
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
143 # Otherwise (internal), leave it.
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
144 addition = ""
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
145 prime3tail = notes["3' tail"]
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
146 for idx in range(len(prime3tail)):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
147 if (
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
148 prime3tail[idx] != "T"
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
149 and addition.count("A")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
150 + addition.count("C")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
151 + addition.count("G")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
152 == 1
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
153 ):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
154 break
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
155
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
156 addition += prime3tail[idx]
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
157
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
158 if addition[-1] != "T":
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
159 addition = addition[0:-1]
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
160
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
161 current_end += len(addition)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
162 else:
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
163 addition = ""
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
164 prime5tail = notes["5' tail"][::-1]
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
165 for idx in range(len(prime5tail)):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
166 if (
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
167 prime5tail[idx] != "A"
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
168 and addition.count("T")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
169 + addition.count("C")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
170 + addition.count("G")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
171 == 1
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
172 ):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
173 break
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
174
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
175 addition += prime5tail[idx]
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
176
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
177 if addition[-1] != "A":
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
178 addition = addition[0:-1]
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
179
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
180 current_start -= len(addition)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
181
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
182 qualifiers.update(notes2)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
183 feature = gffSeqFeature(
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
184 FeatureLocation(current_start, current_end),
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
185 type="terminator",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
186 strand=1 if pd["strand"] == "+" else -1,
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
187 qualifiers=qualifiers,
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
188 )
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
189 record.features.append(feature)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
190
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
191 yield record
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
192
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
193
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
194 def main(fasta, gff3, existing_expterm="", **kwargs):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
195 coords_file = generate_annotation_file(gff3)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
196 transterm_output = run_transterm(existing_expterm, fasta, coords_file)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
197 try:
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
198 os.unlink(coords_file)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
199 except Exception:
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
200 pass
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
201 # Not my job
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
202
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
203 for record in parse_transterm(transterm_output):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
204 yield record
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
205
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
206
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
207 if __name__ == "__main__":
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
208 parser = argparse.ArgumentParser(
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
209 description="Export corresponding sequence in genome from GFF3", epilog=""
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
210 )
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
211 parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
212 parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File")
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
213
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
214 parser.add_argument(
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
215 "--min_conf",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
216 type=int,
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
217 default=76,
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
218 help="Only output terminators with confidence >= n",
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
219 )
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
220
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
221 # parser.add_argument('--gc', type=float, default=-2.3, help='Score of a G-C pair')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
222 # parser.add_argument('--au', type=float, default=-0.9, help='Score of an A-U pair')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
223 # parser.add_argument('--gu', type=float, default=1.3, help='Score of a G-U pair')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
224 # parser.add_argument('--mm', type=float, default=3.5, help='Score of any other pair')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
225 # parser.add_argument('--gap', type=int, default=6, help='Score of a gap in the hairpin')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
226 # parser.add_argument('--max_hp_score', type=float, default=-2, help='Maximum allowable hairpin score')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
227 # parser.add_argument('--max_tail_score', type=float, default=-2.5, help='Maximum allowable tail score')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
228 # parser.add_argument('--max_len', type=int, default=59, help='Total extent of hairpin <= n NT long')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
229 # parser.add_argument('--min_stem', type=int, default=4, help='Stem must be n nucleotides long')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
230 # parser.add_argument('--max_loop', type=int, default=13, help='The loop portion can be no longer than n')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
231 # parser.add_argument('--min_loop', type=int, default=3, help='Loop portion of the hairpin must be at least n long')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
232 # parser.add_argument('--uwin_require', type=int, default=3, help='Number of "U" nucleotides in the --uwin_length long region.')
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
233 # parser.add_argument('--loop_penalty', default='1,2,3,4,5,6,7,8,9,10,11', help='The cost of loops of various lengths can be set using --loop_penalty=f1,f2,f3,f4,f5,...fn, where f1 is the cost of a loop of length --min_loop, f2 is the cost of a loop of length --min_loop+1, as so on. If there are too few terms to cover up to max_loop, the last term is repeated.',)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
234
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
235 args = parser.parse_args()
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
236
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
237 for record in main(
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
238 existing_expterm=os.path.join(SCRIPT_PATH, "expterm.dat"), **vars(args)
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
239 ):
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
240 gffWrite([record], sys.stdout)