annotate assess_alignment.py @ 11:fff48fa8a22b draft

planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
author sanbi-uwc
date Tue, 27 Sep 2022 09:18:56 +0000
parents fb905d0f8201
children ee98dcd9aad4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
1 #!/usr/bin/env python
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
2
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
3 import argparse
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
4 import json
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
5 import sys
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
6 from dataclasses import dataclass
10
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
7 from textwrap import fill
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
8 from typing import TextIO
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
9
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
10
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
11 @dataclass
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
12 class Sample:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
13 name: str
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
14 reference: str
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
15 mismatches: int
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
16 quality: str
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
17
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
18
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
19 def load_json(json_file: TextIO) -> dict:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
20 data = json.load(json_file)
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
21 if "msa" not in data:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
22 raise ValueError("MSA missing from JSON, cannot proceed")
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
23 if "gappedTraces" not in data:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
24 raise ValueError("gappedTraces missing from JSON, cannot proceed")
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
25 return data
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
26
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
27
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
28 def analyse_mismatches(
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
29 json_file: TextIO,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
30 offset: int,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
31 length: int,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
32 vp1only: bool = True,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
33 sec_is_conflict: bool = False,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
34 ) -> list:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
35 data = load_json(json_file)
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
36 msas = [al for al in data["msa"] if not al["reference"]]
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
37 reference = None
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
38 for al in data["msa"]:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
39 if al["reference"]:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
40 if reference is None:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
41 reference = al
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
42 else:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
43 sys.exit(
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
44 "more than one reference found in JSON MSA list, cannot proceed"
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
45 )
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
46 min_start = min([int(al["leadingGaps"]) for al in msas])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
47 max_end = max([int(al["leadingGaps"]) + len(al["align"]) for al in msas])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
48 base_state = ["n"] * len(reference["align"])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
49 mismatch_bases = {}
10
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
50 base_start = 0
9
acaaf49e2747 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 8
diff changeset
51 for i, reference_base in enumerate(reference["align"]):
10
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
52 if vp1only and not base_start:
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
53 if data['gappedConsensus'][i] != '-':
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
54 base_start = i
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
55 for k, al in enumerate(msas):
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
56 leading_gaps = int(al["leadingGaps"])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
57 align_len = len(al["align"])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
58 if leading_gaps < i and (leading_gaps + align_len) > i:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
59 vp1pos = i - offset
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
60 if vp1only and vp1pos < 0 or vp1pos > length:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
61 # skip positions outside of vp1 gene region
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
62 continue
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
63 al_base = al["align"][i - leading_gaps]
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
64 has_secondary_basecall = False
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
65 if sec_is_conflict:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
66 gappedTrace = data["gappedTraces"][k]
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
67 pos = i - int(gappedTrace["leadingGaps"])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
68 # print(len(gappedTrace['basecallPos']), pos, k, len(gappedTrace['basecalls']), gappedTrace['basecallPos'][pos])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
69 basecall_str = gappedTrace["basecalls"][
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
70 str(gappedTrace["basecallPos"][pos])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
71 ]
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
72 if "|" in basecall_str:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
73 has_secondary_basecall = True
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
74 # set this position to conflicted
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
75 base_state[i] = "C"
9
acaaf49e2747 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 8
diff changeset
76 if al_base != reference_base:
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
77 # let's deal with all the cases where the base state doesn't match the reference
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
78 if base_state[i] == "G":
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
79 # the base state was G (a trace matches reference) and now we see a mismatch
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
80 base_state[i] = "C"
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
81 elif base_state[i] == "C":
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
82 # already marked as conflicting - a mismatch doesn't change that
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
83 pass
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
84 elif base_state[i] == "n" or base_state[i] == "M":
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
85 # we never saw this before or its already marked as a mismatch
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
86 base_state[i] = "M"
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
87 mismatch_bases[i] = al_base
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
88 else:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
89 sys.exit("unexpected base state: " + base_state[i])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
90 else:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
91 if base_state[i] == "G" or base_state[i] == "n":
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
92 # we saw this before and got a match or
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
93 # we never saw this before
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
94 base_state[i] = "G"
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
95 elif base_state[i] == "M":
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
96 # we saw this before but it was a mismatch - mark this as a conflict
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
97 base_state[i] = "C"
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
98 if i in mismatch_bases:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
99 del mismatch_bases[i]
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
100 elif base_state[i] == "C":
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
101 # we have seen a conflict here before
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
102 pass
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
103 else:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
104 sys.exit("unexpected base_state: " + base_state[i])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
105 conflicts = base_state.count("C")
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
106 matches = base_state.count("G")
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
107 mismatches = base_state.count("M")
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
108 mismatch_list = []
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
109 for i, state in enumerate(base_state):
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
110 # i is in zero-based genome coordinates
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
111 if state == "M":
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
112 # for mismatch store [pos_in_genome, pos_in_vp1, reference_base, sequenced_base]
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
113 mismatch_list.append(
4
1897677e107c planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents: 2
diff changeset
114 [i + 1, i - offset + 1, reference["align"][i], mismatch_bases[i]]
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
115 )
10
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
116 if vp1only:
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
117 # we have trim consensus bases before vp1 region
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
118 if base_start <= offset:
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
119 cons_start = offset - base_start
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
120 else:
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
121 cons_start = 0
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
122 consensus = data['gapFreeConsensus'][cons_start:cons_start + length]
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
123 else:
fb905d0f8201 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 9
diff changeset
124 consensus = data['gapFreeConsensus']
9
acaaf49e2747 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 8
diff changeset
125 return [conflicts, matches, mismatches, mismatch_list, consensus]
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
126
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
127
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
128 def analyse_trace_quality(json_file: TextIO) -> float:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
129 data = load_json(json_file)
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
130
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
131 traces = data["gappedTraces"]
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
132 overall_avg = 0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
133 for trace in traces:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
134 start = min(trace["basecallPos"])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
135 end = max(trace["basecallPos"])
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
136 call_quality = {}
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
137 avg_ratio = 0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
138 for base in ("A", "C", "G", "T"):
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
139 calls = trace["peak" + base][start : end + 1]
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
140 min_call = min(calls)
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
141 max_call = max(calls)
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
142 avg_call = sum(calls) / len(calls)
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
143 ratio = max_call / avg_call
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
144 call_quality["avg" + base] = avg_call
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
145 call_quality["min" + base] = min_call
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
146 call_quality["max" + base] = max_call
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
147 call_quality["ratio" + base] = ratio
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
148 avg_ratio += ratio
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
149 avg_ratio = avg_ratio / 4
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
150 overall_avg += avg_ratio
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
151 overall_avg = overall_avg / len(traces)
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
152 return overall_avg
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
153
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
154
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
155 def comma_split(args: str) -> list[str]:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
156 return args.split(",")
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
157
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
158
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
159 if __name__ == "__main__":
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
160 parser = argparse.ArgumentParser()
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
161 parser.add_argument("--output_filename", help="Path to output file")
5
0e556a3f85d6 planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents: 4
diff changeset
162 parser.add_argument("--consensus_output_filename", help="Path to output file for best consensus")
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
163 parser.add_argument("--sample_name", help="Name of sample being analysed")
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
164 parser.add_argument(
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
165 "--dataset_names", type=comma_split, help="Comma separated names for datasets"
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
166 )
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
167 parser.add_argument("--datasets", nargs="+")
5
0e556a3f85d6 planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents: 4
diff changeset
168 parser.add_argument("--consensi", nargs="+")
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
169 args = parser.parse_args()
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
170
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
171 offsets = {
2
31ca16290d4f planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents: 0
diff changeset
172 # these are in 0-based coordinates, so off-by-one from NCBI 1-based coordinates
31ca16290d4f planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents: 0
diff changeset
173 "poliovirus1sabin": 2479, # V01150
31ca16290d4f planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents: 0
diff changeset
174 "poliovirus2sabin": 2481, # AY184220
31ca16290d4f planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents: 0
diff changeset
175 "poliovirus3sabin": 2478, # X00925
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
176 }
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
177
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
178 lengths = {
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
179 "poliovirus1sabin": 906,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
180 "poliovirus2sabin": 903,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
181 "poliovirus3sabin": 900,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
182 }
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
183
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
184 min_mismatches = None
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
185 for file_index, json_filename in enumerate(args.datasets):
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
186 dataset_name = args.dataset_names[file_index].replace(
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
187 ".json", ""
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
188 ) # take the name but remove any json suffix
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
189 offset = offsets[dataset_name]
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
190 length = lengths[dataset_name]
9
acaaf49e2747 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 8
diff changeset
191 (conflicts, matches, mismatches, mismatch_list, consensus) = analyse_mismatches(
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
192 open(json_filename), offset, length
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
193 )
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
194 # analyse_mismatches(json_filename, True)
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
195 quality = analyse_trace_quality(open(json_filename))
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
196 if min_mismatches is None or mismatches < min_mismatches:
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
197 min_mismatches = mismatches
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
198 best_match_mismatch_list = mismatch_list
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
199 best_match_quality = quality
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
200 best_match_reference = dataset_name
9
acaaf49e2747 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 8
diff changeset
201 best_consensus = consensus
2
31ca16290d4f planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents: 0
diff changeset
202 percent_mismatches = round(min_mismatches / lengths[best_match_reference] * 100, 2)
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
203
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
204 info = {
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
205 "sample_name": args.sample_name,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
206 "best_reference": best_match_reference,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
207 "mismatches": min_mismatches,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
208 "mismatch_list": best_match_mismatch_list,
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
209 "quality": best_match_quality,
5
0e556a3f85d6 planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents: 4
diff changeset
210 "perc_mismatches": percent_mismatches,
9
acaaf49e2747 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 8
diff changeset
211 "consensus": best_consensus
0
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
212 }
8bc106442b1a planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents:
diff changeset
213 json.dump(info, open(args.output_filename, "w"))
5
0e556a3f85d6 planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
sanbi-uwc
parents: 4
diff changeset
214
11
fff48fa8a22b planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
sanbi-uwc
parents: 10
diff changeset
215 open(args.consensus_output_filename, "w").write(f'>{args.sample_name}\n' + fill(best_consensus) + '\n')