Mercurial > repos > sanbi-uwc > assess_poliovirus_alignment
comparison assess_alignment.py @ 10:fb905d0f8201 draft
planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
author | sanbi-uwc |
---|---|
date | Tue, 27 Sep 2022 08:44:35 +0000 |
parents | acaaf49e2747 |
children | fff48fa8a22b |
comparison
equal
deleted
inserted
replaced
9:acaaf49e2747 | 10:fb905d0f8201 |
---|---|
2 | 2 |
3 import argparse | 3 import argparse |
4 import json | 4 import json |
5 import sys | 5 import sys |
6 from dataclasses import dataclass | 6 from dataclasses import dataclass |
7 from textwrap import fill | |
7 from typing import TextIO | 8 from typing import TextIO |
8 | 9 |
9 | 10 |
10 @dataclass | 11 @dataclass |
11 class Sample: | 12 class Sample: |
44 ) | 45 ) |
45 min_start = min([int(al["leadingGaps"]) for al in msas]) | 46 min_start = min([int(al["leadingGaps"]) for al in msas]) |
46 max_end = max([int(al["leadingGaps"]) + len(al["align"]) for al in msas]) | 47 max_end = max([int(al["leadingGaps"]) + len(al["align"]) for al in msas]) |
47 base_state = ["n"] * len(reference["align"]) | 48 base_state = ["n"] * len(reference["align"]) |
48 mismatch_bases = {} | 49 mismatch_bases = {} |
49 consensus = '' | 50 base_start = 0 |
50 for i, reference_base in enumerate(reference["align"]): | 51 for i, reference_base in enumerate(reference["align"]): |
52 if vp1only and not base_start: | |
53 if data['gappedConsensus'][i] != '-': | |
54 base_start = i | |
51 for k, al in enumerate(msas): | 55 for k, al in enumerate(msas): |
52 leading_gaps = int(al["leadingGaps"]) | 56 leading_gaps = int(al["leadingGaps"]) |
53 align_len = len(al["align"]) | 57 align_len = len(al["align"]) |
54 if leading_gaps < i and (leading_gaps + align_len) > i: | 58 if leading_gaps < i and (leading_gaps + align_len) > i: |
55 vp1pos = i - offset | 59 vp1pos = i - offset |
56 if vp1only and vp1pos < 0 or vp1pos > length: | 60 if vp1only and vp1pos < 0 or vp1pos > length: |
57 # skip positions outside of vp1 gene region | 61 # skip positions outside of vp1 gene region |
58 continue | 62 continue |
59 al_base = al["align"][i - leading_gaps] | 63 al_base = al["align"][i - leading_gaps] |
60 consensus += al_base | |
61 has_secondary_basecall = False | 64 has_secondary_basecall = False |
62 if sec_is_conflict: | 65 if sec_is_conflict: |
63 gappedTrace = data["gappedTraces"][k] | 66 gappedTrace = data["gappedTraces"][k] |
64 pos = i - int(gappedTrace["leadingGaps"]) | 67 pos = i - int(gappedTrace["leadingGaps"]) |
65 # print(len(gappedTrace['basecallPos']), pos, k, len(gappedTrace['basecalls']), gappedTrace['basecallPos'][pos]) | 68 # print(len(gappedTrace['basecallPos']), pos, k, len(gappedTrace['basecalls']), gappedTrace['basecallPos'][pos]) |
108 if state == "M": | 111 if state == "M": |
109 # for mismatch store [pos_in_genome, pos_in_vp1, reference_base, sequenced_base] | 112 # for mismatch store [pos_in_genome, pos_in_vp1, reference_base, sequenced_base] |
110 mismatch_list.append( | 113 mismatch_list.append( |
111 [i + 1, i - offset + 1, reference["align"][i], mismatch_bases[i]] | 114 [i + 1, i - offset + 1, reference["align"][i], mismatch_bases[i]] |
112 ) | 115 ) |
116 if vp1only: | |
117 # we have trim consensus bases before vp1 region | |
118 if base_start <= offset: | |
119 cons_start = offset - base_start | |
120 else: | |
121 cons_start = 0 | |
122 consensus = data['gapFreeConsensus'][cons_start:cons_start + length] | |
123 else: | |
124 consensus = data['gapFreeConsensus'] | |
113 return [conflicts, matches, mismatches, mismatch_list, consensus] | 125 return [conflicts, matches, mismatches, mismatch_list, consensus] |
114 | 126 |
115 | 127 |
116 def analyse_trace_quality(json_file: TextIO) -> float: | 128 def analyse_trace_quality(json_file: TextIO) -> float: |
117 data = load_json(json_file) | 129 data = load_json(json_file) |
198 "perc_mismatches": percent_mismatches, | 210 "perc_mismatches": percent_mismatches, |
199 "consensus": best_consensus | 211 "consensus": best_consensus |
200 } | 212 } |
201 json.dump(info, open(args.output_filename, "w")) | 213 json.dump(info, open(args.output_filename, "w")) |
202 | 214 |
203 open(args.consensus_output_filename, "w").write(best_consensus) | 215 open(args.consensus_output_filename, "w").write(f'>{args.sample_name}\n' + fill(best_consensus)) |