Mercurial > repos > sanbi-uwc > assess_poliovirus_alignment
comparison assess_alignment.py @ 2:31ca16290d4f draft
planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty
author | sanbi-uwc |
---|---|
date | Thu, 21 Jul 2022 16:43:16 +0000 |
parents | 8bc106442b1a |
children | 1897677e107c |
comparison
equal
deleted
inserted
replaced
1:9105ec016911 | 2:31ca16290d4f |
---|---|
104 for i, state in enumerate(base_state): | 104 for i, state in enumerate(base_state): |
105 # i is in zero-based genome coordinates | 105 # i is in zero-based genome coordinates |
106 if state == "M": | 106 if state == "M": |
107 # for mismatch store [pos_in_genome, pos_in_vp1, reference_base, sequenced_base] | 107 # for mismatch store [pos_in_genome, pos_in_vp1, reference_base, sequenced_base] |
108 mismatch_list.append( | 108 mismatch_list.append( |
109 [i, i - offset, reference["align"][i], mismatch_bases[i]] | 109 [i, i - offset + 1, reference["align"][i], mismatch_bases[i]] |
110 ) | 110 ) |
111 return [conflicts, matches, mismatches, mismatch_list] | 111 return [conflicts, matches, mismatches, mismatch_list] |
112 | 112 |
113 | 113 |
114 def analyse_trace_quality(json_file: TextIO) -> float: | 114 def analyse_trace_quality(json_file: TextIO) -> float: |
151 ) | 151 ) |
152 parser.add_argument("--datasets", nargs="+") | 152 parser.add_argument("--datasets", nargs="+") |
153 args = parser.parse_args() | 153 args = parser.parse_args() |
154 | 154 |
155 offsets = { | 155 offsets = { |
156 "poliovirus1sabin": 2480, | 156 # these are in 0-based coordinates, so off-by-one from NCBI 1-based coordinates |
157 "poliovirus2sabin": 2482, | 157 "poliovirus1sabin": 2479, # V01150 |
158 "poliovirus3sabin": 2477, | 158 "poliovirus2sabin": 2481, # AY184220 |
159 "poliovirus3sabin": 2478, # X00925 | |
159 } | 160 } |
160 | 161 |
161 lengths = { | 162 lengths = { |
162 "poliovirus1sabin": 906, | 163 "poliovirus1sabin": 906, |
163 "poliovirus2sabin": 903, | 164 "poliovirus2sabin": 903, |
179 if min_mismatches is None or mismatches < min_mismatches: | 180 if min_mismatches is None or mismatches < min_mismatches: |
180 min_mismatches = mismatches | 181 min_mismatches = mismatches |
181 best_match_mismatch_list = mismatch_list | 182 best_match_mismatch_list = mismatch_list |
182 best_match_quality = quality | 183 best_match_quality = quality |
183 best_match_reference = dataset_name | 184 best_match_reference = dataset_name |
185 percent_mismatches = round(min_mismatches / lengths[best_match_reference] * 100, 2) | |
184 | 186 |
185 info = { | 187 info = { |
186 "sample_name": args.sample_name, | 188 "sample_name": args.sample_name, |
187 "best_reference": best_match_reference, | 189 "best_reference": best_match_reference, |
188 "mismatches": min_mismatches, | 190 "mismatches": min_mismatches, |
189 "mismatch_list": best_match_mismatch_list, | 191 "mismatch_list": best_match_mismatch_list, |
190 "quality": best_match_quality, | 192 "quality": best_match_quality, |
193 "perc_mismatches": percent_mismatches | |
191 } | 194 } |
192 json.dump(info, open(args.output_filename, "w")) | 195 json.dump(info, open(args.output_filename, "w")) |