assess_poliovirus_alignment: assess_alignment.py comparison

planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty

comparison

Legend: equal, deleted, inserted, replaced

-:9105ec016911
+:31ca16290d4f
 for i, state in enumerate(base_state):
 # i is in zero-based genome coordinates
 if state == "M":
 # for mismatch store [pos_in_genome, pos_in_vp1, reference_base, sequenced_base]
 mismatch_list.append(
-[i, i - offset, reference["align"][i], mismatch_bases[i]]
+[i, i - offset + 1, reference["align"][i], mismatch_bases[i]]
 )
 return [conflicts, matches, mismatches, mismatch_list]
 def analyse_trace_quality(json_file: TextIO) -> float:
 )
 parser.add_argument("--datasets", nargs="+")
 args = parser.parse_args()
 offsets = {
-"poliovirus1sabin": 2480,
+# these are in 0-based coordinates, so off-by-one from NCBI 1-based coordinates
-"poliovirus2sabin": 2482,
+"poliovirus1sabin": 2479, # V01150
-"poliovirus3sabin": 2477,
+"poliovirus2sabin": 2481, # AY184220
+"poliovirus3sabin": 2478, # X00925
 }
 lengths = {
 "poliovirus1sabin": 906,
 "poliovirus2sabin": 903,
 if min_mismatches is None or mismatches < min_mismatches:
 min_mismatches = mismatches
 best_match_mismatch_list = mismatch_list
 best_match_quality = quality
 best_match_reference = dataset_name
+percent_mismatches = round(min_mismatches / lengths[best_match_reference] * 100, 2)
 info = {
 "sample_name": args.sample_name,
 "best_reference": best_match_reference,
 "mismatches": min_mismatches,
 "mismatch_list": best_match_mismatch_list,
 "quality": best_match_quality,
+"perc_mismatches": percent_mismatches
 }
 json.dump(info, open(args.output_filename, "w"))

Mercurial > repos > sanbi-uwc > assess_poliovirus_alignment