# HG changeset patch # User sanbi-uwc # Date 1658421796 0 # Node ID 31ca16290d4f64c24d80d9ef632d49d743200240 # Parent 9105ec01691169b101ff851ac9d8d597ce26ac01 planemo upload for repository https://github.com/pvanheus/polio_report commit a99e10fec2fac5aae70974c977eb3b362a1a8429-dirty diff -r 9105ec016911 -r 31ca16290d4f assess_alignment.py --- a/assess_alignment.py Tue Jul 19 11:57:09 2022 +0000 +++ b/assess_alignment.py Thu Jul 21 16:43:16 2022 +0000 @@ -106,7 +106,7 @@ if state == "M": # for mismatch store [pos_in_genome, pos_in_vp1, reference_base, sequenced_base] mismatch_list.append( - [i, i - offset, reference["align"][i], mismatch_bases[i]] + [i, i - offset + 1, reference["align"][i], mismatch_bases[i]] ) return [conflicts, matches, mismatches, mismatch_list] @@ -153,9 +153,10 @@ args = parser.parse_args() offsets = { - "poliovirus1sabin": 2480, - "poliovirus2sabin": 2482, - "poliovirus3sabin": 2477, + # these are in 0-based coordinates, so off-by-one from NCBI 1-based coordinates + "poliovirus1sabin": 2479, # V01150 + "poliovirus2sabin": 2481, # AY184220 + "poliovirus3sabin": 2478, # X00925 } lengths = { @@ -181,6 +182,7 @@ best_match_mismatch_list = mismatch_list best_match_quality = quality best_match_reference = dataset_name + percent_mismatches = round(min_mismatches / lengths[best_match_reference] * 100, 2) info = { "sample_name": args.sample_name, @@ -188,5 +190,6 @@ "mismatches": min_mismatches, "mismatch_list": best_match_mismatch_list, "quality": best_match_quality, + "perc_mismatches": percent_mismatches } json.dump(info, open(args.output_filename, "w")) diff -r 9105ec016911 -r 31ca16290d4f assess_poliovirus_alignment.xml --- a/assess_poliovirus_alignment.xml Tue Jul 19 11:57:09 2022 +0000 +++ b/assess_poliovirus_alignment.xml Thu Jul 21 16:43:16 2022 +0000 @@ -1,4 +1,4 @@ - + python