# HG changeset patch # User sanbi-uwc # Date 1664257973 0 # Node ID acaaf49e2747354791d76fc7d1b4a2b266f09280 # Parent 852e76e7d22ae3620028d24ebfeebf3df13c3dd8 planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty diff -r 852e76e7d22a -r acaaf49e2747 assess_alignment.py --- a/assess_alignment.py Wed Sep 14 11:25:43 2022 +0000 +++ b/assess_alignment.py Tue Sep 27 05:52:53 2022 +0000 @@ -46,7 +46,8 @@ max_end = max([int(al["leadingGaps"]) + len(al["align"]) for al in msas]) base_state = ["n"] * len(reference["align"]) mismatch_bases = {} - for i, base in enumerate(reference["align"]): + consensus = '' + for i, reference_base in enumerate(reference["align"]): for k, al in enumerate(msas): leading_gaps = int(al["leadingGaps"]) align_len = len(al["align"]) @@ -56,6 +57,7 @@ # skip positions outside of vp1 gene region continue al_base = al["align"][i - leading_gaps] + consensus += al_base has_secondary_basecall = False if sec_is_conflict: gappedTrace = data["gappedTraces"][k] @@ -68,7 +70,7 @@ has_secondary_basecall = True # set this position to conflicted base_state[i] = "C" - if al_base != base: + if al_base != reference_base: # let's deal with all the cases where the base state doesn't match the reference if base_state[i] == "G": # the base state was G (a trace matches reference) and now we see a mismatch @@ -108,7 +110,7 @@ mismatch_list.append( [i + 1, i - offset + 1, reference["align"][i], mismatch_bases[i]] ) - return [conflicts, matches, mismatches, mismatch_list] + return [conflicts, matches, mismatches, mismatch_list, consensus] def analyse_trace_quality(json_file: TextIO) -> float: @@ -174,7 +176,7 @@ ) # take the name but remove any json suffix offset = offsets[dataset_name] length = lengths[dataset_name] - (conflicts, matches, mismatches, mismatch_list) = analyse_mismatches( + (conflicts, matches, mismatches, mismatch_list, consensus) = analyse_mismatches( open(json_filename), offset, length ) # analyse_mismatches(json_filename, True) @@ -184,7 +186,7 @@ best_match_mismatch_list = mismatch_list best_match_quality = quality best_match_reference = dataset_name - best_consensus = open(args.consensi[file_index]).read().replace('>Consensus', f'>{args.sample_name}') + best_consensus = consensus percent_mismatches = round(min_mismatches / lengths[best_match_reference] * 100, 2) info = { @@ -194,6 +196,7 @@ "mismatch_list": best_match_mismatch_list, "quality": best_match_quality, "perc_mismatches": percent_mismatches, + "consensus": best_consensus } json.dump(info, open(args.output_filename, "w")) diff -r 852e76e7d22a -r acaaf49e2747 assess_poliovirus_alignment.xml --- a/assess_poliovirus_alignment.xml Wed Sep 14 11:25:43 2022 +0000 +++ b/assess_poliovirus_alignment.xml Tue Sep 27 05:52:53 2022 +0000 @@ -1,4 +1,4 @@ - + python @@ -37,7 +37,19 @@ - + + + + + + + + + + + + +