Mercurial > repos > sanbi-uwc > assess_poliovirus_alignment
changeset 9:acaaf49e2747 draft
planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
| author | sanbi-uwc | 
|---|---|
| date | Tue, 27 Sep 2022 05:52:53 +0000 | 
| parents | 852e76e7d22a | 
| children | fb905d0f8201 | 
| files | assess_alignment.py assess_poliovirus_alignment.xml | 
| diffstat | 2 files changed, 22 insertions(+), 7 deletions(-) [+] | 
line wrap: on
 line diff
--- a/assess_alignment.py Wed Sep 14 11:25:43 2022 +0000 +++ b/assess_alignment.py Tue Sep 27 05:52:53 2022 +0000 @@ -46,7 +46,8 @@ max_end = max([int(al["leadingGaps"]) + len(al["align"]) for al in msas]) base_state = ["n"] * len(reference["align"]) mismatch_bases = {} - for i, base in enumerate(reference["align"]): + consensus = '' + for i, reference_base in enumerate(reference["align"]): for k, al in enumerate(msas): leading_gaps = int(al["leadingGaps"]) align_len = len(al["align"]) @@ -56,6 +57,7 @@ # skip positions outside of vp1 gene region continue al_base = al["align"][i - leading_gaps] + consensus += al_base has_secondary_basecall = False if sec_is_conflict: gappedTrace = data["gappedTraces"][k] @@ -68,7 +70,7 @@ has_secondary_basecall = True # set this position to conflicted base_state[i] = "C" - if al_base != base: + if al_base != reference_base: # let's deal with all the cases where the base state doesn't match the reference if base_state[i] == "G": # the base state was G (a trace matches reference) and now we see a mismatch @@ -108,7 +110,7 @@ mismatch_list.append( [i + 1, i - offset + 1, reference["align"][i], mismatch_bases[i]] ) - return [conflicts, matches, mismatches, mismatch_list] + return [conflicts, matches, mismatches, mismatch_list, consensus] def analyse_trace_quality(json_file: TextIO) -> float: @@ -174,7 +176,7 @@ ) # take the name but remove any json suffix offset = offsets[dataset_name] length = lengths[dataset_name] - (conflicts, matches, mismatches, mismatch_list) = analyse_mismatches( + (conflicts, matches, mismatches, mismatch_list, consensus) = analyse_mismatches( open(json_filename), offset, length ) # analyse_mismatches(json_filename, True) @@ -184,7 +186,7 @@ best_match_mismatch_list = mismatch_list best_match_quality = quality best_match_reference = dataset_name - best_consensus = open(args.consensi[file_index]).read().replace('>Consensus', f'>{args.sample_name}') + best_consensus = consensus percent_mismatches = round(min_mismatches / lengths[best_match_reference] * 100, 2) info = { @@ -194,6 +196,7 @@ "mismatch_list": best_match_mismatch_list, "quality": best_match_quality, "perc_mismatches": percent_mismatches, + "consensus": best_consensus } json.dump(info, open(args.output_filename, "w"))
--- a/assess_poliovirus_alignment.xml Wed Sep 14 11:25:43 2022 +0000 +++ b/assess_poliovirus_alignment.xml Tue Sep 27 05:52:53 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="assess_poliovirus_alignment" name="Assess poliovirus alignment" version="0.7.0+galaxy0" profile="21.05"> +<tool id="assess_poliovirus_alignment" name="Assess poliovirus alignment" version="0.8.0+galaxy0" profile="21.05"> <requirements> <requirement type="package" version="3.9">python</requirement> </requirements> @@ -37,7 +37,19 @@ <element name="poliovirus3sabin" value="sample1/poliovirus3sabin.json" /> </collection> </param> - <output name="output1" ftype="json" file="sample1_output.json" /> + <param name="tracy_consensus_results" ftype="fasta"> + <collection type="list"> + <element name="poliovirus1sabin" value="sample1/poliovirus1sabin.fasta" /> + <element name="poliovirus2sabin" value="sample1/poliovirus2sabin.fasta" /> + <element name="poliovirus3sabin" value="sample1/poliovirus3sabin.fasta" /> + </collection> + </param> + <output name="output1" ftype="json"> + <assert_contents> + <has_text text='"mismatch_list": [[2506, 25, "G", "A"], [2509, 28, "G", "A"], [2521, 40, "A", "G"], [2523, 42, "T", "C"], [2542, 61, "C", "T"], [2548, 67, "T", "C"], [2555, 74, "A", "G"], [2574, 93, "G", "A"], [2580, 99, "C", "T"], [2610, 129, "A", "G"], [2616, 135, "A", "G"], [2676, 195, "T", "C"], [2691, 210, "A", "G"], [2751, 270, "T", "C"], [2775, 294, "A", "G"], [2862, 381, "T", "C"], [2909, 428, "T", "C"], [2932, 451, "T", "C"], [2946, 465, "T", "C"], [3000, 519, "T", "C"], [3015, 534, "C", "T"], [3030, 549, "G", "A"], [3142, 661, "G", "A"]]' /> + </assert_contents> + </output> + <output name="output_consensus" ftype="fasta" file="sample1_best_consensus.fasta" /> </test> </tests> <help><