changeset 9:acaaf49e2747 draft

planemo upload for repository https://github.com/pvanheus/polio_report commit aa90f911e6269aba792c9814c98a659e631400b2-dirty
author sanbi-uwc
date Tue, 27 Sep 2022 05:52:53 +0000
parents 852e76e7d22a
children fb905d0f8201
files assess_alignment.py assess_poliovirus_alignment.xml
diffstat 2 files changed, 22 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/assess_alignment.py	Wed Sep 14 11:25:43 2022 +0000
+++ b/assess_alignment.py	Tue Sep 27 05:52:53 2022 +0000
@@ -46,7 +46,8 @@
     max_end = max([int(al["leadingGaps"]) + len(al["align"]) for al in msas])
     base_state = ["n"] * len(reference["align"])
     mismatch_bases = {}
-    for i, base in enumerate(reference["align"]):
+    consensus = ''
+    for i, reference_base in enumerate(reference["align"]):
         for k, al in enumerate(msas):
             leading_gaps = int(al["leadingGaps"])
             align_len = len(al["align"])
@@ -56,6 +57,7 @@
                     # skip positions outside of vp1 gene region
                     continue
                 al_base = al["align"][i - leading_gaps]
+                consensus += al_base
                 has_secondary_basecall = False
                 if sec_is_conflict:
                     gappedTrace = data["gappedTraces"][k]
@@ -68,7 +70,7 @@
                         has_secondary_basecall = True
                         # set this position to conflicted
                         base_state[i] = "C"
-                if al_base != base:
+                if al_base != reference_base:
                     # let's deal with all the cases where the base state doesn't match the reference
                     if base_state[i] == "G":
                         # the base state was G (a trace matches reference) and now we see a mismatch
@@ -108,7 +110,7 @@
             mismatch_list.append(
                 [i + 1, i - offset + 1, reference["align"][i], mismatch_bases[i]]
             )
-    return [conflicts, matches, mismatches, mismatch_list]
+    return [conflicts, matches, mismatches, mismatch_list, consensus]
 
 
 def analyse_trace_quality(json_file: TextIO) -> float:
@@ -174,7 +176,7 @@
         )  # take the name but remove any json suffix
         offset = offsets[dataset_name]
         length = lengths[dataset_name]
-        (conflicts, matches, mismatches, mismatch_list) = analyse_mismatches(
+        (conflicts, matches, mismatches, mismatch_list, consensus) = analyse_mismatches(
             open(json_filename), offset, length
         )
         # analyse_mismatches(json_filename, True)
@@ -184,7 +186,7 @@
             best_match_mismatch_list = mismatch_list
             best_match_quality = quality
             best_match_reference = dataset_name
-            best_consensus = open(args.consensi[file_index]).read().replace('>Consensus', f'>{args.sample_name}')
+            best_consensus = consensus
             percent_mismatches = round(min_mismatches / lengths[best_match_reference] * 100, 2)
 
     info = {
@@ -194,6 +196,7 @@
         "mismatch_list": best_match_mismatch_list,
         "quality": best_match_quality,
         "perc_mismatches": percent_mismatches,
+        "consensus": best_consensus
     }
     json.dump(info, open(args.output_filename, "w"))
 
--- a/assess_poliovirus_alignment.xml	Wed Sep 14 11:25:43 2022 +0000
+++ b/assess_poliovirus_alignment.xml	Tue Sep 27 05:52:53 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="assess_poliovirus_alignment" name="Assess poliovirus alignment" version="0.7.0+galaxy0" profile="21.05">
+<tool id="assess_poliovirus_alignment" name="Assess poliovirus alignment" version="0.8.0+galaxy0" profile="21.05">
     <requirements>
         <requirement type="package" version="3.9">python</requirement>
     </requirements>
@@ -37,7 +37,19 @@
                     <element name="poliovirus3sabin" value="sample1/poliovirus3sabin.json" />
                 </collection>
             </param>
-            <output name="output1" ftype="json" file="sample1_output.json" />
+            <param name="tracy_consensus_results" ftype="fasta">
+                <collection type="list">
+                    <element name="poliovirus1sabin" value="sample1/poliovirus1sabin.fasta" />
+                    <element name="poliovirus2sabin" value="sample1/poliovirus2sabin.fasta" />
+                    <element name="poliovirus3sabin" value="sample1/poliovirus3sabin.fasta" />
+                </collection>
+            </param>
+            <output name="output1" ftype="json">
+                <assert_contents>
+                    <has_text text='"mismatch_list": [[2506, 25, "G", "A"], [2509, 28, "G", "A"], [2521, 40, "A", "G"], [2523, 42, "T", "C"], [2542, 61, "C", "T"], [2548, 67, "T", "C"], [2555, 74, "A", "G"], [2574, 93, "G", "A"], [2580, 99, "C", "T"], [2610, 129, "A", "G"], [2616, 135, "A", "G"], [2676, 195, "T", "C"], [2691, 210, "A", "G"], [2751, 270, "T", "C"], [2775, 294, "A", "G"], [2862, 381, "T", "C"], [2909, 428, "T", "C"], [2932, 451, "T", "C"], [2946, 465, "T", "C"], [3000, 519, "T", "C"], [3015, 534, "C", "T"], [3030, 549, "G", "A"], [3142, 661, "G", "A"]]' />
+                </assert_contents>
+            </output>
+            <output name="output_consensus" ftype="fasta" file="sample1_best_consensus.fasta" />
         </test>
     </tests>
     <help><![CDATA[