Mercurial > repos > sanbi-uwc > assess_poliovirus_alignment

--- a/assess_alignment.py	Tue Sep 27 18:56:28 2022 +0000
+++ b/assess_alignment.py	Fri Nov 11 06:18:15 2022 +0000
@@ -114,18 +114,20 @@
                 [i + 1, i - offset + 1, reference["align"][i], mismatch_bases[i]]
             )
     if vp1only:
-        # we have trim consensus bases before vp1 region
-        if base_start <= offset:
-            cons_start = offset - base_start
-        else:
-            cons_start = 0
-        consensus = data['gapFreeConsensus'][cons_start:cons_start + length]
+        # a note on gap trimming: if the consensus is smaller than VP1, we might
+        # get gaps on either side. these will of course be trimmed.
+        # we don't expect gaps inside the consensus but in the rare event that
+        # these occur they are also discarded.
+        consensus = data['gappedConsensus'][offset:offset+length].replace('-','')
     else:
+        # TODO: sometimes there is "orphan" sequence from false calls far in front
+        # or behind the consensus region. these should be removed somehow
         consensus = data['gapFreeConsensus']
     return [conflicts, matches, mismatches, mismatch_list, consensus]


 def analyse_trace_quality(json_file: TextIO) -> float:
+    # TODO: consider clipping this to VP1 region or at least aligned region
     data = load_json(json_file)

     traces = data["gappedTraces"]
@@ -139,8 +141,15 @@
             calls = trace["peak" + base][start : end + 1]
             min_call = min(calls)
             max_call = max(calls)
+            if len(calls) == 0:
+                # no calls for this base to deal with, skip it
+                continue
             avg_call = sum(calls) / len(calls)
-            ratio = max_call / avg_call
+            if avg_call == 0:
+                # zero average base quality!
+                ratio = 0
+            else:
+                ratio = max_call / avg_call
             call_quality["avg" + base] = avg_call
             call_quality["min" + base] = min_call
             call_quality["max" + base] = max_call
@@ -148,7 +157,10 @@
             avg_ratio += ratio
         avg_ratio = avg_ratio / 4
         overall_avg += avg_ratio
-    overall_avg = overall_avg / len(traces)
+    if len(traces) > 0:
+        overall_avg = overall_avg / len(traces)
+    else:
+        overall_avg = 0
     return overall_avg


@@ -198,7 +210,8 @@
             best_match_quality = quality
             best_match_reference = dataset_name
             best_consensus = consensus
-            percent_mismatches = round(min_mismatches / lengths[best_match_reference] * 100, 2)
+            percent_mismatches = round(min_mismatches / lengths[best_match_reference] * 100, 2),
+            vp1_coverage_perc = round(len(consensus) / lengths[best_match_reference] * 100)

     info = {
         "sample_name": args.sample_name,
@@ -207,8 +220,14 @@
         "mismatch_list": best_match_mismatch_list,
         "quality": best_match_quality,
         "perc_mismatches": percent_mismatches,
-        "consensus": best_consensus
-    }
+        "consensus": best_consensus,
+        "vp1_coverage_perc": vp1_coverage_perc
+     }
     json.dump(info, open(args.output_filename, "w"))

-    open(args.consensus_output_filename, "w").write(f'>{args.sample_name}\n' + fill(best_consensus) + '\n')
+    if len(best_consensus):
+        consensus_record = f'>{args.sample_name}\n' + fill(best_consensus) + '\n'
+    else:
+        # better to write an empty FASTA file than one with just a header
+        consensus_record = ''
+    open(args.consensus_output_filename, "w").write(consensus_record)
--- a/assess_poliovirus_alignment.xml	Tue Sep 27 18:56:28 2022 +0000
+++ b/assess_poliovirus_alignment.xml	Fri Nov 11 06:18:15 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="assess_poliovirus_alignment" name="Assess poliovirus alignment" version="0.11.0+galaxy0" profile="21.05">
+<tool id="assess_poliovirus_alignment" name="Assess poliovirus alignment" version="0.12.0+galaxy0" profile="21.05">
     <requirements>
         <requirement type="package" version="3.9">python</requirement>
     </requirements>