diff summarize_alignment.py @ 0:7e49c6b19f5e draft

planemo upload commit a99e10fec2fac5aae70974c977eb3b362a1a8429
author sanbi-uwc
date Tue, 19 Jul 2022 11:47:08 +0000
parents
children 5c8dfc4d9d68
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/summarize_alignment.py	Tue Jul 19 11:47:08 2022 +0000
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+
def comma_split(args: str) -> list[str]:
    """Break a comma-delimited string into its individual fields.

    The split is purely textual: surrounding whitespace is kept and empty
    fields are preserved, so ``""`` yields ``[""]`` and ``"a,,b"`` yields
    ``["a", "", "b"]``.
    """
    separator = ","
    fields = args.split(separator)
    return fields
+
+
def _format_mismatches(mismatch_list):
    """Render mismatch records as one compact string for the summary table.

    The first element of each record is dropped (it is only reported in the
    per-sample variant list, under the "genome pos" column); the remaining
    elements are joined with ":" and the records with ";".
    """
    return ";".join(
        ":".join(str(part) for part in record[1:]) for record in mismatch_list
    )


def main(argv=None):
    """Summarise alignment JSON files into one table plus per-sample variant lists.

    For every file given via ``--datasets`` the JSON document is loaded and:

    * one tab-separated row (sample name, best reference, mismatch count,
      quality rounded to 2 decimals, compact mismatch string) is appended to
      the file named by ``--summary_output_filename``;
    * the file at the same position in ``--variant_list_outputs`` is written
      with a header row followed by one tab-separated row per mismatch.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        SystemExit: If a required option is missing or fewer variant list
            outputs than datasets were supplied (reported by argparse).
        KeyError: If a JSON document lacks one of the expected keys.
    """
    parser = argparse.ArgumentParser()
    # All three options are needed for the script to do anything useful, so
    # let argparse report a missing one instead of failing later on ``None``.
    parser.add_argument(
        "--summary_output_filename",
        required=True,
        help="Path to summary output file",
    )
    parser.add_argument("--variant_list_outputs", nargs="+", required=True)
    parser.add_argument("--datasets", nargs="+", required=True)
    args = parser.parse_args(argv)

    # Datasets and variant list outputs are paired by position. Check up front
    # so a shortfall cannot abort the run after some outputs were written.
    # Surplus outputs are tolerated (and left untouched).
    if len(args.variant_list_outputs) < len(args.datasets):
        parser.error(
            "need one --variant_list_outputs path per --datasets file "
            f"(got {len(args.variant_list_outputs)} for {len(args.datasets)})"
        )

    with open(args.summary_output_filename, "w") as summary_output:
        for json_filename, variant_list_filename in zip(
            args.datasets, args.variant_list_outputs
        ):
            with open(json_filename) as json_file:
                alignment_summary = json.load(json_file)

            mismatches = alignment_summary["mismatch_list"]
            print(
                alignment_summary["sample_name"],
                alignment_summary["best_reference"],
                alignment_summary["mismatches"],
                round(alignment_summary["quality"], 2),
                _format_mismatches(mismatches),
                sep="\t",
                file=summary_output,
            )

            with open(variant_list_filename, "w") as variant_list_output:
                print(
                    "genome pos",
                    "VP1 pos",
                    "ref",
                    "sequence",
                    sep="\t",
                    file=variant_list_output,
                )
                for variant in mismatches:
                    print(
                        "\t".join(str(el) for el in variant),
                        file=variant_list_output,
                    )


if __name__ == "__main__":
    main()