view summarize_alignment.py @ 5:7bcfc97b9284 draft default tip

planemo upload for repository https://github.com/pvanheus/polio_report commit 753aad311378b064f2152c8e99e7c8097c7f4321-dirty
author sanbi-uwc
date Fri, 11 Nov 2022 06:19:00 +0000
parents 5c8dfc4d9d68
children
line wrap: on
line source

#!/usr/bin/env python

import argparse
import json

def comma_split(args: str) -> list[str]:
    """Break a comma-delimited string into its individual fields.

    The split is purely on the ``","`` character: surrounding whitespace is
    kept and empty fields are preserved, so ``""`` yields ``[""]`` and
    ``"a,,b"`` yields ``["a", "", "b"]``.
    """
    fields = args.split(sep=",")
    return fields


def _format_mismatch_list(mismatches) -> str:
    """Render the mismatch entries as a single summary-column string.

    The first element of every entry is dropped; the remaining elements are
    stringified and joined with ``":"``, and the entries are joined with
    ``";"``.  An empty list yields an empty string.
    """
    return ";".join(":".join(str(part) for part in entry[1:]) for entry in mismatches)


def _write_variant_list(path, mismatches) -> None:
    """Write the per-sample variant table (header plus one row per mismatch).

    Every element of each mismatch entry is written, tab-separated, in its
    original order.  The file is closed even if writing fails part-way.
    """
    with open(path, "w") as variant_list_output:
        print('genome pos', 'VP1 pos', 'ref', 'sequence', sep='\t', file=variant_list_output)
        for variant in mismatches:
            print("\t".join([str(el) for el in variant]), file=variant_list_output)


def main(argv=None) -> None:
    """Summarise alignment JSON files into one summary table and per-sample variant lists.

    For each file given via ``--datasets`` the JSON document is loaded, one
    tab-separated row is appended to the summary file, and a variant table is
    written to the ``--variant_list_outputs`` path at the same position.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Raises:
        SystemExit: (via argparse) when a required argument is missing or when
            fewer variant list outputs than datasets are supplied.
        KeyError: when a dataset lacks one of the expected summary keys.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--summary_output_filename", required=True, help="Path to summary output file"
    )
    parser.add_argument("--variant_list_outputs", nargs="+", required=True)
    parser.add_argument("--datasets", nargs="+", required=True)
    args = parser.parse_args(argv)

    # Fail before any output is written rather than with an IndexError after
    # some files have already been produced.  Surplus output paths are simply
    # left unused.
    if len(args.variant_list_outputs) < len(args.datasets):
        parser.error(
            "--variant_list_outputs needs at least one path per dataset "
            f"(got {len(args.variant_list_outputs)} for {len(args.datasets)} datasets)"
        )

    with open(args.summary_output_filename, "w") as summary_output:
        # zip() stops at the shorter sequence; the check above guarantees that
        # is always the dataset list, so every dataset is processed.
        for json_filename, variant_list_filename in zip(
            args.datasets, args.variant_list_outputs
        ):
            with open(json_filename) as json_file:
                alignment_summary = json.load(json_file)
            mismatches = alignment_summary["mismatch_list"]
            print(
                alignment_summary["sample_name"],
                alignment_summary["best_reference"],
                alignment_summary["mismatches"],
                alignment_summary["perc_mismatches"],
                round(alignment_summary["quality"], 2),
                alignment_summary["vp1_coverage_perc"],
                _format_mismatch_list(mismatches),
                sep="\t",
                file=summary_output,
            )
            _write_variant_list(variant_list_filename, mismatches)


if __name__ == "__main__":
    main()