annotate summarize_alignment.py @ 5:7bcfc97b9284 draft default tip

planemo upload for repository https://github.com/pvanheus/polio_report commit 753aad311378b064f2152c8e99e7c8097c7f4321-dirty
author sanbi-uwc
date Fri, 11 Nov 2022 06:19:00 +0000
parents 5c8dfc4d9d68
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7e49c6b19f5e planemo upload commit a99e10fec2fac5aae70974c977eb3b362a1a8429
sanbi-uwc
parents:
diff changeset
1 #!/usr/bin/env python
7e49c6b19f5e planemo upload commit a99e10fec2fac5aae70974c977eb3b362a1a8429
sanbi-uwc
parents:
diff changeset
2
7e49c6b19f5e planemo upload commit a99e10fec2fac5aae70974c977eb3b362a1a8429
sanbi-uwc
parents:
diff changeset
3 import argparse
7e49c6b19f5e planemo upload commit a99e10fec2fac5aae70974c977eb3b362a1a8429
sanbi-uwc
parents:
diff changeset
4 import json
7e49c6b19f5e planemo upload commit a99e10fec2fac5aae70974c977eb3b362a1a8429
sanbi-uwc
parents:
diff changeset
5
7e49c6b19f5e planemo upload commit a99e10fec2fac5aae70974c977eb3b362a1a8429
sanbi-uwc
parents:
diff changeset
def comma_split(args: str) -> list[str]:
    """Split a comma-separated string into its items.

    Args:
        args: Text such as ``"a,b,c"``.

    Returns:
        The items in their original order. An empty string yields an empty
        list; empty fields between commas are preserved as empty strings.
    """
    # "".split(",") returns [""], which would look like one (empty) item.
    if not args:
        return []
    return args.split(",")
7e49c6b19f5e planemo upload commit a99e10fec2fac5aae70974c977eb3b362a1a8429
sanbi-uwc
parents:
diff changeset
8
7e49c6b19f5e planemo upload commit a99e10fec2fac5aae70974c977eb3b362a1a8429
sanbi-uwc
parents:
diff changeset
9
7e49c6b19f5e planemo upload commit a99e10fec2fac5aae70974c977eb3b362a1a8429
sanbi-uwc
parents:
diff changeset
def _format_mismatches(mismatch_list):
    """Render mismatch entries as one ``b:c:d;b:c:d`` string.

    The first element of every entry is skipped; the remaining elements are
    joined with ":" and the entries with ";".
    """
    return ";".join(
        ":".join(str(part) for part in el[1:]) for el in mismatch_list
    )


def _write_variant_list(path, mismatch_list):
    """Write the mismatch entries to ``path`` as a tab-separated table."""
    with open(path, "w") as variant_list_output:
        print('genome pos', 'VP1 pos', 'ref', 'sequence', sep='\t', file=variant_list_output)
        for variant in mismatch_list:
            print("\t".join(str(el) for el in variant), file=variant_list_output)


def main(argv=None):
    """Summarise alignment JSON files into tab-separated reports.

    For each file given with ``--datasets`` one row is appended to the
    summary file (sample name, best reference, mismatches, percentage of
    mismatches, quality rounded to two decimals, VP1 coverage percentage and
    the compact mismatch list), and the full mismatch list is written to the
    ``--variant_list_outputs`` path at the same position.

    Args:
        argv: Command-line arguments; ``None`` makes argparse read
            ``sys.argv``.

    Raises:
        SystemExit: If a required option is missing or there are fewer
            variant list outputs than datasets.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--summary_output_filename",
        required=True,
        help="Path to summary output file",
    )
    parser.add_argument("--variant_list_outputs", nargs="+", required=True)
    parser.add_argument("--datasets", nargs="+", required=True)
    args = parser.parse_args(argv)

    # Fail before writing anything instead of hitting an IndexError halfway
    # through. Surplus output paths are tolerated and simply left unused.
    if len(args.variant_list_outputs) < len(args.datasets):
        parser.error(
            "--variant_list_outputs needs one path per --datasets entry "
            f"(got {len(args.variant_list_outputs)} for {len(args.datasets)})"
        )

    with open(args.summary_output_filename, "w") as summary_output:
        for json_filename, variant_list_filename in zip(
            args.datasets, args.variant_list_outputs
        ):
            with open(json_filename) as json_file:
                alignment_summary = json.load(json_file)
            print(
                alignment_summary["sample_name"],
                alignment_summary["best_reference"],
                alignment_summary["mismatches"],
                alignment_summary["perc_mismatches"],
                round(alignment_summary["quality"], 2),
                alignment_summary["vp1_coverage_perc"],
                _format_mismatches(alignment_summary["mismatch_list"]),
                sep="\t",
                file=summary_output,
            )
            _write_variant_list(
                variant_list_filename, alignment_summary["mismatch_list"]
            )


if __name__ == "__main__":
    main()