Mercurial > repos > sanbi-uwc > summarize_poliovirus_alignment
annotate summarize_alignment.py @ 5:7bcfc97b9284 draft default tip
planemo upload for repository https://github.com/pvanheus/polio_report commit 753aad311378b064f2152c8e99e7c8097c7f4321-dirty
author | sanbi-uwc |
---|---|
date | Fri, 11 Nov 2022 06:19:00 +0000 |
parents | 5c8dfc4d9d68 |
children |
rev | line source |
---|---|
#!/usr/bin/env python
"""Summarise per-sample alignment reports.

For every JSON alignment summary given via ``--datasets`` the script appends
one tab-separated line to the combined summary file and writes a per-sample
variant table to the matching ``--variant_list_outputs`` path.
"""

import argparse
import json

# Column titles written as the first line of every variant table.
VARIANT_LIST_HEADER = ("genome pos", "VP1 pos", "ref", "sequence")


def comma_split(args: str) -> list[str]:
    """Split a comma-separated string into its parts.

    An empty string yields ``[""]`` (the behaviour of ``str.split``).
    """
    return args.split(",")


def format_mismatch_list(mismatch_list) -> str:
    """Render the mismatches as a single ``;``-separated summary field.

    The first element of every entry is dropped and the remaining elements
    are joined with ``:``.  Entries presumably follow the column order of
    ``VARIANT_LIST_HEADER`` (so the genome position is the part left out)
    -- TODO confirm against the producer of the JSON.
    """
    return ";".join(
        ":".join(str(part) for part in entry[1:]) for entry in mismatch_list
    )


def summary_row(alignment_summary: dict) -> str:
    """Build the tab-separated summary line (without newline) for one sample.

    Raises:
        KeyError: if an expected key is missing from ``alignment_summary``.
    """
    # Looked up first so that a missing "mismatch_list" key is reported
    # before any other missing key, as in the original statement order.
    mismatches_field = format_mismatch_list(alignment_summary["mismatch_list"])
    fields = (
        alignment_summary["sample_name"],
        alignment_summary["best_reference"],
        alignment_summary["mismatches"],
        alignment_summary["perc_mismatches"],
        round(alignment_summary["quality"], 2),
        alignment_summary["vp1_coverage_perc"],
        mismatches_field,
    )
    return "\t".join(str(field) for field in fields)


def write_variant_list(mismatch_list, handle) -> None:
    """Write the header line and one tab-separated line per variant."""
    print(*VARIANT_LIST_HEADER, sep="\t", file=handle)
    for variant in mismatch_list:
        print("\t".join(str(el) for el in variant), file=handle)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # All three options are needed for the script to do anything useful;
    # marking them required gives a usage error instead of a TypeError on None.
    parser.add_argument(
        "--summary_output_filename",
        required=True,
        help="Path to summary output file",
    )
    parser.add_argument("--variant_list_outputs", nargs="+", required=True)
    parser.add_argument("--datasets", nargs="+", required=True)
    args = parser.parse_args()

    # Outputs are paired with datasets by position; fail before writing
    # anything rather than with an IndexError halfway through the run.
    if len(args.variant_list_outputs) < len(args.datasets):
        parser.error(
            "--variant_list_outputs needs at least one path per --datasets entry"
        )

    # NOTE(review): not used in the visible part of the script; kept in case
    # code further down the file relies on it.
    galaxy_metadata = {}

    with open(args.summary_output_filename, "w") as summary_output:
        # zip stops at the last dataset, so surplus output paths are ignored
        # exactly as before.
        for json_filename, variant_list_filename in zip(
            args.datasets, args.variant_list_outputs
        ):
            # JSON text is UTF-8 by specification, independent of the locale.
            with open(json_filename, encoding="utf-8") as json_file:
                alignment_summary = json.load(json_file)

            print(summary_row(alignment_summary), file=summary_output)

            with open(variant_list_filename, "w") as variant_list_output:
                write_variant_list(
                    alignment_summary["mismatch_list"], variant_list_output
                )