annotate qc.py @ 7:6bdec8c62aec draft default tip

"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 58ea5206cf78a4ff48fadedbe4e260c190d6f06e"
author sanbi-uwc
date Mon, 12 Apr 2021 15:53:58 +0000
parents 878f18249f76
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
1 #!/usr/bin/env python3
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
2
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
3 from Bio import SeqIO
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
4 import csv
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
5 import subprocess
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
6 import pandas as pd
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
7 import matplotlib.pyplot as plt
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
8 import shlex
6
878f18249f76 "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 21d348651ca466563cc507b9e3a3fbd6c2b0a016"
sanbi-uwc
parents: 0
diff changeset
9 import json
0
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
10
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
11 """
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
12 This script can incorporate as many QC checks as required
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
13 as long as it outputs a csv file containing a final column
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
14 headed with 'qc_pass' and rows for each sample indicating
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
15 'TRUE' if the overall QC check has passed or 'FALSE' if not.
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
16 """
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
17
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
def make_qc_plot(depth_pos, n_density, samplename, window=200):
    """Plot smoothed depth and N density for a sample and save it as a PNG.

    The figure is written to ``<samplename>.depth.png`` in the current
    working directory. Both curves are drawn against the row index of their
    data frame, not against the 'position' column.

    Args:
        depth_pos: rows of (contig, position, depth); element 1 is used as
            the position and element 2 as the depth. Depth values may be
            strings (as produced by splitting samtools text output) and are
            converted to int here.
        n_density: rows of (position, density) pairs.
        samplename: used as the plot title and as the output file prefix.
        window: number of rows in the rolling mean applied to depth.
    """
    depth_df = pd.DataFrame({
        'position': [row[1] for row in depth_pos],
        # Make the numeric conversion explicit instead of relying on pandas
        # coercing text depths inside rolling().
        'depth': [int(row[2]) for row in depth_pos],
    })
    depth_df['depth_moving_average'] = (
        depth_df['depth'].rolling(window=window).mean()
    )

    n_df = pd.DataFrame({
        'position': [row[0] for row in n_density],
        'n_density': [row[1] for row in n_density],
    })

    fig, ax1 = plt.subplots()
    try:
        # Second y-axis sharing the same x-axis for the N-density curve.
        ax2 = ax1.twinx()

        ax1.set_xlabel('Position')

        ax1.set_ylabel('Depth', color='g')
        ax1.set_ylim(top=10**5, bottom=1)
        ax1.set_yscale('log')
        ax1.plot(depth_df['depth_moving_average'], color='g')

        ax2.set_ylabel('N density', color='r')
        ax2.plot(n_df['n_density'], color='r')
        ax2.set_ylim(top=1)

        plt.title(samplename)
        plt.savefig(samplename + '.depth.png')
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
40
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
def read_depth_file(bamfile):
    """Run ``samtools depth -a -d 0`` on a BAM file and parse its output.

    Args:
        bamfile: path to the BAM file handed to samtools.

    Returns:
        A list with one entry per non-empty output line; each entry is the
        list of tab-separated fields of that line, kept as strings.

    Raises:
        subprocess.CalledProcessError: if samtools exits with a non-zero
            status.
    """
    # check_output raises on a non-zero exit, so a failed samtools run is
    # reported instead of being mistaken for "no coverage". This matches the
    # error handling of get_num_reads.
    out = subprocess.check_output(
        ['samtools', 'depth', '-a', '-d', '0', bamfile]
    )

    return [ln.split("\t") for ln in out.decode('utf-8').split("\n") if ln]
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
53
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
54
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
def get_covered_pos(pos_depth, min_depth):
    """Count the positions whose depth is at least ``min_depth``.

    Args:
        pos_depth: iterable of 3-item rows (contig, position, depth); the
            depth is passed through ``int()``, so it may be a numeric string.
        min_depth: inclusive depth threshold.

    Returns:
        The number of rows with ``int(depth) >= min_depth``.
    """
    return sum(
        1 for _contig, _pos, depth in pos_depth if int(depth) >= min_depth
    )
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
62
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
def get_N_positions(fasta):
    """Return the 0-based indices of every N in ``fasta.seq``.

    The match is case-insensitive: the sequence is lower-cased before each
    letter is compared with 'n'.

    Args:
        fasta: object with a ``seq`` attribute that supports ``lower()`` and
            iteration over single letters.

    Returns:
        A list of indices in ascending order; empty when there is no N.
    """
    return [
        index
        for index, letter in enumerate(fasta.seq.lower())
        if letter == 'n'
    ]
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
67
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
def get_pct_N_bases(fasta):
    """Return the percentage of bases in ``fasta.seq`` that are N.

    The count is case-insensitive ('n' and 'N' both count).

    Args:
        fasta: object with a ``seq`` attribute supporting ``len()``,
            ``lower()`` and ``count()``.

    Returns:
        A float between 0 and 100. An empty sequence yields 0.0 rather than
        raising ZeroDivisionError.
    """
    seq_length = len(fasta.seq)
    if seq_length == 0:
        return 0.0

    count_N = fasta.seq.lower().count('n')

    return count_N / seq_length * 100
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
75
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
def get_largest_N_gap(fasta):
    """Return the length of the longest run of bases containing no N.

    The scan is case-insensitive ('n' and 'N' both end a run).

    Args:
        fasta: object with a ``seq`` attribute that supports ``lower()`` and
            iteration over single letters.

    Returns:
        The number of bases in the longest N-free stretch; 0 for an empty
        or all-N sequence.
    """
    # Count run lengths directly. Differencing the indices of neighbouring
    # N positions over-counts by one every run that has an N on its left
    # (e.g. "NACGT" would report 5 instead of 4).
    longest = 0
    current = 0
    for letter in fasta.seq.lower():
        if letter == 'n':
            current = 0
        else:
            current += 1
            if current > longest:
                longest = current

    return longest
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
84
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
def get_ref_length(ref):
    """Return the sequence length of the record in a FASTA file.

    Args:
        ref: FASTA source handed to ``SeqIO.read`` (the caller passes the
            reference path from the command-line arguments).

    Returns:
        ``len()`` of the record's sequence.

    NOTE(review): ``SeqIO.read`` is documented to require exactly one record
    and to raise ValueError otherwise - confirm a multi-contig reference is
    never expected here.
    """
    record = SeqIO.read(ref, "fasta")
    return len(record.seq)
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
88
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
89
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
def sliding_window_N_density(sequence, window=10):
    """Compute the N density in a window starting at every position.

    For each start index ``i`` the slice ``seq[i:i + window]`` is taken and
    its case-insensitive N count is divided by ``window``.

    Args:
        sequence: object with a ``seq`` attribute supporting ``len()``,
            ``lower()``, slicing and ``count()``.
        window: window width in bases.

    Returns:
        A list of ``[window_mid, n_density]`` pairs, one per start index,
        where ``window_mid`` is ``i + window / 2`` (a float).
    """
    # Lower-case once instead of once per window.
    lowered = sequence.seq.lower()
    half_window = window / 2

    # NOTE: windows near the end of the sequence are shorter than `window`
    # but are still divided by the full width, so the tail densities are
    # scaled down; kept as-is so the plotted values do not change.
    return [
        [start + half_window, lowered[start:start + window].count('n') / window]
        for start in range(len(lowered))
    ]
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
102
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
def get_num_reads(bamfile):
    """Count the records in a BAM file with ``samtools view -c -F0x900``.

    Args:
        bamfile: path to the BAM file handed to samtools.

    Returns:
        The count printed by samtools, as a stripped string (it is not
        converted to int).

    Raises:
        subprocess.CalledProcessError: if samtools exits with a non-zero
            status.
    """
    # 0x900 = 0x100 | 0x800, the SAM "secondary" and "supplementary" flag
    # bits; -F drops records that have any of these bits set.
    st_filter = '0x900'

    # Pass an argv list straight to subprocess. Formatting a command string
    # and re-splitting it with shlex breaks on paths that contain spaces or
    # quote characters.
    command = ['samtools', 'view', '-c', '-F{}'.format(st_filter), bamfile]

    return subprocess.check_output(command).decode().strip()
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
110
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
111 def go(args):
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
112 if args.illumina:
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
113 depth = 10
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
114 elif args.nanopore:
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
115 depth = 20
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
116
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
117 ## Depth calcs
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
118 ref_length = get_ref_length(args.ref)
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
119 depth_pos = read_depth_file(args.bam)
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
120
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
121 depth_covered_bases = get_covered_pos(depth_pos, depth)
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
122
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
123 pct_covered_bases = depth_covered_bases / ref_length * 100
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
124
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
125 ## Number of aligned reads calculaton
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
126 num_reads = get_num_reads(args.bam)
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
127
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
128 # Unknown base calcs
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
129 fasta = SeqIO.read(args.fasta, "fasta")
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
130
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
131 pct_N_bases = 0
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
132 largest_N_gap = 0
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
133 qc_pass = "FALSE"
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
134
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
135 if len(fasta.seq) != 0:
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
136
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
137 pct_N_bases = get_pct_N_bases(fasta)
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
138 largest_N_gap = get_largest_N_gap(fasta)
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
139
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
140 # QC PASS / FAIL
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
141 if largest_N_gap >= 10000 or pct_N_bases < 50.0:
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
142 qc_pass = "TRUE"
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
143
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
144
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
145 qc_line = { 'sample_name' : args.sample,
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
146 'pct_N_bases' : "{:.2f}".format(pct_N_bases),
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
147 'pct_covered_bases' : "{:.2f}".format(pct_covered_bases),
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
148 'longest_no_N_run' : largest_N_gap,
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
149 'num_aligned_reads' : num_reads,
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
150 'fasta': args.fasta,
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
151 'bam' : args.bam,
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
152 'qc_pass' : qc_pass}
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
153
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
154
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
155 with open(args.outfile, 'w') as csvfile:
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
156 header = qc_line.keys()
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
157 writer = csv.DictWriter(csvfile, fieldnames=header)
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
158 writer.writeheader()
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
159 writer.writerow(qc_line)
6
878f18249f76 "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 21d348651ca466563cc507b9e3a3fbd6c2b0a016"
sanbi-uwc
parents: 0
diff changeset
160
878f18249f76 "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 21d348651ca466563cc507b9e3a3fbd6c2b0a016"
sanbi-uwc
parents: 0
diff changeset
161 # write a json file from the dict
878f18249f76 "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 21d348651ca466563cc507b9e3a3fbd6c2b0a016"
sanbi-uwc
parents: 0
diff changeset
162 json_file = args.outfile + '.json'
7
6bdec8c62aec "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 58ea5206cf78a4ff48fadedbe4e260c190d6f06e"
sanbi-uwc
parents: 6
diff changeset
163 with open(json_file, 'w') as jsonfile:
6bdec8c62aec "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 58ea5206cf78a4ff48fadedbe4e260c190d6f06e"
sanbi-uwc
parents: 6
diff changeset
164 json.dump(qc_line, jsonfile)
6
878f18249f76 "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 21d348651ca466563cc507b9e3a3fbd6c2b0a016"
sanbi-uwc
parents: 0
diff changeset
165
0
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
166 N_density = sliding_window_N_density(fasta)
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
167 make_qc_plot(depth_pos, N_density, args.sample)
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
168
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
def main():
    """Parse the command-line options and hand them to ``go``.

    Exactly one of ``--nanopore`` / ``--illumina`` must be supplied (they
    form a required, mutually exclusive group), and each of ``--outfile``,
    ``--sample``, ``--ref``, ``--bam`` and ``--fasta`` is mandatory.
    argparse exits with a usage error when any of these rules is violated.
    """
    import argparse

    cli = argparse.ArgumentParser()

    # Sequencing platform switch: one, and only one, flag has to be given.
    platform = cli.add_mutually_exclusive_group(required=True)
    for platform_flag in ('--nanopore', '--illumina'):
        platform.add_argument(platform_flag, action='store_true')

    # Mandatory valued options, registered in the original order so the
    # generated usage/help text stays the same.
    for option in ('--outfile', '--sample', '--ref', '--bam', '--fasta'):
        cli.add_argument(option, required=True)

    go(cli.parse_args())
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
184
2aa014ad54bc "planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff changeset
# Run the command-line entry point only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()