Mercurial > repos > sanbi-uwc > qc
annotate qc.py @ 7:6bdec8c62aec draft default tip
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 58ea5206cf78a4ff48fadedbe4e260c190d6f06e"
author | sanbi-uwc |
---|---|
date | Mon, 12 Apr 2021 15:53:58 +0000 |
parents | 878f18249f76 |
children |
rev | line source |
---|---|
0
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
2 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
3 from Bio import SeqIO |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
4 import csv |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
5 import subprocess |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
6 import pandas as pd |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
7 import matplotlib.pyplot as plt |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
8 import shlex |
6
878f18249f76
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 21d348651ca466563cc507b9e3a3fbd6c2b0a016"
sanbi-uwc
parents:
0
diff
changeset
|
9 import json |
0
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
10 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
11 """ |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
12 This script can incorporate as many QC checks as required |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
13 as long as it outputs a csv file containing a final column |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
14 headed with 'qc_pass' and rows for each sample indicating |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
15 'TRUE' if the overall QC check has passed or 'FALSE' if not. |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
16 """ |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
17 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
def make_qc_plot(depth_pos, n_density, samplename, window=200):
    """Plot smoothed read depth and N density for a sample and save it as a PNG.

    The figure is written to ``<samplename>.depth.png`` in the current
    working directory. Depth is drawn in green on a log-scaled left axis,
    N density in red on the right axis.

    Args:
        depth_pos: rows where ``row[1]`` is the position and ``row[2]`` is the
            depth (presumably the output of ``read_depth_file``, in which
            case both are strings -- confirm against the caller).
        n_density: rows where ``row[0]`` is the position and ``row[1]`` is
            the N density.
        samplename: used as the plot title and as the output file prefix.
        window: number of rows in the rolling mean applied to the depth.
    """
    depth_df = pd.DataFrame({
        'position': [row[1] for row in depth_pos],
        'depth': [row[2] for row in depth_pos],
    })
    # Depth values may arrive as text; convert explicitly instead of relying
    # on the rolling window to coerce an object column to floats.
    depth_df['depth'] = pd.to_numeric(depth_df['depth'])
    depth_df['depth_moving_average'] = (
        depth_df['depth'].rolling(window=window).mean()
    )

    n_df = pd.DataFrame({
        'position': [row[0] for row in n_density],
        'n_density': [row[1] for row in n_density],
    })

    fig, ax1 = plt.subplots()
    try:
        ax2 = ax1.twinx()

        ax1.set_xlabel('Position')

        ax1.set_ylabel('Depth', color='g')
        ax1.set_ylim(top=10**5, bottom=1)
        ax1.set_yscale('log')
        # Both series are plotted against the DataFrame index, not against
        # the 'position' column.
        ax1.plot(depth_df['depth_moving_average'], color='g')

        ax2.set_ylabel('N density', color='r')
        ax2.plot(n_df['n_density'], color='r')
        ax2.set_ylim(top=1)

        plt.title(samplename)
        fig.savefig(samplename + '.depth.png')
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
40 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
def read_depth_file(bamfile):
    """Run ``samtools depth`` on a BAM file and return its rows.

    Args:
        bamfile: path to the BAM file handed to samtools.

    Returns:
        A list with one entry per non-empty output line, each entry being the
        list of tab-separated fields of that line, all as strings.

    Raises:
        subprocess.CalledProcessError: if samtools exits with a non-zero
            status. Previously a failure was ignored and produced an empty
            (or partial) result, which was then reported as zero coverage.
    """
    # NOTE(review): per the samtools manual, '-a' reports every position
    # (including zero depth) and '-d 0' lifts the depth cap -- confirm for the
    # samtools version in use.
    out = subprocess.check_output(
        ['samtools', 'depth', '-a', '-d', '0', bamfile])

    return [ln.split("\t") for ln in out.decode('utf-8').split("\n") if ln]
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
53 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
54 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
def get_covered_pos(pos_depth, min_depth):
    """Count the positions whose depth is at least ``min_depth``.

    Args:
        pos_depth: iterable of 3-item rows ``(contig, position, depth)``;
            ``depth`` must be convertible with ``int()``.
        min_depth: inclusive depth threshold.

    Returns:
        The number of rows with ``int(depth) >= min_depth``.
    """
    return sum(
        1
        for _contig, _pos, depth in pos_depth
        if int(depth) >= min_depth
    )
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
62 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
def get_N_positions(fasta):
    """Return the 0-based indices of every 'N'/'n' in ``fasta.seq``.

    Args:
        fasta: object exposing the sequence as ``.seq`` (the sequence must
            support ``.lower()`` and iteration).

    Returns:
        A list of integer indices in ascending order; empty if there is no N.
    """
    return [
        idx
        for idx, base in enumerate(fasta.seq.lower())
        if base == 'n'
    ]
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
67 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
def get_pct_N_bases(fasta):
    """Return the percentage (0-100) of bases in ``fasta.seq`` that are N.

    The match is case-insensitive. An empty sequence yields ``0.0`` instead
    of raising ``ZeroDivisionError``.

    Args:
        fasta: object exposing the sequence as ``.seq``.

    Returns:
        ``count_of_N / sequence_length * 100`` as a float.
    """
    seq_length = len(fasta.seq)
    if seq_length == 0:
        return 0.0

    # Count directly rather than building the full list of N positions.
    count_N = fasta.seq.lower().count('n')

    return count_N / seq_length * 100
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
75 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
def get_largest_N_gap(fasta):
    """Return the largest distance between consecutive N positions.

    Index 0 and ``len(fasta.seq)`` are added as boundary markers before the
    differences are taken, so a sequence with no N at all returns its full
    length. Despite the name, the value is the spacing *between* Ns rather
    than the length of a run of Ns.

    Args:
        fasta: object exposing the sequence as ``.seq``.

    Returns:
        The maximum difference between neighbouring entries of
        ``[0] + N positions + [len(fasta.seq)]``.
    """
    boundaries = [0] + get_N_positions(fasta) + [len(fasta.seq)]

    return max(
        right - left
        for left, right in zip(boundaries[:-1], boundaries[1:])
    )
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
84 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
def get_ref_length(ref):
    """Return the sequence length of the FASTA record read from ``ref``.

    Args:
        ref: FASTA file path or handle passed straight to ``SeqIO.read``.

    Returns:
        ``len(record.seq)`` for the record that was read.
    """
    # NOTE(review): per the Biopython docs, SeqIO.read expects exactly one
    # record and raises ValueError otherwise -- confirm references are
    # always single-contig.
    reference = SeqIO.read(ref, "fasta")
    return len(reference.seq)
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
88 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
89 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
def sliding_window_N_density(sequence, window=10):
    """Compute the N density in a window starting at every sequence index.

    For each start index ``i`` the window is ``sequence.seq[i:i + window]``
    and the density is the case-insensitive count of N in it divided by
    ``window``.

    NOTE: windows near the end of the sequence are truncated by the slice
    but are still divided by the full ``window``, so density is under-reported
    for the last ``window - 1`` positions.

    Args:
        sequence: object exposing the sequence as ``.seq``.
        window: window width in bases; must be non-zero.

    Returns:
        A list of ``[window_mid, n_density]`` pairs, one per index, where
        ``window_mid`` is ``i + window / 2`` (a float).
    """
    # Lower-case once up front instead of once per window.
    lowered = sequence.seq.lower()
    half_window = window / 2

    return [
        [start + half_window,
         lowered[start:start + window].count('n') / window]
        for start in range(len(lowered))
    ]
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
102 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
def get_num_reads(bamfile):
    """Return the read count reported by ``samtools view -c -F0x900``.

    Args:
        bamfile: path to the BAM file handed to samtools.

    Returns:
        The samtools output decoded and stripped, as a string (not an int).

    Raises:
        subprocess.CalledProcessError: if samtools exits with a non-zero
            status.
    """
    # NOTE(review): 0x900 is 0x100 | 0x800, the SAM flag bits for secondary
    # and supplementary alignments, so '-F0x900' should exclude both --
    # confirm against the SAM specification.
    st_filter = '0x900'

    # Pass argv as a list: building one string and shlex-splitting it broke
    # BAM paths containing spaces or quote characters.
    command = ['samtools', 'view', '-c', '-F{}'.format(st_filter), bamfile]

    return subprocess.check_output(command).decode().strip()
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
110 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
111 def go(args): |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
112 if args.illumina: |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
113 depth = 10 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
114 elif args.nanopore: |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
115 depth = 20 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
116 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
117 ## Depth calcs |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
118 ref_length = get_ref_length(args.ref) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
119 depth_pos = read_depth_file(args.bam) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
120 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
121 depth_covered_bases = get_covered_pos(depth_pos, depth) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
122 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
123 pct_covered_bases = depth_covered_bases / ref_length * 100 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
124 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
125 ## Number of aligned reads calculation |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
126 num_reads = get_num_reads(args.bam) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
127 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
128 # Unknown base calcs |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
129 fasta = SeqIO.read(args.fasta, "fasta") |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
130 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
131 pct_N_bases = 0 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
132 largest_N_gap = 0 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
133 qc_pass = "FALSE" |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
134 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
135 if len(fasta.seq) != 0: |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
136 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
137 pct_N_bases = get_pct_N_bases(fasta) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
138 largest_N_gap = get_largest_N_gap(fasta) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
139 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
140 # QC PASS / FAIL |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
141 if largest_N_gap >= 10000 or pct_N_bases < 50.0: |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
142 qc_pass = "TRUE" |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
143 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
144 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
145 qc_line = { 'sample_name' : args.sample, |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
146 'pct_N_bases' : "{:.2f}".format(pct_N_bases), |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
147 'pct_covered_bases' : "{:.2f}".format(pct_covered_bases), |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
148 'longest_no_N_run' : largest_N_gap, |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
149 'num_aligned_reads' : num_reads, |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
150 'fasta': args.fasta, |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
151 'bam' : args.bam, |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
152 'qc_pass' : qc_pass} |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
153 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
154 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
155 with open(args.outfile, 'w') as csvfile: |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
156 header = qc_line.keys() |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
157 writer = csv.DictWriter(csvfile, fieldnames=header) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
158 writer.writeheader() |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
159 writer.writerow(qc_line) |
6
878f18249f76
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 21d348651ca466563cc507b9e3a3fbd6c2b0a016"
sanbi-uwc
parents:
0
diff
changeset
|
160 |
878f18249f76
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 21d348651ca466563cc507b9e3a3fbd6c2b0a016"
sanbi-uwc
parents:
0
diff
changeset
|
161 # write a json file from the dict |
878f18249f76
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 21d348651ca466563cc507b9e3a3fbd6c2b0a016"
sanbi-uwc
parents:
0
diff
changeset
|
162 json_file = args.outfile + '.json' |
7
6bdec8c62aec
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 58ea5206cf78a4ff48fadedbe4e260c190d6f06e"
sanbi-uwc
parents:
6
diff
changeset
|
163 with open(json_file, 'w') as jsonfile: |
6bdec8c62aec
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 58ea5206cf78a4ff48fadedbe4e260c190d6f06e"
sanbi-uwc
parents:
6
diff
changeset
|
164 json.dump(qc_line, jsonfile) |
6
878f18249f76
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 21d348651ca466563cc507b9e3a3fbd6c2b0a016"
sanbi-uwc
parents:
0
diff
changeset
|
165 |
0
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
166 N_density = sliding_window_N_density(fasta) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
167 make_qc_plot(depth_pos, N_density, args.sample) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
168 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
169 def main(): |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
170 import argparse |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
171 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
172 parser = argparse.ArgumentParser() |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
173 group = parser.add_mutually_exclusive_group(required=True) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
174 group.add_argument('--nanopore', action='store_true') |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
175 group.add_argument('--illumina', action='store_true') |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
176 parser.add_argument('--outfile', required=True) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
177 parser.add_argument('--sample', required=True) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
178 parser.add_argument('--ref', required=True) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
179 parser.add_argument('--bam', required=True) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
180 parser.add_argument('--fasta', required=True) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
181 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
182 args = parser.parse_args() |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
183 go(args) |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
184 |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
185 if __name__ == "__main__": |
2aa014ad54bc
"planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc/tree/master/tools/qc commit 54450d56a66bd4b01a17c8ec8aa3e649e3e4749f-dirty"
sanbi-uwc
parents:
diff
changeset
|
186 main() |