annotate github_downloaded/1005a47/margin_cons_medaka.py @ 7:be08b76fd78b draft default tip

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
author dfornika
date Tue, 17 Mar 2020 22:05:59 +0000
parents 47eb99e41bfb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
1 #!/usr/bin/env python
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
2 from Bio import SeqIO
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
3 import sys
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
4 import vcf
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
5 import subprocess
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
6 from collections import defaultdict
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
7 import os.path
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
8 import operator
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
9 from .vcftagprimersites import read_bed_file
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
10 import argparse
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
11
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
def collect_depths(bamfile):
    """Return per-position read depths for every contig in a BAM file.

    Runs ``samtools depth <bamfile>`` and parses its tab-separated output,
    one ``contig<TAB>position<TAB>depth`` record per line.

    Args:
        bamfile: Path to the BAM file handed to ``samtools depth``.

    Returns:
        A ``defaultdict(dict)`` mapping contig name to ``{position: depth}``
        with both values converted to ``int``.  Only positions that
        samtools printed are present as keys.

    Raises:
        SystemExit: If ``bamfile`` does not exist, or if ``samtools depth``
            exits with a non-zero status.
    """
    if not os.path.exists(bamfile):
        raise SystemExit("bamfile %s doesn't exist" % (bamfile,))

    p = subprocess.Popen(['samtools', 'depth', bamfile],
                         stdout=subprocess.PIPE)
    # stderr is not piped, so samtools' own diagnostics go straight to the
    # caller's stderr and the second element of communicate() is None.
    out, _ = p.communicate()

    # A failed samtools run would otherwise look like "no coverage anywhere"
    # to callers, so stop here instead of returning empty/partial depths.
    if p.returncode != 0:
        raise SystemExit("samtools depth failed for %s (exit status %s)"
                         % (bamfile, p.returncode))

    depths = defaultdict(dict)
    for ln in out.decode('utf-8').split("\n"):
        if not ln:
            continue
        contig, pos, depth = ln.split("\t")
        depths[contig][int(pos)] = int(depth)
    return depths
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
25
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
class Reporter:
    """Write per-variant diagnostic lines to stderr.

    Attributes are stored exactly as passed in:
      vcffile -- path of the VCF being processed; only its basename up to
                 the first "." is used, as the identifier on each line.
      depths  -- mapping of contig name -> {position: depth}.
    """

    def __init__(self, vcffile, depths):
        self.vcffile = vcffile
        self.depths = depths

    def report(self, r, status, allele):
        """Print four tab-separated lines describing one VCF record.

        The lines carry, in order, the ``status``, ``allele``, ``ref`` and
        ``depth`` fields, each prefixed with the file identifier and
        ``r.POS``.

        Args:
            r: Record exposing ``CHROM``, ``POS`` and ``REF`` attributes.
            status: Label describing how the record was handled.
            allele: Value printed on the ``allele`` line.
        """
        idfile = os.path.basename(self.vcffile).split(".")[0]
        # A position absent from the depth table is reported as depth 0
        # instead of raising KeyError; .get() also avoids inserting an
        # empty contig entry when depths is a defaultdict.
        depth = self.depths.get(r.CHROM, {}).get(r.POS, 0)
        print("%s\t%s\tstatus\t%s" % (idfile, r.POS, status), file=sys.stderr)
        print("%s\t%s\tallele\t%s" % (idfile, r.POS, allele), file=sys.stderr)
        print("%s\t%s\tref\t%s" % (idfile, r.POS, r.REF), file=sys.stderr)
        print("%s\t%s\tdepth\t%s" % (idfile, r.POS, depth), file=sys.stderr)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
37
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
38 def go(args):
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
39 MASKED_POSITIONS = defaultdict(set)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
40
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
41 depths = collect_depths(args.bamfile)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
42 reporter = Reporter(args.vcffile, depths)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
43
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
44 seqs = dict([(rec.id, rec) for rec in SeqIO.parse(open(args.reference), "fasta")])
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
45 cons = {}
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
46 for k in seqs.keys():
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
47 cons[k] = list(seqs[k].seq)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
48
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
49 for n, c in enumerate(cons[k]):
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
50 try:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
51 depth = depths[k][n+1]
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
52 except:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
53 depth = 0
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
54
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
55 if depth < args.depth:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
56 cons[k][n] = 'N'
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
57
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
58 if args.masked:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
59 for region in args.masked.split(","):
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
60 contig, positions = region.split(":")
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
61 start, end = positions.split("-")
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
62 start = int(start)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
63 end = int(end)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
64 for n in range(start, end):
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
65 cons[contig][n-1] = 'N'
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
66 MASKED_POSITIONS[contig].add(n-1)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
67
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
68 sett = set()
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
69 vcf_reader = vcf.Reader(open(args.vcffile, 'r'))
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
70 for record in vcf_reader:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
71 if record.ALT[0] != '.':
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
72 # variant call
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
73
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
74 if record.POS in MASKED_POSITIONS[record.CHROM]:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
75 reporter.report(record, "masked_manual", "n")
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
76 cons[record.CHROM][record.POS-1] = 'N'
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
77 continue
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
78
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
79 if record.num_het:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
80 if depths[record.CHROM][record.POS] < args.depth:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
81 reporter.report(record, "het_site_low_depth", "y")
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
82 continue
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
83 else:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
84 reporter.report(record, "het_site", "y")
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
85 cons[record.CHROM][record.POS-1] = 'N'
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
86 continue
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
87
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
88 if 'PRIMER' in record.INFO:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
89 reporter.report(record, "primer_binding_site", "n")
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
90 cons[record.CHROM][record.POS-1] = 'N'
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
91 continue
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
92
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
93 #support = float(record.INFO['SupportFraction'])
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
94 #total_reads = int(record.INFO['TotalReads'])
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
95 qual = record.QUAL
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
96
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
97 REF = record.REF
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
98 ALT = str(record.ALT[0])
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
99
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
100 if len(ALT) > len(REF):
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
101 print("Skipping insertion at position: %s" % (record.POS), file=sys.stderr)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
102 continue
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
103
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
104 if depths[record.CHROM][record.POS] >= args.depth and record.QUAL >= args.quality:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
105 if len(REF) > len(ALT):
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
106 print("N-masking confident deletion at %s" % (record.POS), file=sys.stderr)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
107 for n in range(len(REF)):
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
108 cons[record.CHROM][record.POS-1+n] = 'N'
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
109 continue
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
110
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
111 reporter.report(record, "variant", ALT)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
112 sett.add(record.POS)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
113 if len(REF) > len(ALT):
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
114 print("deletion", file=sys.stderr)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
115 continue
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
116
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
117 if len(ALT) > len(REF):
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
118 print("insertion", file=sys.stderr)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
119 continue
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
120 cons[record.CHROM][record.POS-1] = str(ALT)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
121 elif len(REF) > len(ALT):
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
122 continue
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
123 else:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
124 if depths[record.CHROM][record.POS] < args.depth:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
125 reporter.report(record, "low_depth_variant", "n")
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
126 else:
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
127 reporter.report(record, "low_qual_variant", "n")
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
128 #cons[record.CHROM][record.POS-1] = 'N'
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
129 continue
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
130
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
131 #print >>sys.stderr, str(sett)
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
132
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
133 for k in seqs.keys():
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
134 print(">%s-%s" % (args.bamfile, k))
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
135 print("".join(cons[k]))
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
136
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
def main():
    """Build the command-line interface, parse ``sys.argv`` and run ``go``.

    Optional arguments:
        --depth    int, default 5 (minimum depth to call a variant)
        --quality  int, default 0 (minimum quality to call a variant)
        --masked   regions to mask, ``contig:start-end`` items separated
                   by commas

    Positional arguments, in order: ``reference``, ``vcffile``, ``bamfile``.
    The parsed namespace is passed unchanged to ``go``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--depth',
        type=int,
        default=5,
        help='minimum depth to call a variant',
    )
    cli.add_argument(
        '--quality',
        type=int,
        default=0,
        help='minimum quality to call a variant',
    )
    cli.add_argument(
        '--masked',
        help='Regions to mask (contig:start-end,contig:start-end)',
    )
    # Positionals are registered in the order they must appear on the
    # command line.
    for positional in ('reference', 'vcffile', 'bamfile'):
        cli.add_argument(positional)

    go(cli.parse_args())
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
147
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff changeset
# Run the command-line entry point only when this file is executed as a
# script, so that importing the module has no side effects.
if __name__ == "__main__":
    main()