Mercurial > repos > dfornika > artic_margin_cons_medaka
annotate github_downloaded/1005a47/margin_cons_medaka.py @ 7:be08b76fd78b draft default tip
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
author | dfornika |
---|---|
date | Tue, 17 Mar 2020 22:05:59 +0000 |
parents | 47eb99e41bfb |
children |
rev | line source |
---|---|
5
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
1 #!/usr/bin/env python |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
2 from Bio import SeqIO |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
3 import sys |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
4 import vcf |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
5 import subprocess |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
6 from collections import defaultdict |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
7 import os.path |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
8 import operator |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
9 from .vcftagprimersites import read_bed_file |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
10 import argparse |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
11 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
def collect_depths(bamfile):
    """Collect per-position read depths from a BAM file via ``samtools depth``.

    Runs ``samtools depth <bamfile>`` and parses its tab-separated output,
    one ``contig<TAB>position<TAB>depth`` record per line.

    Args:
        bamfile: Path to the BAM file to inspect.

    Returns:
        A ``defaultdict(dict)`` mapping contig name to a dict of
        ``{position: depth}``, with positions and depths converted to int.
        Only positions present in the samtools output get an entry.

    Raises:
        SystemExit: If ``bamfile`` does not exist, or if ``samtools depth``
            exits with a non-zero status.
    """
    if not os.path.exists(bamfile):
        raise SystemExit("bamfile %s doesn't exist" % (bamfile,))

    # Only stdout is piped; samtools' own stderr goes straight to the
    # caller's stderr, so its diagnostics remain visible.
    p = subprocess.Popen(['samtools', 'depth', bamfile],
                         stdout=subprocess.PIPE)
    out, _ = p.communicate()

    # A failed samtools run would otherwise be indistinguishable from a BAM
    # with no coverage at all: the caller would get an empty table back.
    if p.returncode != 0:
        raise SystemExit("samtools depth failed on %s (exit status %s)"
                         % (bamfile, p.returncode))

    depths = defaultdict(dict)
    for ln in out.decode('utf-8').split("\n"):
        if not ln:
            # Skip blank lines, e.g. the empty string after the final newline.
            continue
        contig, pos, depth = ln.split("\t")
        depths[contig][int(pos)] = int(depth)
    return depths
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
25 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
class Reporter:
    """Write per-site decisions to stderr as tab-separated rows.

    Attributes are set once at construction:
        vcffile: Path of the VCF being processed; its basename up to the
            first ``.`` is used as the identifier in every emitted row.
        depths: Mapping of contig name -> {position: depth}.
    """

    def __init__(self, vcffile, depths):
        self.vcffile = vcffile
        self.depths = depths

    def report(self, r, status, allele):
        """Emit four rows (status, allele, ref, depth) for one VCF record.

        Each row is ``<id>\\t<POS>\\t<field>\\t<value>`` and is printed to
        ``sys.stderr``.

        Args:
            r: Record object exposing ``CHROM``, ``POS`` and ``REF``.
            status: Label describing how the site was handled.
            allele: Allele (or marker string) to report for the site.
        """
        idfile = os.path.basename(self.vcffile).split(".")[0]

        # Positions (or whole contigs) without a depth entry are reported as
        # depth 0 instead of raising KeyError part-way through the report.
        depth = self.depths.get(r.CHROM, {}).get(r.POS, 0)

        rows = (
            ("status", status),
            ("allele", allele),
            ("ref", r.REF),
            ("depth", depth),
        )
        for field, value in rows:
            print("%s\t%s\t%s\t%s" % (idfile, r.POS, field, value),
                  file=sys.stderr)
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
37 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
38 def go(args): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
39 MASKED_POSITIONS = defaultdict(set) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
40 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
41 depths = collect_depths(args.bamfile) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
42 reporter = Reporter(args.vcffile, depths) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
43 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
44 seqs = dict([(rec.id, rec) for rec in SeqIO.parse(open(args.reference), "fasta")]) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
45 cons = {} |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
46 for k in seqs.keys(): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
47 cons[k] = list(seqs[k].seq) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
48 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
49 for n, c in enumerate(cons[k]): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
50 try: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
51 depth = depths[k][n+1] |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
52 except: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
53 depth = 0 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
54 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
55 if depth < args.depth: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
56 cons[k][n] = 'N' |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
57 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
58 if args.masked: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
59 for region in args.masked.split(","): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
60 contig, positions = region.split(":") |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
61 start, end = positions.split("-") |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
62 start = int(start) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
63 end = int(end) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
64 for n in range(start, end): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
65 cons[contig][n-1] = 'N' |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
66 MASKED_POSITIONS[contig].add(n-1) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
67 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
68 sett = set() |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
69 vcf_reader = vcf.Reader(open(args.vcffile, 'r')) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
70 for record in vcf_reader: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
71 if record.ALT[0] != '.': |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
72 # variant call |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
73 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
74 if record.POS in MASKED_POSITIONS[record.CHROM]: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
75 reporter.report(record, "masked_manual", "n") |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
76 cons[record.CHROM][record.POS-1] = 'N' |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
77 continue |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
78 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
79 if record.num_het: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
80 if depths[record.CHROM][record.POS] < args.depth: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
81 reporter.report(record, "het_site_low_depth", "y") |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
82 continue |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
83 else: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
84 reporter.report(record, "het_site", "y") |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
85 cons[record.CHROM][record.POS-1] = 'N' |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
86 continue |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
87 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
88 if 'PRIMER' in record.INFO: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
89 reporter.report(record, "primer_binding_site", "n") |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
90 cons[record.CHROM][record.POS-1] = 'N' |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
91 continue |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
92 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
93 #support = float(record.INFO['SupportFraction']) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
94 #total_reads = int(record.INFO['TotalReads']) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
95 qual = record.QUAL |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
96 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
97 REF = record.REF |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
98 ALT = str(record.ALT[0]) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
99 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
100 if len(ALT) > len(REF): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
101 print("Skipping insertion at position: %s" % (record.POS), file=sys.stderr) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
102 continue |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
103 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
104 if depths[record.CHROM][record.POS] >= args.depth and record.QUAL >= args.quality: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
105 if len(REF) > len(ALT): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
106 print("N-masking confident deletion at %s" % (record.POS), file=sys.stderr) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
107 for n in range(len(REF)): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
108 cons[record.CHROM][record.POS-1+n] = 'N' |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
109 continue |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
110 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
111 reporter.report(record, "variant", ALT) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
112 sett.add(record.POS) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
113 if len(REF) > len(ALT): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
114 print("deletion", file=sys.stderr) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
115 continue |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
116 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
117 if len(ALT) > len(REF): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
118 print("insertion", file=sys.stderr) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
119 continue |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
120 cons[record.CHROM][record.POS-1] = str(ALT) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
121 elif len(REF) > len(ALT): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
122 continue |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
123 else: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
124 if depths[record.CHROM][record.POS] < args.depth: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
125 reporter.report(record, "low_depth_variant", "n") |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
126 else: |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
127 reporter.report(record, "low_qual_variant", "n") |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
128 #cons[record.CHROM][record.POS-1] = 'N' |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
129 continue |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
130 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
131 #print >>sys.stderr, str(sett) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
132 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
133 for k in seqs.keys(): |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
134 print(">%s-%s" % (args.bamfile, k)) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
135 print("".join(cons[k])) |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
136 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
def main(argv=None):
    """Parse the command-line arguments and pass the result to ``go``.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default) argparse
            reads ``sys.argv[1:]``, so running the script from a shell
            behaves as it did before this parameter existed.

    Raises:
        SystemExit: Raised by argparse for ``--help`` (exit code 0) or for
            invalid/missing arguments (exit code 2).
    """
    parser = argparse.ArgumentParser()

    # Optional thresholds and masking specification.
    parser.add_argument('--depth', type=int, default=5, help='minimum depth to call a variant')
    parser.add_argument('--quality', type=int, default=0, help='minimum quality to call a variant')
    parser.add_argument('--masked', help='Regions to mask (contig:start-end,contig:start-end)')

    # Required positional inputs; go() reads them as attributes of the
    # parsed namespace.
    parser.add_argument('reference')
    parser.add_argument('vcffile')
    parser.add_argument('bamfile')

    args = parser.parse_args(argv)
    go(args)
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
147 |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
diff
changeset
|
# Script entry point: invoke main() only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()