annotate margin_cons_medaka.py @ 7:be08b76fd78b draft default tip

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
author dfornika
date Tue, 17 Mar 2020 22:05:59 +0000
parents 47eb99e41bfb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
1 #!/usr/bin/env python
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
2
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
3 # Written by Nick Loman
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
4 # This file is adapted from the ARTIC network 'fieldbioinformatics' pipeline:
5
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents: 3
diff changeset
5 # https://github.com/artic-network/fieldbioinformatics/blob/master/artic/margin_cons_medaka.py
47eb99e41bfb "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents: 3
diff changeset
6 # https://github.com/artic-network/fieldbioinformatics/blob/master/artic/vcftagprimersites.py
0
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
7
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
8 import argparse
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
9 import operator
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
10 import os.path
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
11 import subprocess
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
12 import sys
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
13
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
14 from collections import defaultdict
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
15
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
16 import vcf
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
17
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
18 from Bio import SeqIO
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
19
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
20
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
def read_bed_file(fn):
    """Parse a tab-separated primer BED file into a list of primer dicts.

    Each non-empty row produces a dict with these keys:

    * ``Primer_ID`` -- the fourth column.
    * ``direction`` -- the sixth column when the row has six or more
      columns; for five-column rows it is ``'+'`` when the primer name
      contains ``LEFT`` and ``'-'`` when it contains ``RIGHT``.
    * ``start`` / ``end`` -- the second and third columns as ints for
      ``'+'`` rows; for any other direction the two columns are swapped
      (``start`` is the third column, ``end`` the second).

    Args:
        fn: path of the BED file to read.

    Returns:
        A list of dicts, one per row, in file order.

    Raises:
        SystemExit: after printing "Malformed BED file!" to stderr, when a
            row has fewer than five columns or is a five-column row whose
            primer name contains neither ``LEFT`` nor ``RIGHT``.
    """
    # Imported here because the module-level import block does not
    # provide ``csv``; without this the call below raises NameError.
    import csv

    bedfile = []
    with open(fn) as csvfile:
        reader = csv.reader(csvfile, dialect='excel-tab')
        for row in reader:
            if not row:
                # Blank line: nothing to parse.
                continue

            # Work out the direction before touching row[3] unguarded, so
            # that short rows are reported as malformed rather than
            # failing with IndexError/KeyError.
            direction = None
            if len(row) >= 6:
                # new style bed
                direction = row[5]
            elif len(row) == 5:
                # old style without direction
                if 'LEFT' in row[3]:
                    direction = '+'
                elif 'RIGHT' in row[3]:
                    direction = '-'

            if direction is None:
                print("Malformed BED file!", file=sys.stderr)
                raise SystemExit

            bedrow = {'Primer_ID': row[3], 'direction': direction}
            if direction == '+':
                bedrow['end'] = int(row[2])
                bedrow['start'] = int(row[1])
            else:
                bedrow['end'] = int(row[1])
                bedrow['start'] = int(row[2])
            bedfile.append(bedrow)
    return bedfile
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
50
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
51
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
def collect_depths(bamfile):
    """Collect per-position depths by running ``samtools depth``.

    Args:
        bamfile: path of the alignment file passed to ``samtools depth``.

    Returns:
        A ``defaultdict(dict)`` mapping contig name to a dict of
        ``{position: depth}``, with both values converted to ``int``
        exactly as printed by samtools.

    Raises:
        SystemExit: if ``bamfile`` does not exist, or if samtools exits
            with a non-zero status (otherwise a failed run would be
            indistinguishable from an alignment with no coverage).
    """
    if not os.path.exists(bamfile):
        raise SystemExit("bamfile %s doesn't exist" % (bamfile,))

    p = subprocess.Popen(['samtools', 'depth', bamfile],
                         stdout=subprocess.PIPE)
    # stderr is not piped, so the second element is always None.
    out, _ = p.communicate()
    if p.returncode != 0:
        raise SystemExit("samtools depth failed on %s (exit status %s)"
                         % (bamfile, p.returncode))

    depths = defaultdict(dict)
    for ln in out.decode('utf-8').split("\n"):
        if ln:
            # Each output line is "contig<TAB>position<TAB>depth".
            contig, pos, depth = ln.split("\t")
            depths[contig][int(pos)] = int(depth)
    return depths
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
65
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
66
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
class Reporter:
    """Emit per-variant report lines on stderr.

    Every line is tab-separated: sample id, position, field name, value.
    The sample id is the basename of the VCF path up to its first ".".
    """

    def __init__(self, vcffile, depths):
        """Store the VCF path and the depth table used for reporting.

        Args:
            vcffile: path of the VCF file; only its basename is used.
            depths: mapping of contig name to ``{position: depth}``.
        """
        self.vcffile = vcffile
        self.depths = depths

    def report(self, r, status, allele):
        """Print the status, allele, ref and depth lines for one record.

        Args:
            r: record exposing ``CHROM``, ``POS`` and ``REF`` attributes.
            status: value printed on the ``status`` line.
            allele: value printed on the ``allele`` line.
        """
        idfile = os.path.basename(self.vcffile).split(".")[0]
        # A position absent from the depth table is reported as depth 0
        # rather than raising KeyError, matching how the consensus masking
        # code in this file treats positions without a depth entry.
        depth = self.depths.get(r.CHROM, {}).get(r.POS, 0)
        fields = (
            ("status", status),
            ("allele", allele),
            ("ref", r.REF),
            ("depth", depth),
        )
        for field, value in fields:
            print("%s\t%s\t%s\t%s" % (idfile, r.POS, field, value),
                  file=sys.stderr)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
78
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
79
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
80 def go(args):
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
81 MASKED_POSITIONS = defaultdict(set)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
82
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
83 depths = collect_depths(args.bamfile)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
84 reporter = Reporter(args.vcffile, depths)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
85
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
86 seqs = dict([(rec.id, rec) for rec in SeqIO.parse(open(args.reference), "fasta")])
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
87 cons = {}
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
88 for k in seqs.keys():
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
89 cons[k] = list(seqs[k].seq)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
90
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
91 for n, c in enumerate(cons[k]):
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
92 try:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
93 depth = depths[k][n+1]
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
94 except:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
95 depth = 0
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
96
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
97 if depth < args.depth:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
98 cons[k][n] = 'N'
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
99
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
100 if args.masked:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
101 for region in args.masked.split(","):
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
102 contig, positions = region.split(":")
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
103 start, end = positions.split("-")
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
104 start = int(start)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
105 end = int(end)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
106 for n in range(start, end):
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
107 cons[contig][n-1] = 'N'
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
108 MASKED_POSITIONS[contig].add(n-1)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
109
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
110 sett = set()
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
111 vcf_reader = vcf.Reader(open(args.vcffile, 'r'))
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
112 for record in vcf_reader:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
113 if record.ALT[0] != '.':
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
114 # variant call
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
115
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
116 if record.POS in MASKED_POSITIONS[record.CHROM]:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
117 reporter.report(record, "masked_manual", "n")
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
118 cons[record.CHROM][record.POS-1] = 'N'
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
119 continue
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
120
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
121 if record.num_het:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
122 if depths[record.CHROM][record.POS] < args.depth:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
123 reporter.report(record, "het_site_low_depth", "y")
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
124 continue
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
125 else:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
126 reporter.report(record, "het_site", "y")
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
127 cons[record.CHROM][record.POS-1] = 'N'
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
128 continue
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
129
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
130 if 'PRIMER' in record.INFO:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
131 reporter.report(record, "primer_binding_site", "n")
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
132 cons[record.CHROM][record.POS-1] = 'N'
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
133 continue
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
134
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
135 #support = float(record.INFO['SupportFraction'])
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
136 #total_reads = int(record.INFO['TotalReads'])
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
137 qual = record.QUAL
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
138
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
139 REF = record.REF
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
140 ALT = str(record.ALT[0])
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
141
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
142 if len(ALT) > len(REF):
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
143 print("Skipping insertion at position: %s" % (record.POS), file=sys.stderr)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
144 continue
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
145
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
146 if depths[record.CHROM][record.POS] >= args.depth and record.QUAL >= args.quality:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
147 if len(REF) > len(ALT):
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
148 print("N-masking confident deletion at %s" % (record.POS), file=sys.stderr)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
149 for n in range(len(REF)):
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
150 cons[record.CHROM][record.POS-1+n] = 'N'
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
151 continue
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
152
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
153 reporter.report(record, "variant", ALT)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
154 sett.add(record.POS)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
155 if len(REF) > len(ALT):
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
156 print("deletion", file=sys.stderr)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
157 continue
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
158
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
159 if len(ALT) > len(REF):
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
160 print("insertion", file=sys.stderr)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
161 continue
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
162 cons[record.CHROM][record.POS-1] = str(ALT)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
163 elif len(REF) > len(ALT):
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
164 continue
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
165 else:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
166 if depths[record.CHROM][record.POS] < args.depth:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
167 reporter.report(record, "low_depth_variant", "n")
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
168 else:
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
169 reporter.report(record, "low_qual_variant", "n")
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
170 #cons[record.CHROM][record.POS-1] = 'N'
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
171 continue
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
172
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
173 #print >>sys.stderr, str(sett)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
174
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
175 for k in seqs.keys():
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
176 print(">%s-%s" % (args.bamfile, k))
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
177 print("".join(cons[k]))
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
178
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
179 def main():
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
180 parser = argparse.ArgumentParser()
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
181 parser.add_argument('--depth', type=int, default=5, help='minimum depth to call a variant')
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
182 parser.add_argument('--quality', type=int, default=0, help='minimum quality to call a variant')
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
183 parser.add_argument('--masked', help='Regions to mask (contig:start-end,contig:start-end)')
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
184 parser.add_argument('reference')
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
185 parser.add_argument('vcffile')
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
186 parser.add_argument('bamfile')
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
187 args = parser.parse_args()
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
188 go(args)
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
189
7df876ad836a "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff changeset
# Run the command-line interface only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()