Mercurial > repos > dfornika > artic_margin_cons_medaka
annotate margin_cons_medaka.py @ 7:be08b76fd78b draft default tip
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
author | dfornika |
---|---|
date | Tue, 17 Mar 2020 22:05:59 +0000 |
parents | 47eb99e41bfb |
children |
rev | line source |
---|---|
0
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
1 #!/usr/bin/env python |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
2 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
3 # Written by Nick Loman |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
4 # This file adapted from ARTICnetwork 'fieldbioinformatics' pipeline: |
5
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
3
diff
changeset
|
5 # https://github.com/artic-network/fieldbioinformatics/blob/master/artic/margin_cons_medaka.py |
47eb99e41bfb
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit cdc887baab41af2b386b1ec7239b55d474c0fff5-dirty"
dfornika
parents:
3
diff
changeset
|
6 # https://github.com/artic-network/fieldbioinformatics/blob/master/artic/vcftagprimersites.py |
0
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
7 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
8 import argparse |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
9 import operator |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
10 import os.path |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
11 import subprocess |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
12 import sys |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
13 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
14 from collections import defaultdict |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
15 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
16 import vcf |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
17 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
18 from Bio import SeqIO |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
19 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
20 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
def read_bed_file(fn):
    """Parse a tab-separated primer BED file into a list of primer dicts.

    Each non-blank row produces a dict with these keys:

    * ``Primer_ID``  -- the fourth column.
    * ``direction``  -- the sixth column when the row has six or more
      columns; for five-column rows it is inferred from the primer name
      ('+' if it contains ``LEFT``, '-' if it contains ``RIGHT``).
    * ``start`` / ``end`` -- the second and third columns as ints for a
      '+' primer; the two columns are swapped for any other direction.

    :param fn: path of the BED file to read.
    :returns: list of primer dicts, in file order.
    :raises SystemExit: with the message "Malformed BED file!" when a row
        has fewer than five columns, or has five columns and a primer name
        containing neither ``LEFT`` nor ``RIGHT``.
    :raises ValueError: when a coordinate column is not an integer.
    """
    # Imported locally: the module-level import list does not bring in csv,
    # so referencing it without this import raises NameError.
    import csv

    bedfile = []
    with open(fn) as csvfile:
        reader = csv.reader(csvfile, dialect='excel-tab')
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line
                continue

            if len(row) < 5:
                # Too short to hold a primer name plus a direction hint.
                # Passing the message to SystemExit prints it on stderr
                # and gives a non-zero exit status.
                raise SystemExit("Malformed BED file!")

            bedrow = {'Primer_ID': row[3]}

            if len(row) >= 6:
                # new style bed: explicit direction column
                bedrow['direction'] = row[5]
            elif 'LEFT' in row[3]:
                # old style without direction: infer it from the primer name
                bedrow['direction'] = '+'
            elif 'RIGHT' in row[3]:
                bedrow['direction'] = '-'
            else:
                raise SystemExit("Malformed BED file!")

            if bedrow['direction'] == '+':
                bedrow['end'] = int(row[2])
                bedrow['start'] = int(row[1])
            else:
                # any non-'+' direction has its coordinates swapped
                bedrow['end'] = int(row[1])
                bedrow['start'] = int(row[2])
            bedfile.append(bedrow)
    return bedfile
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
50 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
51 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
def collect_depths(bamfile):
    """Run ``samtools depth`` on a BAM file and collect its output.

    Each non-empty output line is split on tabs into contig, position and
    depth; position and depth are converted to int.

    :param bamfile: path of the BAM file handed to ``samtools depth``.
    :returns: ``defaultdict(dict)`` mapping contig -> {position: depth}.
        Positions that samtools does not print are absent from the mapping.
    :raises SystemExit: when ``bamfile`` does not exist, or when samtools
        finishes with a non-zero exit status.
    """
    if not os.path.exists(bamfile):
        raise SystemExit("bamfile %s doesn't exist" % (bamfile,))

    p = subprocess.Popen(['samtools', 'depth', bamfile],
                         stdout=subprocess.PIPE)
    # stderr is not piped, so the second element is always None
    out, _ = p.communicate()

    # Without this check a failed samtools run yields an empty table, which
    # is indistinguishable from a BAM file with no coverage anywhere.
    if p.returncode != 0:
        raise SystemExit("samtools depth failed on %s (exit status %s)"
                         % (bamfile, p.returncode))

    depths = defaultdict(dict)
    for ln in out.decode('utf-8').split("\n"):
        if ln:
            contig, pos, depth = ln.split("\t")
            depths[contig][int(pos)] = int(depth)
    return depths
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
65 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
66 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
class Reporter:
    """Writes tab-separated diagnostic lines about a VCF record to stderr.

    :param vcffile: path of the VCF file; the part of its basename before
        the first "." is used as the identifier on every reported line.
    :param depths: mapping of contig -> {position: depth}.
    """

    def __init__(self, vcffile, depths):
        self.vcffile = vcffile
        self.depths = depths

    def report(self, r, status, allele):
        """Print four lines (status, allele, ref, depth) for one record.

        Every line has the form ``<id>\\t<POS>\\t<field>\\t<value>`` and goes
        to ``sys.stderr``.

        :param r: record object; its ``POS``, ``REF`` and ``CHROM``
            attributes are read.
        :param status: value printed on the ``status`` line.
        :param allele: value printed on the ``allele`` line.
        """
        idfile = os.path.basename(self.vcffile).split(".")[0]

        # A contig or position without a depth entry is reported as depth 0
        # instead of raising KeyError; go() applies the same fallback when
        # it looks up a depth that is missing.  Using .get() also avoids
        # inserting empty entries into a defaultdict as a side effect.
        depth = self.depths.get(r.CHROM, {}).get(r.POS, 0)

        print("%s\t%s\tstatus\t%s" % (idfile, r.POS, status), file=sys.stderr)
        print("%s\t%s\tallele\t%s" % (idfile, r.POS, allele), file=sys.stderr)
        print("%s\t%s\tref\t%s" % (idfile, r.POS, r.REF), file=sys.stderr)
        print("%s\t%s\tdepth\t%s" % (idfile, r.POS, depth), file=sys.stderr)
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
78 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
79 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
80 def go(args): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
81 MASKED_POSITIONS = defaultdict(set) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
82 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
83 depths = collect_depths(args.bamfile) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
84 reporter = Reporter(args.vcffile, depths) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
85 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
86 seqs = dict([(rec.id, rec) for rec in SeqIO.parse(open(args.reference), "fasta")]) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
87 cons = {} |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
88 for k in seqs.keys(): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
89 cons[k] = list(seqs[k].seq) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
90 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
91 for n, c in enumerate(cons[k]): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
92 try: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
93 depth = depths[k][n+1] |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
94 except: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
95 depth = 0 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
96 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
97 if depth < args.depth: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
98 cons[k][n] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
99 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
100 if args.masked: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
101 for region in args.masked.split(","): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
102 contig, positions = region.split(":") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
103 start, end = positions.split("-") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
104 start = int(start) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
105 end = int(end) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
106 for n in range(start, end): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
107 cons[contig][n-1] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
108 MASKED_POSITIONS[contig].add(n-1) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
109 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
110 sett = set() |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
111 vcf_reader = vcf.Reader(open(args.vcffile, 'r')) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
112 for record in vcf_reader: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
113 if record.ALT[0] != '.': |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
114 # variant call |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
115 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
116 if record.POS in MASKED_POSITIONS[record.CHROM]: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
117 reporter.report(record, "masked_manual", "n") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
118 cons[record.CHROM][record.POS-1] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
119 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
120 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
121 if record.num_het: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
122 if depths[record.CHROM][record.POS] < args.depth: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
123 reporter.report(record, "het_site_low_depth", "y") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
124 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
125 else: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
126 reporter.report(record, "het_site", "y") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
127 cons[record.CHROM][record.POS-1] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
128 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
129 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
130 if 'PRIMER' in record.INFO: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
131 reporter.report(record, "primer_binding_site", "n") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
132 cons[record.CHROM][record.POS-1] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
133 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
134 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
135 #support = float(record.INFO['SupportFraction']) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
136 #total_reads = int(record.INFO['TotalReads']) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
137 qual = record.QUAL |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
138 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
139 REF = record.REF |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
140 ALT = str(record.ALT[0]) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
141 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
142 if len(ALT) > len(REF): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
143 print("Skipping insertion at position: %s" % (record.POS), file=sys.stderr) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
144 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
145 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
146 if depths[record.CHROM][record.POS] >= args.depth and record.QUAL >= args.quality: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
147 if len(REF) > len(ALT): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
148 print("N-masking confident deletion at %s" % (record.POS), file=sys.stderr) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
149 for n in range(len(REF)): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
150 cons[record.CHROM][record.POS-1+n] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
151 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
152 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
153 reporter.report(record, "variant", ALT) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
154 sett.add(record.POS) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
155 if len(REF) > len(ALT): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
156 print("deletion", file=sys.stderr) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
157 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
158 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
159 if len(ALT) > len(REF): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
160 print("insertion", file=sys.stderr) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
161 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
162 cons[record.CHROM][record.POS-1] = str(ALT) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
163 elif len(REF) > len(ALT): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
164 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
165 else: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
166 if depths[record.CHROM][record.POS] < args.depth: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
167 reporter.report(record, "low_depth_variant", "n") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
168 else: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
169 reporter.report(record, "low_qual_variant", "n") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
170 #cons[record.CHROM][record.POS-1] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
171 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
172 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
173 #print >>sys.stderr, str(sett) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
174 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
175 for k in seqs.keys(): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
176 print(">%s-%s" % (args.bamfile, k)) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
177 print("".join(cons[k])) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
178 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
def main(argv=None):
    """Parse the command line and hand the parsed arguments to ``go``.

    Recognised options are ``--depth`` (int, default 5), ``--quality``
    (int, default 0) and ``--masked``; the three positional arguments are
    ``reference``, ``vcffile`` and ``bamfile``, in that order.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the original
            zero-argument ``main()`` did.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or for invalid or
            missing arguments, before ``go`` is called.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--depth', type=int, default=5, help='minimum depth to call a variant')
    parser.add_argument('--quality', type=int, default=0, help='minimum quality to call a variant')
    parser.add_argument('--masked', help='Regions to mask (contig:start-end,contig:start-end)')
    parser.add_argument('reference')
    parser.add_argument('vcffile')
    parser.add_argument('bamfile')
    # parse_args(None) falls back to sys.argv[1:], so the script entry
    # point below behaves exactly as before.
    args = parser.parse_args(argv)
    go(args)


if __name__ == "__main__":
    main()