Mercurial > repos > dfornika > artic_margin_cons_medaka
annotate margin_cons_medaka.py @ 0:7df876ad836a draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
author | dfornika |
---|---|
date | Wed, 11 Mar 2020 01:58:44 +0000 |
parents | |
children | eeb3cb264cb0 |
rev | line source |
---|---|
0
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
1 #!/usr/bin/env python |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
2 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
3 # Written by Nick Loman |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
4 # This file adapted from ARTICnetwork 'fieldbioinformatics' pipeline: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
5 # https://github.com/artic-network/fieldbioinformatics |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
6 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
7 import argparse |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
8 import operator |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
9 import os.path |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
10 import subprocess |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
11 import sys |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
12 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
13 from collections import defaultdict |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
14 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
15 import vcf |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
16 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
17 from Bio import SeqIO |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
18 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
19 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
def read_bed_file(fn):
    """Parse a tab-separated primer BED file into a list of primer dicts.

    Each non-blank row yields a dict with the keys:

    * ``Primer_ID``  -- column 4 of the row.
    * ``direction``  -- column 6 when the row has six or more columns
      ("new style"); for five-column rows ("old style") it is derived from
      the primer name: '+' if it contains ``LEFT``, '-' if it contains
      ``RIGHT``.
    * ``start`` / ``end`` -- columns 2 and 3 as ints for '+' rows; for any
      other direction the two columns are swapped.

    Blank rows are skipped.

    :param fn: path to the BED file.
    :returns: list of dicts, in file order.
    :raises SystemExit: (exit status 1, after printing "Malformed BED file!"
        to stderr) if a row has fewer than five columns, or is a five-column
        row whose primer name contains neither ``LEFT`` nor ``RIGHT``.
    :raises ValueError: if a coordinate column is not an integer.
    """
    # Imported here because csv is not among this module's top-level imports.
    import csv

    bedfile = []
    # newline='' is what the csv module asks for when it is handed a file.
    with open(fn, newline='') as csvfile:
        reader = csv.reader(csvfile, dialect='excel-tab')
        for row in reader:
            if not row:
                # csv.reader yields [] for an empty line; nothing to parse.
                continue

            direction = None
            if len(row) >= 6:
                # new style bed: direction is given explicitly
                direction = row[5]
            elif len(row) == 5:
                # old style without direction: infer it from the primer name
                if 'LEFT' in row[3]:
                    direction = '+'
                elif 'RIGHT' in row[3]:
                    direction = '-'

            if direction is None:
                # Too few columns, or an old-style name we cannot classify.
                print("Malformed BED file!", file=sys.stderr)
                # Non-zero status so callers/pipelines see the failure.
                raise SystemExit(1)

            bedrow = {'Primer_ID': row[3], 'direction': direction}
            if direction == '+':
                bedrow['start'] = int(row[1])
                bedrow['end'] = int(row[2])
            else:
                # Any non-'+' direction: start/end are taken in reverse order.
                bedrow['start'] = int(row[2])
                bedrow['end'] = int(row[1])
            bedfile.append(bedrow)
    return bedfile
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
49 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
50 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
def collect_depths(bamfile):
    """Run ``samtools depth`` on *bamfile* and return per-position depths.

    :param bamfile: path to the alignment file handed to ``samtools depth``.
    :returns: ``defaultdict(dict)`` mapping contig name -> {position: depth},
        with position and depth as ints, exactly as printed by samtools.
        Positions that samtools does not print have no entry.
    :raises SystemExit: if *bamfile* does not exist, or if ``samtools depth``
        exits with a non-zero status (previously such a failure silently
        produced an empty depth table).
    """
    if not os.path.exists(bamfile):
        raise SystemExit("bamfile %s doesn't exist" % (bamfile,))

    p = subprocess.Popen(['samtools', 'depth', bamfile],
                         stdout=subprocess.PIPE)
    # stderr is not piped, so only the stdout half of the pair is meaningful.
    out, _ = p.communicate()
    if p.returncode != 0:
        raise SystemExit("samtools depth failed on %s (exit status %s)"
                         % (bamfile, p.returncode))

    depths = defaultdict(dict)
    for ln in out.decode('utf-8').splitlines():
        if not ln:
            continue
        # Each output line is: contig <TAB> position <TAB> depth
        contig, pos, depth = ln.split("\t")
        depths[contig][int(pos)] = int(depth)
    return depths
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
64 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
65 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
class Reporter:
    """Write per-site diagnostic lines for VCF records to stderr.

    :param vcffile: path of the VCF file being processed; the part of its
        basename before the first '.' is used as the sample identifier.
    :param depths: mapping contig -> {position: depth}, as produced by
        ``collect_depths``.
    """

    def __init__(self, vcffile, depths):
        self.vcffile = vcffile
        self.depths = depths

    def report(self, r, status, allele):
        """Emit four tab-separated lines (status, allele, ref, depth) for *r*.

        Each line has the form ``<id>\\t<POS>\\t<field>\\t<value>`` and is
        written to stderr.

        :param r: record exposing ``CHROM``, ``POS`` and ``REF`` attributes
            (the caller passes records read by ``vcf.Reader``).
        :param status: free-text status label for the site.
        :param allele: allele label to report for the site.
        """
        idfile = os.path.basename(self.vcffile).split(".")[0]
        # A position with no depth entry is reported as depth 0 -- the same
        # convention go() uses when masking -- instead of raising KeyError.
        depth = self.depths.get(r.CHROM, {}).get(r.POS, 0)
        fields = (
            ("status", status),
            ("allele", allele),
            ("ref", r.REF),
            ("depth", depth),
        )
        for label, value in fields:
            print("%s\t%s\t%s\t%s" % (idfile, r.POS, label, value),
                  file=sys.stderr)
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
77 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
78 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
79 def go(args): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
80 MASKED_POSITIONS = defaultdict(set) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
81 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
82 depths = collect_depths(args.bamfile) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
83 reporter = Reporter(args.vcffile, depths) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
84 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
85 seqs = dict([(rec.id, rec) for rec in SeqIO.parse(open(args.reference), "fasta")]) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
86 cons = {} |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
87 for k in seqs.keys(): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
88 cons[k] = list(seqs[k].seq) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
89 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
90 for n, c in enumerate(cons[k]): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
91 try: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
92 depth = depths[k][n+1] |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
93 except: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
94 depth = 0 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
95 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
96 if depth < args.depth: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
97 cons[k][n] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
98 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
99 if args.masked: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
100 for region in args.masked.split(","): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
101 contig, positions = region.split(":") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
102 start, end = positions.split("-") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
103 start = int(start) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
104 end = int(end) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
105 for n in range(start, end): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
106 cons[contig][n-1] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
107 MASKED_POSITIONS[contig].add(n-1) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
108 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
109 sett = set() |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
110 vcf_reader = vcf.Reader(open(args.vcffile, 'r')) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
111 for record in vcf_reader: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
112 if record.ALT[0] != '.': |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
113 # variant call |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
114 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
115 if record.POS in MASKED_POSITIONS[record.CHROM]: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
116 reporter.report(record, "masked_manual", "n") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
117 cons[record.CHROM][record.POS-1] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
118 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
119 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
120 if record.num_het: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
121 if depths[record.CHROM][record.POS] < args.depth: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
122 reporter.report(record, "het_site_low_depth", "y") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
123 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
124 else: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
125 reporter.report(record, "het_site", "y") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
126 cons[record.CHROM][record.POS-1] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
127 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
128 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
129 if 'PRIMER' in record.INFO: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
130 reporter.report(record, "primer_binding_site", "n") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
131 cons[record.CHROM][record.POS-1] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
132 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
133 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
134 #support = float(record.INFO['SupportFraction']) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
135 #total_reads = int(record.INFO['TotalReads']) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
136 qual = record.QUAL |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
137 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
138 REF = record.REF |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
139 ALT = str(record.ALT[0]) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
140 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
141 if len(ALT) > len(REF): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
142 print("Skipping insertion at position: %s" % (record.POS), file=sys.stderr) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
143 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
144 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
145 if depths[record.CHROM][record.POS] >= args.depth and record.QUAL >= args.quality: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
146 if len(REF) > len(ALT): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
147 print("N-masking confident deletion at %s" % (record.POS), file=sys.stderr) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
148 for n in range(len(REF)): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
149 cons[record.CHROM][record.POS-1+n] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
150 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
151 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
152 reporter.report(record, "variant", ALT) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
153 sett.add(record.POS) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
154 if len(REF) > len(ALT): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
155 print("deletion", file=sys.stderr) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
156 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
157 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
158 if len(ALT) > len(REF): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
159 print("insertion", file=sys.stderr) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
160 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
161 cons[record.CHROM][record.POS-1] = str(ALT) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
162 elif len(REF) > len(ALT): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
163 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
164 else: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
165 if depths[record.CHROM][record.POS] < args.depth: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
166 reporter.report(record, "low_depth_variant", "n") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
167 else: |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
168 reporter.report(record, "low_qual_variant", "n") |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
169 #cons[record.CHROM][record.POS-1] = 'N' |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
170 continue |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
171 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
172 #print >>sys.stderr, str(sett) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
173 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
174 for k in seqs.keys(): |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
175 print(">%s-%s" % (args.bamfile, k)) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
176 print("".join(cons[k])) |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
177 |
7df876ad836a
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_margin_cons_medaka commit c866a76470b72bf5abdc2a88e6bf584c2f2f9b02-dirty"
dfornika
parents:
diff
changeset
|
def main():
    """Command-line entry point.

    Builds the argument parser, parses the process command line and hands
    the resulting namespace to ``go``.

    Options:
        --depth    int, default 5; minimum depth to call a variant.
        --quality  int, default 0; minimum quality to call a variant.
        --masked   regions to mask, written as
                   ``contig:start-end,contig:start-end``.

    Positional arguments, in order: reference, vcffile, bamfile.
    """
    cli = argparse.ArgumentParser()

    # Optional thresholds and manual masking.
    cli.add_argument(
        '--depth',
        type=int,
        default=5,
        help='minimum depth to call a variant',
    )
    cli.add_argument(
        '--quality',
        type=int,
        default=0,
        help='minimum quality to call a variant',
    )
    cli.add_argument(
        '--masked',
        help='Regions to mask (contig:start-end,contig:start-end)',
    )

    # Required positionals, registered in the order they must be supplied.
    for positional in ('reference', 'vcffile', 'bamfile'):
        cli.add_argument(positional)

    go(cli.parse_args())


if __name__ == "__main__":
    main()