Mercurial > repos > sanbi-uwc > vcf_to_alignment
annotate vcf_to_msa.py @ 5:4f3f7d390382 draft default tip
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76-dirty
author | sanbi-uwc |
---|---|
date | Wed, 17 Oct 2018 02:35:51 -0400 |
parents | f58178c0f00d |
children |
rev | line source |
---|---|
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
2 |
2
a0c85f2d74a5
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 9612f06b8c60520dc0a047ec072ced317c7796e4
sanbi-uwc
parents:
0
diff
changeset
|
3 from __future__ import print_function |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
4 import argparse |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
5 import sys |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
6 from Bio import SeqIO |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
7 from Bio.SeqRecord import SeqRecord |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
8 from Bio.Seq import Seq |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
9 from Bio.Alphabet import IUPAC |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
10 import os.path |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
11 import vcf |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
12 import intervaltree |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
13 from operator import itemgetter |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
14 from pathlib import Path |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
15 |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
16 |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
def difference(x, y):
    """Return 0 when ``x`` equals ``y`` and 1 otherwise.

    Used as the per-position mismatch score when comparing two sequences.
    """
    if x == y:
        return 0
    return 1
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
19 |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
20 |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
def string_difference(query, target, query_len):
    """Count the positions at which ``query`` and ``target`` disagree.

    Only the first ``query_len`` positions are compared. Both sequences are
    indexed directly, so an ``IndexError`` is raised if either of them is
    shorter than ``query_len``.
    """
    mismatches = 0
    for pos in range(query_len):
        if query[pos] == target[pos]:
            continue
        mismatches += 1
    return mismatches
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
23 |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
24 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
def fuzzysearch(query, target):
    """Return the offset in ``target`` where ``query`` fits with fewest mismatches.

    ``query`` is slid along ``target`` one position at a time. At each offset
    the number of positions whose characters differ is counted, and the
    leftmost offset with the smallest count is returned.

    :param query: sequence to look for; must not be longer than ``target``
    :param target: sequence to search in
    :returns: 0-based offset of the best-matching window (0 for an empty query)
    :raises AssertionError: if ``query`` is longer than ``target``
    """
    query_len = len(query)
    target_len = len(target)
    assert query_len <= target_len, "query cannot be longer than target"
    best_pos = 0
    min_distance = None  # no window scored yet
    for i in range(target_len - query_len + 1):
        # Every window has exactly query_len characters, so zip() pairs up
        # all positions of the query.
        window = target[i:i + query_len]
        distance = sum(1 for q, t in zip(query, window) if q != t)
        # Strict comparison keeps the leftmost window among equally good ones.
        if min_distance is None or distance < min_distance:
            min_distance, best_pos = distance, i
            if distance == 0:
                # An exact match cannot be beaten; stop scanning.
                break
    return best_pos
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
36 |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
37 |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
class readable_dir(argparse.Action):
    """argparse action that only accepts an existing, readable directory.

    On success the directory path is stored unchanged on the namespace under
    the option's destination name.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate ``values`` as a directory path and store it.

        :raises argparse.ArgumentError: if the path is not a directory or is
            not readable. argparse turns this into the usual usage message
            and exit status 2. ``ArgumentTypeError`` is only handled by
            argparse when raised from a ``type=`` callable, so raising it
            here would surface as an uncaught traceback instead.
        """
        prospective_dir = values
        if not os.path.isdir(prospective_dir):
            raise argparse.ArgumentError(
                self, "readable_dir:{0} is not a valid path".format(prospective_dir))
        if not os.access(prospective_dir, os.R_OK):
            raise argparse.ArgumentError(
                self, "readable_dir:{0} is not a readable dir".format(prospective_dir))
        setattr(namespace, self.dest, prospective_dir)
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
47 |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
48 |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
# Command-line interface.
# VCF input is given either as a directory (--vcf_dir, which takes precedence
# when both are supplied) or as an explicit list (--vcf_files). Each entry of
# the list is either a plain path or "<sequence name>^^^<path>".
parser = argparse.ArgumentParser()
parser.add_argument(
    '--vcf_files',
    nargs="+",
)
parser.add_argument(
    '-d', '--vcf_dir',
    action=readable_dir,
    help="VCF directory ",
)
parser.add_argument(
    '--reference_file',
    type=argparse.FileType('r'),
    required=True,
)
parser.add_argument(
    '--output_file',
    type=argparse.FileType('w'),
    required=True,
)
parser.add_argument(
    '--remove_invariant',
    action='store_true',
    default=False,
)
parser.add_argument(
    '--exclude',
    type=argparse.FileType('r'),
    help='BED format file of regions to exclude from variant calling',
)
args = parser.parse_args()
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
57 |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
# Regions to ignore, keyed by chromosome name: each value is an IntervalTree
# holding the [start, end) ranges read from the --exclude BED file. Variants
# overlapping one of these ranges are skipped when the VCF files are applied.
exclude_trees = {}
if args.exclude is not None:
    for line in args.exclude:
        line = line.strip()
        # Blank lines and '#' comment/header lines carry no region. A header
        # such as "#chrom<TAB>start<TAB>end" would otherwise reach int() below
        # and abort the run with a ValueError.
        if not line or line.startswith('#'):
            continue
        # all of BED format that we care about is chromosome, start, end
        fields = line.split('\t')
        if len(fields) < 3:
            continue
        chrom = fields[0]
        start = int(fields[1])
        end = int(fields[2])
        if chrom not in exclude_trees:
            exclude_trees[chrom] = intervaltree.IntervalTree()
        exclude_trees[chrom][start:end] = True
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
74 |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
# Which classes of variant are applied to the reference. These are fixed in
# the script rather than exposed on the command line; only SNPs are enabled.
do_inserts = False
do_deletes = False
do_snps = True
if (do_inserts or do_deletes) and args.remove_invariant:
    # sys.exit() instead of the bare exit(): the latter is a helper injected
    # by the site module for interactive use and is not guaranteed to exist.
    sys.exit("Cannot combine indel processing with 'remove_invariant' argument")

# The reference must contain exactly one FASTA record (SeqIO.read).
reference_seq = SeqIO.read(args.reference_file, "fasta")
reference = str(reference_seq.seq)

# Per-run state filled in while the VCF files are processed:
insertions = {}         # sequence name -> list, created empty for every sample
insertion_sites = []    # (affected_start, affected_end, inserted sequence, sequence name)
tree = intervaltree.IntervalTree()
sequence_names = []     # sample names in processing order
sequences = {}          # sequence name -> list of characters (mutable copy of the reference)
if args.remove_invariant:
    # positions (record.affected_start) at which at least one sample has a SNP
    variant_sites = set()
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
95 |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
# Collect the VCF inputs. A directory given with --vcf_dir takes precedence
# over an explicit --vcf_files list.
vcf_list = []
if args.vcf_dir:
    # Directory listings come back in filesystem order, which differs between
    # machines; sort so that samples are always processed in the same order
    # and the output is reproducible.
    vcf_list = sorted(str(path) for path in Path(args.vcf_dir).glob('*.vcf'))
elif args.vcf_files:
    vcf_list = args.vcf_files
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
103 |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
104 for i, vcf_descriptor in enumerate(vcf_list): |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
105 # print(os.path.basename(vcf_descriptor)) |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
106 if '^^^' in vcf_descriptor: |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
107 (seqname, vcf_filename) = vcf_descriptor.split('^^^') |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
108 else: |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
109 seqname = str(os.path.basename(vcf_descriptor)).rsplit('.vcf', 1)[0] |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
110 vcf_filename = vcf_descriptor |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
111 sequence_names.append(seqname) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
112 sequence = list(reference) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
113 sequences[seqname] = sequence |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
114 |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
115 insertions[seqname] = [] |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
116 count = 0 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
117 for record in vcf.VCFReader(filename=vcf_filename): |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
118 if args.exclude: |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
119 if record.CHROM in exclude_trees: |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
120 tree = exclude_trees[record.CHROM] |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
121 if tree.overlaps(record.affected_start, record.affected_end): |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
122 continue |
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
123 type = "unknown" |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
124 if record.is_snp and do_snps: |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
125 type = "snp" |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
126 try: |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
127 sequence[record.affected_start] = str(record.alleles[1]) # SNP, simply insert alt allele |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
128 except IndexError as e: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
129 print("snp: Error assigning to {}:{}: {}".format(record.affected_start, record.affected_end, str(e)), file=sys.stderr) |
3
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
130 if args.remove_invariant: |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
131 variant_sites.add(record.affected_start) |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
132 count += 1 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
133 elif record.is_indel: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
134 length = record.affected_end - record.affected_start |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
135 if record.is_deletion and do_deletes: |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
136 type = "del" |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
137 try: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
138 sequence[record.affected_start:record.affected_end] = ['-'] * length |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
139 except IndexError as e: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
140 print("del: Error assigning to {}:{}: {}".format(record.affected_start, record.affected_end, str(e)), file=sys.stderr) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
141 count += 1 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
142 else: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
143 if do_inserts: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
144 print("Warning: insert processing from VCF is dangerously broken", file=sys.stderr) |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
145 type = "ins" |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
146 # insertions[seqname].append(record) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
147 ref = str(record.alleles[0]) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
148 alt = str(record.alleles[1]) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
149 # print("ins", alt.startswith(ref), fuzzysearch(ref, alt), ref, alt, record.affected_start, record.affected_end, len(alt) - len(ref), len(alt), len(ref), record.affected_end - record.affected_start + 1) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
150 alt_sequence = alt[len(ref) - 1:] if alt.startswith(ref) else alt |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
151 insertion_sites.append((record.affected_start, record.affected_end, alt_sequence, seqname)) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
152 # interval = intervaltree.Interval(record.affected_start, record.affected_start + length, data=[seqname]) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
153 # if interval in tree: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
154 # existing_interval = tree[interval.begin:interval.end + 1] |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
155 # start = min([existing_interval.begin, interval.begin]) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
156 # end = max([existing_interval.end, interval.end]) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
157 # tree.remove(existing_interval) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
158 # new_interval = intervaltree.Interval(start, end, existing_interval.data + interval.data) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
159 # tree.add(new_interval) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
160 |
3
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
161 if args.remove_invariant: |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
162 reference_str = ''.join([c for (i, c) in enumerate(reference) if i in variant_sites]) |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
163 reference_seq_variant = SeqRecord(Seq(reference_str), id=reference_seq.id, description=reference_seq.description) |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
164 SeqIO.write(reference_seq_variant, args.output_file, "fasta") |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
165 else: |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
166 SeqIO.write(reference_seq, args.output_file, "fasta") |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
167 |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
168 offset = 0 |
3
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
169 sequence_length = 0 |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
170 for name in sequence_names: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
171 sequence = sequences[name] |
3
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
172 sequence_length = len(sequence) |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
173 for site in sorted(insertion_sites, key=itemgetter(0)): |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
174 (start, end, allele, seqname) = site |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
175 # print(start, allele, seqname) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
176 length = len(allele) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
177 # start += offset |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
178 # end += offset |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
179 # offset += length |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
180 try: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
181 if name == seqname: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
182 sequence[start:end] = list(str(allele)) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
183 else: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
184 sequence[start:end] = ['-'] * length |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
185 except IndexError as e: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
186 print("ins: Error assigning to {}:{}: {}".format(start, end, str(e)), file=sys.stderr) |
3
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
187 |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
188 if args.remove_invariant: |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
189 seq_str = ''.join([c for (i, c) in enumerate(sequence) if i in variant_sites]) |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
190 else: |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
191 seq_str = ''.join(sequence) |
62fbd3f96b30
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents:
2
diff
changeset
|
192 SeqIO.write(SeqRecord(Seq(seq_str, alphabet=IUPAC.ambiguous_dna), id=name, description=""), args.output_file, "fasta") |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
193 |
4
f58178c0f00d
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents:
3
diff
changeset
|
194 # output.write(bytes("\t".join([type, str(record.affected_start), str(record.affected_end), str(record.alleles[0]), str(record.alleles[1])])+"\n", encoding="ascii")) |
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
195 # output.close() |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
196 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
197 args.output_file.close() |