annotate vcf_to_msa.py @ 5:4f3f7d390382 draft default tip

planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76-dirty
author sanbi-uwc
date Wed, 17 Oct 2018 02:35:51 -0400
parents f58178c0f00d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
1 #!/usr/bin/env python
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
2
2
a0c85f2d74a5 planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 9612f06b8c60520dc0a047ec072ced317c7796e4
sanbi-uwc
parents: 0
diff changeset
3 from __future__ import print_function
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
4 import argparse
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
5 import sys
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
6 from Bio import SeqIO
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
7 from Bio.SeqRecord import SeqRecord
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
8 from Bio.Seq import Seq
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
9 from Bio.Alphabet import IUPAC
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
10 import os.path
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
11 import vcf
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
12 import intervaltree
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
13 from operator import itemgetter
4
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
14 from pathlib import Path
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
15
4
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
16
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
def difference(x, y):
    """Return 0 when ``x`` equals ``y`` and 1 otherwise.

    The result is a per-position mismatch indicator, so summing it over
    aligned positions yields a Hamming-style distance.
    """
    return 0 if x == y else 1
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
19
4
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
20
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
def string_difference(query, target, query_len):
    """Count the mismatching positions among the first ``query_len`` items.

    Only indices ``0 .. query_len - 1`` of ``query`` and ``target`` are
    compared; anything beyond that in either sequence is ignored.

    Raises IndexError if either sequence is shorter than ``query_len``.
    """
    # Indexing (rather than zip) is deliberate: it keeps the IndexError when a
    # caller passes a query_len larger than one of the sequences.
    return sum(0 if query[i] == target[i] else 1 for i in range(query_len))
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
23
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
24
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
def fuzzysearch(query, target):
    """Return the offset in ``target`` where ``query`` matches best.

    Every window of ``len(query)`` items in ``target`` is scored by its number
    of mismatching positions against ``query``. The offset of the lowest
    scoring window is returned; on ties the earliest offset wins.

    Raises AssertionError if ``query`` is longer than ``target``.
    """
    query_len = len(query)
    target_len = len(target)
    # Raised explicitly (instead of via an ``assert`` statement) so the check
    # still runs under ``python -O`` while keeping the same exception type.
    if query_len > target_len:
        raise AssertionError("query cannot be longer than target")

    best_pos = 0
    min_distance = None
    for start in range(target_len - query_len + 1):
        window = target[start:start + query_len]
        distance = sum(0 if q == t else 1 for q, t in zip(query, window))
        if min_distance is None or distance < min_distance:
            min_distance, best_pos = distance, start
            if distance == 0:
                # An exact match cannot be beaten, and only a strictly
                # smaller distance would replace it, so stop scanning.
                break
    return best_pos
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
36
4
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
37
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
class readable_dir(argparse.Action):
    """argparse action that accepts only an existing, readable directory."""

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate ``values`` as a directory path and store it on ``namespace``.

        The path is stored under ``self.dest`` unchanged when it names an
        existing directory that the current process may read.

        Raises argparse.ArgumentTypeError if the path is not a directory or is
        not readable.
        """
        # NOTE(review): argparse converts only argparse.ArgumentError into a
        # usage message; an ArgumentTypeError raised from an Action propagates
        # to the caller as a normal exception. The exception type is kept here
        # for compatibility.
        prospective_dir = values
        if not os.path.isdir(prospective_dir):
            raise argparse.ArgumentTypeError(
                "readable_dir:{0} is not a valid path".format(prospective_dir))
        if not os.access(prospective_dir, os.R_OK):
            raise argparse.ArgumentTypeError(
                "readable_dir:{0} is not a readable dir".format(prospective_dir))
        setattr(namespace, self.dest, prospective_dir)
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
47
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
48
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
# Command-line interface.
parser = argparse.ArgumentParser()
# One or more VCF inputs; each entry is either a plain path or a
# "<sequence name>^^^<path>" descriptor.
parser.add_argument('--vcf_files', nargs="+")
# Directory whose *.vcf files are used; takes precedence over --vcf_files.
parser.add_argument('-d', '--vcf_dir', action=readable_dir, help="VCF directory ")
# FASTA reference, opened for reading by argparse.
parser.add_argument('--reference_file', required=True, type=argparse.FileType())
# Output file, opened for writing by argparse.
parser.add_argument('--output_file', required=True, type=argparse.FileType('w'))
# Cannot be combined with indel processing (checked after parsing).
parser.add_argument('--remove_invariant', action='store_true', default=False)
parser.add_argument('--exclude', type=argparse.FileType(), help='BED format file of regions to exclude from variant calling')
args = parser.parse_args()
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
57
4
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
# Excluded regions from the optional BED file: chromosome name -> IntervalTree
# of [start, end) ranges. Stays empty when --exclude is not given.
exclude_trees = {}
if args.exclude is not None:
    for line in args.exclude:
        stripped = line.strip()
        # Blank lines and '#' comments carry no coordinates.
        if not stripped or stripped.startswith('#'):
            continue
        # all of BED format that we care about is chromosome, start, end
        fields = stripped.split('\t')
        # "track" / "browser" header lines are not features either.
        if fields[0].split(None, 1)[0] in ('track', 'browser'):
            continue
        if len(fields) < 3:
            continue
        chrom = fields[0]
        start = int(fields[1])
        end = int(fields[2])
        if start >= end:
            # Zero-length (or inverted) BED features cover no bases, and
            # intervaltree refuses to store null intervals.
            continue
        if chrom not in exclude_trees:
            exclude_trees[chrom] = intervaltree.IntervalTree()
        exclude_trees[chrom][start:end] = True
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
74
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
# Variant classes applied to the reference; only SNPs are enabled.
do_inserts = False
do_deletes = False
do_snps = True
if (do_inserts or do_deletes) and args.remove_invariant:
    # sys.exit rather than the interactive ``exit`` helper, which is only
    # installed by the ``site`` module.
    sys.exit("Cannot combine indel processing with 'remove_invariant' argument")

# SeqIO.read expects the reference FASTA to hold exactly one record.
reference_seq = SeqIO.read(args.reference_file, "fasta")
reference = str(reference_seq.seq)

insertions = {}        # sequence name -> list of insertion records
insertion_sites = []   # (affected_start, affected_end, alt_sequence, seqname) tuples
tree = intervaltree.IntervalTree()
sequence_names = []    # sequence names in processing order
sequences = {}         # sequence name -> mutable list of bases copied from the reference
if args.remove_invariant:
    # Positions (affected_start) touched by at least one SNP.
    variant_sites = set()

vcf_list = []
if args.vcf_dir:
    # glob order is filesystem dependent; sort so the sequence order in the
    # output is reproducible between runs.
    vcf_list = sorted(str(path) for path in Path(args.vcf_dir).glob('*.vcf'))
elif args.vcf_files:
    vcf_list = args.vcf_files
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
103
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
104 for i, vcf_descriptor in enumerate(vcf_list):
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
105 # print(os.path.basename(vcf_descriptor))
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
106 if '^^^' in vcf_descriptor:
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
107 (seqname, vcf_filename) = vcf_descriptor.split('^^^')
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
108 else:
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
109 seqname = str(os.path.basename(vcf_descriptor)).rsplit('.vcf', 1)[0]
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
110 vcf_filename = vcf_descriptor
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
111 sequence_names.append(seqname)
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
112 sequence = list(reference)
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
113 sequences[seqname] = sequence
4
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
114
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
115 insertions[seqname] = []
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
116 count = 0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
117 for record in vcf.VCFReader(filename=vcf_filename):
4
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
118 if args.exclude:
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
119 if record.CHROM in exclude_trees:
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
120 tree = exclude_trees[record.CHROM]
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
121 if tree.overlaps(record.affected_start, record.affected_end):
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
122 continue
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
123 type = "unknown"
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
124 if record.is_snp and do_snps:
4
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
125 type = "snp"
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
126 try:
4
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
127 sequence[record.affected_start] = str(record.alleles[1]) # SNP, simply insert alt allele
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
128 except IndexError as e:
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
129 print("snp: Error assigning to {}:{}: {}".format(record.affected_start, record.affected_end, str(e)), file=sys.stderr)
3
62fbd3f96b30 planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents: 2
diff changeset
130 if args.remove_invariant:
62fbd3f96b30 planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents: 2
diff changeset
131 variant_sites.add(record.affected_start)
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
132 count += 1
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
133 elif record.is_indel:
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
134 length = record.affected_end - record.affected_start
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
135 if record.is_deletion and do_deletes:
4
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
136 type = "del"
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
137 try:
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
138 sequence[record.affected_start:record.affected_end] = ['-'] * length
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
139 except IndexError as e:
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
140 print("del: Error assigning to {}:{}: {}".format(record.affected_start, record.affected_end, str(e)), file=sys.stderr)
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
141 count += 1
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
142 else:
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
143 if do_inserts:
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
144 print("Warning: insert processing from VCF is dangerously broken", file=sys.stderr)
4
f58178c0f00d planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
sanbi-uwc
parents: 3
diff changeset
145 type = "ins"
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
146 # insertions[seqname].append(record)
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
147 ref = str(record.alleles[0])
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
148 alt = str(record.alleles[1])
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
149 # print("ins", alt.startswith(ref), fuzzysearch(ref, alt), ref, alt, record.affected_start, record.affected_end, len(alt) - len(ref), len(alt), len(ref), record.affected_end - record.affected_start + 1)
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
150 alt_sequence = alt[len(ref) - 1:] if alt.startswith(ref) else alt
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
151 insertion_sites.append((record.affected_start, record.affected_end, alt_sequence, seqname))
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
152 # interval = intervaltree.Interval(record.affected_start, record.affected_start + length, data=[seqname])
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
153 # if interval in tree:
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
154 # existing_interval = tree[interval.begin:interval.end + 1]
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
155 # start = min([existing_interval.begin, interval.begin])
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
156 # end = max([existing_interval.end, interval.end])
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
157 # tree.remove(existing_interval)
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
158 # new_interval = intervaltree.Interval(start, end, existing_interval.data + interval.data)
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
159 # tree.add(new_interval)
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
160
3
62fbd3f96b30 planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents: 2
diff changeset
# Write the reference as the first FASTA record. With --remove_invariant the
# reference is reduced to the positions listed in variant_sites; otherwise
# the reference record is written unchanged.
if not args.remove_invariant:
    reference_record_out = reference_seq
else:
    variant_bases = [base for (position, base) in enumerate(reference) if position in variant_sites]
    reference_record_out = SeqRecord(
        Seq(''.join(variant_bases)),
        id=reference_seq.id,
        description=reference_seq.description,
    )
SeqIO.write(reference_record_out, args.output_file, "fasta")
62fbd3f96b30 planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 8a59904b63da8bcb647c8afbc2a88070c51a697e
sanbi-uwc
parents: 2
diff changeset
167
0
cc255feec53b planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff changeset
# Apply the collected insertion sites to every sample sequence and append
# each sample to the FASTA output, then close the output file.
#
# The site list is not modified while the samples are written, so it is
# sorted by start coordinate once instead of once per sample.
ordered_insertion_sites = sorted(insertion_sites, key=itemgetter(0))

for name in sequence_names:
    sequence = sequences[name]
    for (start, end, allele, seqname) in ordered_insertion_sites:
        # The sample that carries the insertion receives the allele itself;
        # every other sample receives gap characters of the same length.
        if name == seqname:
            replacement = list(str(allele))
        else:
            replacement = ['-'] * len(allele)
        # NOTE(review): when len(allele) != end - start this assignment
        # changes the length of `sequence`, and the coordinates of later
        # sites are not shifted to compensate (offset tracking was disabled
        # in the original code). Insert handling is known to be unreliable.
        try:
            sequence[start:end] = replacement
        except IndexError as e:
            print("ins: Error assigning to {}:{}: {}".format(start, end, str(e)), file=sys.stderr)

    if args.remove_invariant:
        # Keep only the positions recorded as variant sites.
        seq_str = ''.join([c for (i, c) in enumerate(sequence) if i in variant_sites])
    else:
        seq_str = ''.join(sequence)
    # The Seq is built without an alphabet argument, like the reference
    # record earlier in the script: FASTA output does not depend on it and
    # Biopython >= 1.78 no longer accepts it.
    SeqIO.write(SeqRecord(Seq(seq_str), id=name, description=""), args.output_file, "fasta")

args.output_file.close()