Mercurial > repos > sanbi-uwc > vcf_to_alignment
annotate vcf_to_msa.py @ 0:cc255feec53b draft
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
author | sanbi-uwc |
---|---|
date | Wed, 01 Feb 2017 06:56:24 -0500 |
parents | |
children | a0c85f2d74a5 |
rev | line source |
---|---|
0
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
2 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
3 import argparse |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
4 import sys |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
5 from Bio import SeqIO |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
6 from Bio.SeqRecord import SeqRecord |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
7 from Bio.Seq import Seq |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
8 from Bio.Alphabet import IUPAC |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
9 import os.path |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
10 import vcf |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
11 import intervaltree |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
12 from operator import itemgetter |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
13 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
14 difference = lambda x,y: 0 if x == y else 1 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
15 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
16 string_difference = lambda query, target, query_len: sum((difference(query[i], target[i])) for i in range(query_len)) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
17 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
18 def fuzzysearch(query, target): |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
19 query_len = len(query) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
20 target_len = len(target) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
21 assert query_len <= target_len, "query cannot be longer than target" |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
22 min_distance = string_difference(query, target, query_len) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
23 best_pos = 0 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
24 for i in range(0, target_len - query_len + 1): |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
25 distance = string_difference(query, target[i:i+query_len], query_len) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
26 if distance < min_distance: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
27 (min_distance, best_pos) = (distance, i) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
28 return best_pos |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
29 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
30 parser = argparse.ArgumentParser() |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
31 parser.add_argument('--vcf_files', nargs="+") |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
32 parser.add_argument('--reference_file', type=argparse.FileType()) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
33 parser.add_argument('--output_file', type=argparse.FileType('w')) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
34 args = parser.parse_args() |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
35 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
36 do_inserts = False |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
37 do_deletes = False |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
38 do_snps = True |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
39 # reference = str(SeqIO.read(os.path.expanduser("~/Data/fasta/NC_000962.fna"), "fasta").seq) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
40 # print(reference, file=open('/tmp/reference.txt', 'w')) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
41 # vcf_files_dir = os.path.expanduser("~/Data/vcf") |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
42 # vcf_files = [os.path.join(vcf_files_dir, "vcf{}.vcf".format(num)) for num in range(1,4)] |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
43 # print(vcf_files) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
44 reference_seq = SeqIO.read(args.reference_file, "fasta") |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
45 reference = str(reference_seq.seq) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
46 # output_file = open(os.path.join(os.path.expanduser("~/Data/fasta/vcf_to_msa"), 'output.fasta'), 'w') |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
47 insertions = {} |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
48 insertion_sites = [] |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
49 tree = intervaltree.IntervalTree() |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
50 sequence_names = [] |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
51 sequences = {} |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
52 for i, vcf_descriptor in enumerate(args.vcf_files): |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
53 # seqname = os.path.splitext(os.path.basename(vcf_filename))[0] |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
54 (seqname,vcf_filename) = vcf_descriptor.split('^^^') |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
55 sequence_names.append(seqname) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
56 sequence = list(reference) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
57 sequences[seqname] = sequence |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
58 print(seqname) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
59 # tsv_filename = vcf_filename.replace(".vcf", ".tsv") |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
60 # output = open(tsv_filename, "wb") |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
61 insertions[seqname] = [] |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
62 count = 0 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
63 for record in vcf.VCFReader(filename=vcf_filename): |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
64 type="unknown" |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
65 if record.is_snp and do_snps: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
66 type="snp" |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
67 try: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
68 sequence[record.affected_start] = str(record.alleles[1]) # SNP, simply insert alt allele |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
69 except IndexError as e: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
70 print("snp: Error assigning to {}:{}: {}".format(record.affected_start, record.affected_end, str(e)), file=sys.stderr) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
71 count += 1 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
72 elif record.is_indel: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
73 length = record.affected_end - record.affected_start |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
74 if record.is_deletion and do_deletes: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
75 type="del" |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
76 try: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
77 sequence[record.affected_start:record.affected_end] = ['-'] * length |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
78 except IndexError as e: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
79 print("del: Error assigning to {}:{}: {}".format(record.affected_start, record.affected_end, str(e)), file=sys.stderr) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
80 count += 1 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
81 else: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
82 if do_inserts: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
83 print("Warning: insert processing from VCF is dangerously broken", file=sys.stderr) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
84 type="ins" |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
85 # insertions[seqname].append(record) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
86 ref = str(record.alleles[0]) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
87 alt = str(record.alleles[1]) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
88 # print("ins", alt.startswith(ref), fuzzysearch(ref, alt), ref, alt, record.affected_start, record.affected_end, len(alt) - len(ref), len(alt), len(ref), record.affected_end - record.affected_start + 1) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
89 alt_sequence = alt[len(ref) - 1:] if alt.startswith(ref) else alt |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
90 insertion_sites.append((record.affected_start, record.affected_end, alt_sequence, seqname)) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
91 # interval = intervaltree.Interval(record.affected_start, record.affected_start + length, data=[seqname]) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
92 # if interval in tree: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
93 # existing_interval = tree[interval.begin:interval.end + 1] |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
94 # start = min([existing_interval.begin, interval.begin]) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
95 # end = max([existing_interval.end, interval.end]) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
96 # tree.remove(existing_interval) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
97 # new_interval = intervaltree.Interval(start, end, existing_interval.data + interval.data) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
98 # tree.add(new_interval) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
99 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
100 SeqIO.write(reference_seq, args.output_file, "fasta") |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
101 offset = 0 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
102 for name in sequence_names: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
103 sequence = sequences[name] |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
104 for site in sorted(insertion_sites, key=itemgetter(0)): |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
105 (start, end, allele, seqname) = site |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
106 # print(start, allele, seqname) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
107 length = len(allele) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
108 # start += offset |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
109 # end += offset |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
110 # offset += length |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
111 try: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
112 if name == seqname: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
113 sequence[start:end] = list(str(allele)) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
114 else: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
115 sequence[start:end] = ['-'] * length |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
116 except IndexError as e: |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
117 print("ins: Error assigning to {}:{}: {}".format(start, end, str(e)), file=sys.stderr) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
118 SeqIO.write(SeqRecord(Seq(''.join(sequence), alphabet=IUPAC.ambiguous_dna), id=name, description=""), args.output_file, "fasta") |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
119 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
120 # output.write(bytes("\t".join([type, str(record.affected_start), str(record.affected_end), str(record.alleles[0]), str(record.alleles[1])])+"\n", encoding="ascii")) |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
121 # output.close() |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
122 |
cc255feec53b
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit bc8fd85986b54f9d000e7d5869876fc9e479b6eb
sanbi-uwc
parents:
diff
changeset
|
123 args.output_file.close() |