annotate vcf2hrdetect.py @ 6:0dc7ea74bf61 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 5f62b58c65917afbb018296568c81ad132106012
author artbio
date Wed, 05 Apr 2023 15:55:36 +0000
parents 41704ce96c7f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
41704ce96c7f "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 558dc75a7a265e2b3572d9ce938d9f9f5673abda"
artbio
parents:
diff changeset
"""Convert a lumpy/smoove structural-variant VCF into an HRDetect table.

Usage: vcf2hrdetect.py <input.vcf> <output.tsv>

The output is a tab-separated table with the columns
chr1, pos1, chr2, pos2 and type, one row per structural variant.
"""
import sys


OUTPUT_HEADER = "chr1\tpos1\tchr2\tpos2\ttype\n"


def _parse_breakend_mate(alt):
    """Return (chrom, pos) of the mate named in a breakend ALT field.

    The mate locus is the text enclosed by the first pair of identical
    brackets, e.g. "N]chr1:1234]" or "[chr1:1234[N". Returns None when
    *alt* holds no bracket or no "chrom:pos" locus.
    """
    for bracket in ("]", "["):
        if bracket in alt:
            locus = alt.split(bracket)[1]
            # Split on the LAST colon so that contig names which
            # themselves contain ":" are kept whole.
            chrom, sep, pos = locus.rpartition(":")
            if not sep:
                return None
            return chrom, pos
    return None


def _parse_info(info):
    """Return the INFO column as a dict; flags map to an empty string.

    Keys are matched exactly, so "END" is never confused with "CIEND".
    If a key is repeated, the first occurrence wins.
    """
    fields = {}
    for item in info.split(";"):
        key, _, value = item.partition("=")
        fields.setdefault(key, value)
    return fields


def vcf_to_hrdetect_rows(lines):
    """Build the HRDetect rows from an iterable of VCF text lines.

    Header lines (starting with "#") and blank lines are ignored.
    Records are indexed by their ID column; if an ID is repeated, the
    last record wins.

    Rows are emitted in two passes:
    1. Records whose ID ends with "_1" (first mate of a breakend pair)
       give one row per pair, typed "INV" when both mates lie on the
       same chromosome and "TRA" otherwise. Such records whose ALT
       field names no mate locus are skipped.
    2. Records whose ID contains no "_" give one row each, using the
       END and SVTYPE values of the INFO column.

    Returns a list of [chr1, pos1, chr2, pos2, type] lists of strings.
    Raises ValueError on a record with fewer than 8 columns, or on a
    non-breakend record lacking END or SVTYPE in its INFO column.
    """
    records = {}
    for number, raw in enumerate(lines, start=1):
        # Strip only line terminators: slicing off the last character
        # would eat data when the final line has no trailing newline.
        line = raw.rstrip("\r\n")
        if not line.strip() or line.startswith("#"):
            continue
        columns = line.split("\t")
        if len(columns) < 8:
            raise ValueError(
                "line %d: expected at least 8 tab-separated columns, "
                "found %d" % (number, len(columns)))
        records[columns[2]] = columns

    rows = []

    # Pass 1: breakend pairs, described by their "_1" mate only.
    for sv_id, columns in records.items():
        if not sv_id.endswith("_1"):
            continue
        mate = _parse_breakend_mate(columns[4])
        if mate is None:
            continue
        mate_chrom, mate_pos = mate
        sv_type = "INV" if columns[0] == mate_chrom else "TRA"
        rows.append([columns[0], columns[1], mate_chrom, mate_pos, sv_type])

    # Pass 2: every SV that is not part of a breakend pair.
    for sv_id, columns in records.items():
        if "_" in sv_id:
            continue
        info = _parse_info(columns[7])
        if "END" not in info or "SVTYPE" not in info:
            raise ValueError(
                "record %r: INFO column lacks END and/or SVTYPE" % sv_id)
        rows.append([columns[0], columns[1], columns[0],
                     info["END"], info["SVTYPE"]])
    return rows


def main(argv=None):
    """Read the VCF named by argv[1] and write the table to argv[2]."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit("usage: %s <input.vcf> <output.tsv>" % argv[0])
    # Context managers guarantee both files are closed, even on error.
    with open(argv[1], "r") as handle:
        rows = vcf_to_hrdetect_rows(handle)
    with open(argv[2], "w") as out:
        out.write(OUTPUT_HEADER)
        for row in rows:
            out.write("\t".join(row) + "\n")


if __name__ == "__main__":
    main()