Mercurial > repos > artbio > lumpy_smoove
annotate vcf2hrdetect.py @ 4:41704ce96c7f draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 558dc75a7a265e2b3572d9ce938d9f9f5673abda"
| author | artbio |
|---|---|
| date | Sun, 18 Apr 2021 17:07:32 +0000 |
| parents | |
| children | 0dc7ea74bf61 |
| rev | line source |
|---|---|
|
4
41704ce96c7f
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 558dc75a7a265e2b3572d9ce938d9f9f5673abda"
artbio
parents:
diff
changeset
|
"""Convert a structural-variant VCF into a five-column tab-separated table.

Usage: vcf2hrdetect.py <input.vcf> <output.tsv>

The output has the header ``chr1  pos1  chr2  pos2  type`` followed by one
row per structural variant:

* Breakend pairs (record IDs ending in ``_1`` / ``_2``) are collapsed into a
  single row built from the ``_1`` record and the mate position found in its
  ALT column. The row is typed ``INV`` when both ends are on the same
  chromosome and ``TRA`` otherwise.
* Every record whose ID contains no underscore is reported with its own
  chromosome on both sides, its POS, and the ``END`` / ``SVTYPE`` values read
  from its INFO column.
"""
import sys


def parse_vcf(lines):
    """Index the data records of a VCF by their ID column.

    Args:
        lines: iterable of text lines (an open file works).

    Returns:
        dict mapping the ID column (3rd field) to the list of tab-separated
        fields of that record. Header lines (starting with ``#``) and blank
        lines are skipped. When an ID occurs more than once, the last record
        wins.
    """
    records = {}
    for raw in lines:
        # Strip only the line terminator: slicing off the last character
        # would corrupt a final line that has no trailing newline.
        record = raw.rstrip("\r\n")
        if not record or record.startswith("#"):
            continue
        fields = record.split("\t")
        records[fields[2]] = fields
    return records


def _parse_breakend(alt):
    """Return ``(chrom, pos)`` of the mate named in a breakend ALT string.

    The mate locus is the ``chrom:pos`` text enclosed by ``]`` or ``[``
    (e.g. ``N]chr1:900]`` or ``[chr5:700[N``). Returns ``None`` when *alt*
    holds no such bracketed locus.
    """
    for bracket in ("]", "["):
        if bracket in alt:
            locus = alt.split(bracket)[1]
            # Split on the LAST colon so contig names that themselves
            # contain a colon are kept whole.
            chrom, sep, pos = locus.rpartition(":")
            if not sep:
                return None
            return chrom, pos
    return None


def _parse_info(info):
    """Split a VCF INFO string into a ``{key: value}`` dict.

    Flags without ``=`` map to an empty string. Looking keys up exactly
    avoids mistaking ``CIEND=`` for ``END=``. The first occurrence of a key
    is the one kept.
    """
    parsed = {}
    for item in info.split(";"):
        key, _, value = item.partition("=")
        parsed.setdefault(key, value)
    return parsed


def build_sv_table(records):
    """Build the output rows from records indexed by :func:`parse_vcf`.

    Args:
        records: dict mapping record ID to its list of VCF fields.

    Returns:
        dict mapping an SV identifier to ``[chr1, pos1, chr2, pos2, type]``
        (all strings). Breakend-derived rows come first, then the remaining
        records, each group in input order.

    Raises:
        ValueError: if a non-breakend record lacks ``END`` or ``SVTYPE`` in
            its INFO column.
    """
    table = {}

    # Pass 1: one row per breakend pair, taken from the "_1" mate only.
    for sv_id, fields in records.items():
        if not sv_id.endswith("_1"):
            continue
        mate = _parse_breakend(fields[4])
        if mate is None:
            # No bracketed mate locus: nothing reliable to report. Skipping
            # avoids reusing coordinates left over from a previous record.
            continue
        mate_chrom, mate_pos = mate
        sv_type = "INV" if fields[0] == mate_chrom else "TRA"
        table[sv_id[:-2]] = [fields[0], fields[1],
                             mate_chrom, mate_pos, sv_type]

    # Pass 2: records whose ID has no underscore (not part of a breakend
    # pair); both ends are on the record's own chromosome.
    for sv_id, fields in records.items():
        if "_" in sv_id:
            continue
        info = _parse_info(fields[7])
        if "END" not in info or "SVTYPE" not in info:
            raise ValueError(
                "record %s has no END and/or SVTYPE in its INFO column"
                % sv_id)
        table[sv_id] = [fields[0], fields[1], fields[0],
                        info["END"], info["SVTYPE"]]
    return table


def main(argv=None):
    """Read the VCF named by the first argument, write the table to the second.

    Args:
        argv: list of command-line arguments without the program name;
            defaults to ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else argv
    if len(args) < 2:
        sys.exit("usage: vcf2hrdetect.py <input.vcf> <output.tsv>")
    with open(args[0], 'r') as handle:
        records = parse_vcf(handle)
    table = build_sv_table(records)
    with open(args[1], 'w') as out:
        out.write("chr1\tpos1\tchr2\tpos2\ttype\n")
        for row in table.values():
            out.write("\t".join(row) + "\n")


if __name__ == "__main__":
    main()
