Mercurial > repos > elixir-it > vep86_vcf2maf
changeset 0:786c9295d2be draft
Uploaded
author | elixir-it |
---|---|
date | Tue, 03 Jul 2018 04:38:21 -0400 |
parents | |
children | 7a54ac4976d0 |
files | test-data/input.vcf test-data/vcftomaf_output.maf test-data/vep_annotate_results.vcf vcf2maf-macros.xml vcf2maf.pl vep-annotate-macros.xml vep-download-cache-macros.xml vep-unico-macros.xml vep_unico.xml |
diffstat | 9 files changed, 1848 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.vcf Tue Jul 03 04:38:21 2018 -0400 @@ -0,0 +1,100 @@ +##fileformat=VCFv4.1 +##fileDate=20180622 +##phasing=none +##reference=file:///export/galaxy/database/files/000/dataset_3.dat +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=IGT,Number=1,Type=String,Description="Genotype when called independently (only filled if called in joint prior mode)"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth"> +##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"> +##FORMAT=<ID=BCOUNT,Number=4,Type=Integer,Description="Occurrence count for each base at this site (A,C,G,T)"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> +##FORMAT=<ID=JGQ,Number=1,Type=Integer,Description="Joint genotype quality (only filled if called in join prior mode)"> +##FORMAT=<ID=VAQ,Number=1,Type=Integer,Description="Variant allele quality"> +##FORMAT=<ID=BQ,Number=.,Type=Integer,Description="Average base quality"> +##FORMAT=<ID=MQ,Number=1,Type=Integer,Description="Average mapping quality across all reads"> +##FORMAT=<ID=AMQ,Number=.,Type=Integer,Description="Average mapping quality for each allele present in the genotype"> +##FORMAT=<ID=SS,Number=1,Type=Integer,Description="Variant status relative to non-adjacent Normal, 0=wildtype,1=germline,2=somatic,3=LOH,4=unknown"> +##FORMAT=<ID=SSC,Number=1,Type=Integer,Description="Somatic Score"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR +chr1 133129 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:376:187,124,29,36:65,0,311,0:161:.:0:32:5:4:0:. 0/1:0/1:907:361,249,237,60:296,1,610,0:214:.:214:29,32:24:22,25:2:218 +chr1 133160 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:351:158,147,16,30:46,0,305,0:145:.:0:33:13:12:0:. 
0/1:0/1:871:370,354,61,86:145,0,724,2:30:.:30:32,33:35:36,35:2:57 +chr1 133483 . G T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:388:203,111,46,28:2,0,314,72:1:.:0:29:1:0:0:. 0/1:0/1:1101:577,245,160,119:1,0,822,278:41:.:41:29,30:11:10,14:2:71 +chr1 1250144 . T C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:10:4,6,0,0:0,0,0,10:57:.:0:29:22:22:0:. 0/1:0/1:18:9,6,0,3:0,3,0,15:6:.:6:33,29:52:35,55:2:28 +chr1 2280653 . A G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:55:24,16,2,13:40,0,15,0:55:.:0:25:15:19:0:. 0/1:0/1:70:15,19,8,28:34,0,36,0:118:.:118:25,30:28:41,15:2:28 +chr1 2280736 . A G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:8:2,6,0,0:8,0,0,0:51:.:0:31:18:18:0:. 0/1:0/1:8:2,3,0,3:5,0,3,0:19:.:19:27,31:25:20,34:2:24 +chr1 11522650 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:4,2,0,0:0,0,6,0:18:.:0:30:1:1:0:. 1/1:1/1:2:0,1,0,1:1,0,1,0:3:.:3:33:35:40:2:18 +chr1 57636120 . C A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:34:14,12,8,0:8,26,0,0:55:.:0:21:54:60:0:. 0/1:0/1:44:16,12,16,0:16,28,0,0:1:.:1:14,21:56:51,59:2:25 +chr1 72135390 . C A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:53:36,7,0,10:10,43,0,0:53:.:0:24:27:26:0:. 0/1:0/1:90:60,4,3,23:26,64,0,0:57:.:57:23,24:51:45,54:2:26 +chr1 116600445 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:24:0,22,0,2:2,0,22,0:55:.:0:33:32:34:0:. 0/1:0/1:30:0,20,0,10:10,0,20,0:42:.:42:32,33:48:27,58:2:28 +chr1 116600446 . A G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:22:0,22,0,0:22,0,0,0:93:.:0:30:34:34:0:. 0/1:0/1:29:0,20,0,9:20,0,9,0:28:.:28:30,32:48:58,25:2:55 +chr1 125167865 . T C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:4:3,1,0,0:0,0,0,4:14:.:0:29:0:0:0:. 1/1:1/1:1:0,0,0,1:0,1,0,0:22:.:22:34:22:22:2:16 +chr1 143192820 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:4,2,0,0:0,0,6,0:18:.:0:30:0:0:0:. 
1/1:1/1:2:0,1,1,0:1,0,1,0:26:.:26:32:20:40:2:19 +chr1 143192836 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:4,2,0,0:0,6,0,0:18:.:0:30:0:0:0:. 1/1:1/1:2:0,1,1,0:0,1,0,1:25:.:25:31:20:40:2:18 +chr1 147517696 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:0,6,0,0:0,6,0,0:45:.:0:33:60:60:0:. 0/1:0/1:9:1,4,1,3:0,5,0,4:63:.:63:33,32:60:60,60:2:18 +chr1 148359371 . C A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:8:8,0,0,0:0,8,0,0:51:.:0:32:38:38:0:. 0/1:0/1:15:12,0,0,3:3,12,0,0:3:.:3:29,33:55:60,54:2:23 +chr1 149390829 . C A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:33:0,12,3,18:20,12,0,1:2:.:0:27:3:8:0:. 0/1:0/1:39:1,22,1,15:16,23,0,0:20:.:20:31,25:33:12,48:2:25 +chr1 151171163 . A G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:2:0,2,0,0:2,0,0,0:33:.:0:33:60:60:0:. 1/1:1/1:1:0,0,0,1:0,0,1,0:30:.:30:32:60:60:2:16 +chr1 152303901 . C G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:563:229,216,52,66:1,445,117,0:11:.:0:32:0:0:0:. 0/1:0/1:2572:939,1036,377,220:2,1975,595,0:93:.:93:32,32:15:16,12:2:120 +chr1 152303983 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:614:273,243,43,55:0,516,0,98:73:.:0:33:3:3:0:. 0/1:0/1:2851:1013,1052,312,474:0,2065,0,786:228:.:228:33,31:28:29,27:2:247 +chr1 152304107 . C G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:394:195,173,13,13:0,368,26,0:255:.:0:33:18:20:0:. 0/1:0/1:1708:624,733,200,151:0,1357,351,0:155:.:155:33,32:41:47,18:2:182 +chr1 152304122 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:389:186,184,5,14:19,0,370,0:255:.:0:29:23:25:0:. 0/1:0/1:1605:602,679,166,158:324,0,1281,0:114:.:114:30,29:47:16,55:2:141 +chr1 152304150 . G C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:433:164,184,41,44:0,85,348,0:218:.:0:30:24:29:0:. 0/1:0/1:1470:533,682,135,120:0,255,1215,0:116:.:116:33,30:53:22,59:2:143 +chr1 152306079 . T C . . . 
GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:578:205,203,83,87:0,170,0,408:2:.:0:31:0:0:0:. 0/1:0/1:3958:1466,1740,451,301:0,750,2,3206:203:.:203:33,31:14:19,12:2:63 +chr1 152306338 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:411:137,165,68,41:0,302,0,109:135:.:0:30:22:27:0:. 0/1:0/1:4985:2106,2070,462,347:3,4176,2,804:228:.:228:30,32:50:54,31:2:108 +chr1 152306380 . T G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:487:206,225,32,24:0,0,56,431:203:.:0:31:10:9:0:. 0/1:0/1:5056:2043,2014,517,482:1,0,998,4057:228:.:228:29,32:36:39,35:2:228 +chr1 152307694 . C G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:537:220,192,50,75:0,412,125,0:57:.:0:33:1:1:0:. 0/1:0/1:2374:820,645,307,602:0,1465,909,0:228:.:228:32,31:22:19,27:2:145 +chr1 152307789 . C G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:574:227,204,65,78:1,431,142,0:7:.:0:32:0:0:0:. 0/1:0/1:2091:717,763,159,452:2,1480,609,0:228:.:228:32,31:14:13,17:2:111 +chr1 152307871 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:445:209,207,16,13:2,416,0,27:80:.:0:33:6:6:0:. 0/1:0/1:1915:846,652,315,102:1,1498,1,415:228:.:228:33,30:28:29,23:2:250 +chr1 152307896 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:367:184,183,0,0:0,0,367,0:114:.:0:29:9:9:0:. 0/1:0/1:1801:593,587,615,6:620,0,1180,1:228:.:228:31,29:34:26,39:2:250 +chr1 152307995 . C G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:429:173,167,41,48:0,340,89,0:135:.:0:33:15:17:0:. 0/1:0/1:3096:1236,990,353,517:1,2226,869,0:228:.:228:33,32:44:49,34:2:108 +chr1 152308083 . T C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:422:192,182,18,30:2,46,0,374:255:.:0:31:36:41:0:. 0/1:0/1:2559:1011,1235,84,229:2,311,0,2246:64:.:64:28,31:55:26,59:2:91 +chr1 152308424 . T G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:422:170,175,45,32:0,0,77,345:64:.:0:32:37:39:0:. 
0/1:0/1:3124:1059,1130,507,428:0,2,933,2189:228:.:228:29,31:55:48,58:2:37 +chr1 152308814 . A G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:424:122,174,86,42:296,1,127,0:88:.:0:31:21:27:0:. 0/1:0/1:3521:1103,1235,588,595:2338,1,1182,0:228:.:228:30,29:52:58,40:2:61 +chr1 152310807 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:394:172,150,22,50:2,322,0,70:51:.:0:30:40:40:0:. 0/1:0/1:2461:670,644,573,574:4,1314,3,1140:228:.:228:30,30:53:60,46:2:24 +chr1 158607804 . C A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:8:0,8,0,0:0,8,0,0:51:.:0:33:60:60:0:. 0/1:0/1:10:1,7,1,1:2,8,0,0:6:.:6:31,33:60:60,60:2:23 +chr1 161363118 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:171:24,101,8,38:0,125,0,46:77:.:0:33:10:9:0:. 0/1:0/1:326:37,152,29,108:0,189,0,137:228:.:228:33,31:35:33,37:2:50 +chr1 161444478 . T A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:90:27,33,21,9:30,0,0,60:47:.:0:31:4:4:0:. 0/1:0/1:85:39,15,25,6:31,0,0,54:61:.:61:31,31:13:20,9:2:20 +chr1 161444486 . A G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:86:23,33,21,9:56,0,30,0:43:.:0:30:4:5:0:. 0/1:0/1:73:29,13,25,6:42,0,31,0:74:.:74:29,26:15:12,20:2:16 +chr1 161444488 . A G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:88:22,35,21,10:57,0,31,0:42:.:0:30:4:5:0:. 0/1:0/1:74:30,13,24,7:43,0,31,0:72:.:72:29,28:15:12,19:2:15 +chr1 161444514 . T A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:99:10,60,19,10:29,0,0,70:54:.:0:29:3:3:0:. 0/1:0/1:70:10,25,22,13:35,0,0,35:98:.:153:29,29:17:21,12:2:33 +chr1 161444527 . C G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:125:13,90,12,10:0,103,22,0:6:.:0:32:2:1:0:. 0/1:0/1:74:15,38,12,9:0,53,21,0:85:.:85:32,31:16:14,23:2:109 +chr1 178725267 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:10:10,0,0,0:0,0,10,0:57:.:0:28:56:56:0:. 
0/1:0/1:10:7,0,1,2:3,0,7,0:25:.:25:28,28:60:60,60:2:30 +chr1 203166324 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:6,0,0,0:0,6,0,0:45:.:0:34:60:60:0:. 0/1:0/1:8:6,0,2,0:0,6,0,2:11:.:11:33,33:60:60,60:2:18 +chr1 222468242 . T C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:61:19,37,0,5:0,5,0,56:21:.:0:31:4:4:0:. 1/1:1/1:84:4,4,17,59:0,76,0,8:116:.:240:26:33:34:2:21 +chr1 223947556 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:91:64,7,20,0:20,0,71,0:1:.:0:28:0:0:0:. 0/1:0/1:65:42,3,19,1:20,0,45,0:26:.:38:32,28:16:26,11:2:57 +chr1 223947588 . G T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:117:85,18,7,7:0,0,103,14:1:.:0:32:0:0:0:. 1/1:1/1:87:39,24,20,4:0,0,63,24:11:.:60:29:11:28:2:20 +chr1 223947978 . G T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:254:171,20,53,10:0,0,191,63:1:.:0:32:1:2:0:. 0/1:0/1:430:205,20,192,13:0,0,225,205:225:.:225:32,30:35:39,31:2:134 +chr1 223948062 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:304:132,66,44,62:104,0,198,2:62:.:0:29:20:30:0:. 0/1:0/1:1124:380,206,360,178:537,0,586,1:228:.:228:31,29:47:36,58:2:35 +chr1 223948558 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:40:8,27,1,4:0,35,0,5:1:.:0:28:1:1:0:. 0/1:0/1:154:10,64,21,59:0,74,0,80:176:.:220:28,31:22:23,22:2:49 +chr1 223949699 . T C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:189:94,95,0,0:0,0,0,189:28:.:0:30:1:1:0:. 0/1:0/1:370:104,102,99,65:0,164,0,206:206:.:206:30,31:25:18,30:2:233 +chr1 223950486 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:79:14,4,49,12:0,18,0,61:58:.:0:30:10:31:0:. 0/1:0/1:115:19,6,52,38:0,25,0,90:73:.:177:30,30:31:56,25:2:31 +chr1 227976709 . T C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:10:8,2,0,0:0,0,0,10:22:.:0:25:1:1:0:. 1/1:1/1:91:3,1,82,5:0,86,1,4:25:.:25:32:3:2:2:20 +chr1 227977309 . A G . . . 
GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:170:101,30,27,12:131,0,38,1:116:.:0:31:9:11:0:. 0/1:0/1:461:185,51,194,31:236,0,225,0:228:.:228:31,28:38:48,27:2:113 +chr1 234978406 . C G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:2:2,0,0,0:0,2,0,0:33:.:0:32:60:60:0:. 1/1:1/1:1:0,0,1,0:0,0,1,0:12:.:12:12:60:60:2:15 +chr1 234978426 . C G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:2:2,0,0,0:0,2,0,0:33:.:0:31:60:60:0:. 1/1:1/1:1:0,0,1,0:0,0,1,0:30:.:30:35:60:60:2:16 +chr1 234978429 . T C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:2:2,0,0,0:0,0,0,2:33:.:0:34:60:60:0:. 1/1:1/1:1:0,0,1,0:0,1,0,0:25:.:25:25:60:60:2:16 +chr1 240207797 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:161:61,72,12,16:26,2,133,0:59:.:0:28:20:18:0:. 0/1:0/1:245:67,113,13,52:64,1,180,0:34:.:34:30,28:38:47,35:2:32 +chr1 240207800 . C A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:152:57,69,12,14:22,126,4,0:95:.:0:30:18:17:0:. 0/1:0/1:225:60,107,14,44:53,167,5,0:8:.:8:30,29:37:45,34:2:35 +chr1 240207806 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:144:57,65,10,12:1,122,0,21:114:.:0:32:15:14:0:. 0/1:0/1:216:59,105,12,40:1,164,0,51:13:.:13:31,29:34:31,45:2:40 +chr1 240207812 . G A,T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/2:0/2:124:22,15,33,54:12,8,37,67:37:.:69:21,26:10:5,12:1:. 1/2:1/2:185:12,4,50,119:31,10,16,128:95:.:165:27,26:28:38,27:2:69 +chr1 240208175 . T G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:125:51,50,10,14:0,7,17,101:155:.:0:28:31:36:0:. 0/1:0/1:146:64,47,12,23:0,5,30,111:2:.:2:29,27:47:24,53:2:29 +chr1 240472659 . G T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:0,6,0,0:0,0,6,0:45:.:0:25:60:60:0:. 0/1:0/1:7:0,5,0,2:0,0,5,2:8:.:8:28,30:60:60,60:2:18 +chr1 242810191 . T G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:71:26,30,3,12:0,0,15,56:88:.:0:30:18:22:0:. 
0/1:0/1:171:59,40,54,18:0,0,72,99:219:.:219:31,30:39:35,42:2:61 +chr1 242810198 . T G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:79:24,34,3,18:0,0,21,58:76:.:0:30:16:21:0:. 0/1:0/1:177:57,38,54,28:0,0,82,95:228:.:228:30,30:39:35,42:2:49 +chr1 243052907 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:7:0,7,0,0:0,7,0,0:15:.:0:25:0:0:0:. 0/1:0/1:17:0,11,0,6:0,11,0,6:23:.:23:28,31:25:26,24:2:21 +chr1 244891249 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:0,6,0,0:0,6,0,0:45:.:0:30:60:60:0:. 0/1:0/1:7:0,5,0,2:0,5,0,2:4:.:4:22,28:54:60,40:2:18 +chr2 3035112 . T C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:263:82,120,30,31:0,59,2,202:64:.:0:29:49:57:0:. 0/1:0/1:559:136,265,60,98:0,157,1,401:167:.:167:27,30:53:35,60:2:37 +chr2 3329166 . G C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:66:24,30,6,6:0,12,54,0:73:.:0:31:28:32:0:. 0/1:0/1:64:19,28,10,7:0,13,47,4:19:.:19:28,31:39:23,47:2:43 +chr2 3329223 . G C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:34:11,16,5,2:0,7,27,0:51:.:0:32:34:43:0:. 0/1:0/1:36:9,13,6,8:0,7,22,7:0:.:76:30,31:42:29,51:2:27 +chr2 3329230 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:30:9,12,7,2:7,0,21,2:44:.:0:32:35:46:0:. 0/1:0/1:31:8,10,5,8:12,0,18,1:116:.:116:30,31:44:31,52:2:17 +chr2 3329243 . A G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:29:13,14,0,2:27,0,2,0:68:.:0:30:34:36:0:. 0/1:0/1:31:9,11,2,9:20,0,11,0:59:.:59:30,31:39:48,22:2:41 +chr2 3775131 . G C . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:172:79,50,25,18:0,43,129,0:117:.:0:30:33:42:0:. 0/1:0/1:366:184,70,41,71:0,112,254,0:48:.:48:32,30:39:21,47:2:75 +chr2 10080680 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:6,0,0,0:0,6,0,0:45:.:0:34:60:60:0:. 0/1:0/1:5:3,0,2,0:0,3,0,2:18:.:18:34,32:60:60,60:2:18 +chr2 19985338 . C T . . . 
GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:41:18,5,0,18:2,23,0,16:45:.:0:32:17:24:0:. 0/1:0/1:32:15,5,0,12:1,20,0,11:14:.:14:32,28:23:27,15:2:18 +chr2 39404606 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:0,6,0,0:0,6,0,0:45:.:0:24:40:40:0:. 0/1:0/1:10:0,6,0,4:0,6,0,4:30:.:30:28,26:50:57,39:2:18 +chr2 39404609 . C T . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:0,6,0,0:0,6,0,0:45:.:0:31:40:40:0:. 0/1:0/1:9:0,5,0,4:0,5,0,4:7:.:7:32,17:49:56,39:2:18 +chr2 68306124 . C A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:8:8,0,0,0:0,8,0,0:51:.:0:32:60:60:0:. 0/1:0/1:8:6,0,0,2:2,6,0,0:3:.:3:29,32:57:50,59:2:23 +chr2 86942193 . T G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:279:133,92,27,27:0,0,54,225:31:.:0:32:2:2:0:. 0/1:0/1:771:252,262,121,136:0,1,256,514:228:.:228:31,31:17:21,16:2:213 +chr2 86942348 . C G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:141:12,100,4,25:0,112,29,0:12:.:0:26:2:2:0:. 0/1:0/1:265:11,176,5,73:0,187,78,0:11:.:11:26,32:26:25,27:2:38 +chr2 86942392 . C G . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:21:0,16,0,5:0,16,5,0:1:.:0:23:0:0:0:. 0/1:0/1:41:0,32,0,9:0,32,9,0:12:.:12:27,29:19:15,31:2:21 +chr2 89625699 . G A . . . GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:201:108,46,32,15:47,0,154,0:1:.:0:32:0:0:0:. 0/1:0/1:393:143,74,110,66:176,0,217,0:141:.:146:30,32:8:11,7:2:125
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/vcftomaf_output.maf Tue Jul 03 04:38:21 2018 -0400 @@ -0,0 +1,84 @@ +#version 2.4 +Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID HGVSc HGVSp HGVSp_Short Transcript_ID Exon_Number t_depth t_ref_count t_alt_count n_depth n_ref_count n_alt_count all_effects Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation ALLELE_NUM DISTANCE STRAND_VEP SYMBOL SYMBOL_SOURCE HGNC_ID BIOTYPE CANONICAL CCDS ENSP SWISSPROT TREMBL UNIPARC RefSeq SIFT PolyPhen EXON INTRON DOMAINS AF AFR_AF AMR_AF ASN_AF EAS_AF EUR_AF SAS_AF AA_AF EA_AF CLIN_SIG SOMATIC PUBMED MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE IMPACT PICK VARIANT_CLASS TSL HGVS_OFFSET PHENO MINIMISED ExAC_AF ExAC_AF_AFR ExAC_AF_AMR ExAC_AF_EAS ExAC_AF_FIN ExAC_AF_NFE ExAC_AF_OTH ExAC_AF_SAS GENE_PHENO FILTER flanking_bps variant_id variant_qual ExAC_AF_Adj ExAC_AC_AN_Adj ExAC_AC_AN ExAC_AC_AN_AFR ExAC_AC_AN_AMR ExAC_AC_AN_EAS ExAC_AC_AN_FIN ExAC_AC_AN_NFE ExAC_AC_AN_OTH ExAC_AC_AN_SAS ExAC_FILTER gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF +CICP27 0 . 
GRCh38 chr1 133129 133129 + RNA SNP G G A rs367730352 TUMOR NORMAL G G n.2105G>A ENST00000442987 1/1 907 610 296 376 311 65 RP11-34P13.7,intron_variant,,ENST00000610542,;RP11-34P13.7,intron_variant,,ENST00000453576,;RP11-34P13.7,upstream_gene_variant,,ENST00000471248,;RP11-34P13.7,upstream_gene_variant,,ENST00000477740,;CICP27,non_coding_transcript_exon_variant,,ENST00000442987,;RP11-34P13.15,downstream_gene_variant,,ENST00000494149,;RP11-34P13.16,downstream_gene_variant,,ENST00000595919,; A ENSG00000233750 ENST00000442987 Transcript non_coding_transcript_exon_variant,non_coding_transcript_variant 2105/3812 rs367730352 1 1 CICP27 HGNC HGNC:48835 processed_pseudogene YES 1/1 MODIFIER 1 SNV . TGT . . +CICP27 0 . GRCh38 chr1 133160 133160 + RNA SNP G G A rs371468694 TUMOR NORMAL G G n.2136G>A ENST00000442987 1/1 871 724 145 351 305 46 RP11-34P13.7,intron_variant,,ENST00000610542,;RP11-34P13.7,intron_variant,,ENST00000453576,;RP11-34P13.7,upstream_gene_variant,,ENST00000471248,;RP11-34P13.7,upstream_gene_variant,,ENST00000477740,;CICP27,non_coding_transcript_exon_variant,,ENST00000442987,;RP11-34P13.15,downstream_gene_variant,,ENST00000494149,;RP11-34P13.16,downstream_gene_variant,,ENST00000595919,; A ENSG00000233750 ENST00000442987 Transcript non_coding_transcript_exon_variant,non_coding_transcript_variant 2136/3812 rs371468694 1 1 CICP27 HGNC HGNC:48835 processed_pseudogene YES 1/1 MODIFIER 1 SNV . GGG . . +CICP27 0 . 
GRCh38 chr1 133483 133483 + RNA SNP G G T rs369820305 TUMOR NORMAL G G n.2459G>T ENST00000442987 1/1 1101 822 278 388 314 72 RP11-34P13.7,non_coding_transcript_exon_variant,,ENST00000610542,;RP11-34P13.7,non_coding_transcript_exon_variant,,ENST00000453576,;RP11-34P13.7,upstream_gene_variant,,ENST00000471248,;RP11-34P13.7,upstream_gene_variant,,ENST00000477740,;CICP27,non_coding_transcript_exon_variant,,ENST00000442987,;RP11-34P13.15,downstream_gene_variant,,ENST00000494149,;RP11-34P13.16,downstream_gene_variant,,ENST00000595919,; T ENSG00000233750 ENST00000442987 Transcript non_coding_transcript_exon_variant,non_coding_transcript_variant 2459/3812 rs369820305 1 1 CICP27 HGNC HGNC:48835 processed_pseudogene YES 1/1 MODIFIER 1 SNV . CGG . . +FAM132A 0 . GRCh38 chr1 1250144 1250144 + 5'Flank SNP T T C rs115270700 TUMOR NORMAL T T ENST00000330388 18 15 3 10 10 0 UBE2J2,downstream_gene_variant,,ENST00000347370,NM_194458.1;UBE2J2,downstream_gene_variant,,ENST00000349431,NM_058167.2;UBE2J2,downstream_gene_variant,,ENST00000400929,NM_194457.1;FAM132A,upstream_gene_variant,,ENST00000330388,NM_001014980.2;RP5-902P8.12,non_coding_transcript_exon_variant,,ENST00000565563,;UBE2J2,downstream_gene_variant,,ENST00000450390,;UBE2J2,downstream_gene_variant,,ENST00000464036,;,regulatory_region_variant,,ENSR00000000162,; C ENSG00000184163 ENST00000330388 Transcript upstream_gene_variant -/1036 -/909 -/302 rs115270700 1 3422 -1 FAM132A HGNC HGNC:32308 protein_coding YES CCDS30554.1 ENSP00000329137 Q5T7M4 UPI00001D7E7A NM_001014980.2 C:0.0877 C:0.0613 C:0.0504 C:0.1647 C:0.0666 C:0.0920 MODIFIER 1 SNV 1 . ATA . . +SKI 0 . 
GRCh38 chr1 2280653 2280653 + Intron SNP A A G rs186058331 TUMOR NORMAL A A c.970-22325A>G ENST00000378536 70 34 36 55 40 15 SKI,intron_variant,,ENST00000378536,NM_003036.3;SKI,intron_variant,,ENST00000478223,;SKI,intron_variant,,ENST00000508416,; G ENSG00000157933 ENST00000378536 Transcript intron_variant -/5613 -/2187 -/728 rs186058331 1 1 SKI HGNC HGNC:10896 protein_coding YES CCDS39.1 ENSP00000367797 P12755 UPI00001359DE NM_003036.3 1/6 MODIFIER 1 SNV 1 1 . CAC . . +SKI 0 . GRCh38 chr1 2280736 2280736 + Intron SNP A A G novel TUMOR NORMAL A A c.970-22242A>G ENST00000378536 8 5 3 8 8 0 SKI,intron_variant,,ENST00000378536,NM_003036.3;SKI,intron_variant,,ENST00000478223,;SKI,intron_variant,,ENST00000508416,; G ENSG00000157933 ENST00000378536 Transcript intron_variant -/5613 -/2187 -/728 1 1 SKI HGNC HGNC:10896 protein_coding YES CCDS39.1 ENSP00000367797 P12755 UPI00001359DE NM_003036.3 1/6 MODIFIER 1 SNV 1 1 . GAG . . +DISP3 0 . GRCh38 chr1 11522650 11522650 + Intron SNP G A A rs371097095 TUMOR NORMAL G G c.2363-1292G>A ENST00000294484 2 1 1 6 6 0 DISP3,intron_variant,,ENST00000294484,NM_020780.1;,regulatory_region_variant,,ENSR00000001409,; A ENSG00000204624 ENST00000294484 Transcript intron_variant -/5215 -/4179 -/1392 rs371097095 1 1 DISP3 HGNC HGNC:29251 protein_coding YES CCDS41247.1 ENSP00000294484 Q9P2K9 UPI00001C1D7A NM_020780.1 10/20 MODIFIER 1 SNV 1 . GGG . . +DAB1 0 . GRCh38 chr1 57636120 57636120 + Intron SNP C C A rs12079557 TUMOR NORMAL C C n.625+13472G>T ENST00000485760 44 28 16 34 26 8 DAB1,intron_variant,,ENST00000485760,; A ENSG00000173406 ENST00000485760 Transcript intron_variant,non_coding_transcript_variant -/2668 rs12079557 1 -1 DAB1 HGNC HGNC:2661 processed_transcript 7/20 MODIFIER 1 SNV 2 . ACA . . +NEGR1 0 . 
GRCh38 chr1 72135390 72135390 + Intron SNP C C A rs796166092 TUMOR NORMAL C C c.176+146929G>T ENST00000357731 90 64 26 53 43 10 NEGR1,intron_variant,,ENST00000357731,NM_173808.2; A ENSG00000172260 ENST00000357731 Transcript intron_variant -/12811 -/1065 -/354 rs796166092 1 -1 NEGR1 HGNC HGNC:17302 protein_coding YES CCDS661.1 ENSP00000350364 Q7Z3B1 UPI00000477EE NM_173808.2 1/6 MODIFIER 1 SNV 1 . ACA . . +IGSF3 0 . GRCh38 chr1 116600445 116600445 + Intron SNP G G A rs59940339 TUMOR NORMAL G G c.1685-100C>T ENST00000369483 30 20 10 24 22 2 IGSF3,intron_variant,,ENST00000369483,NM_001542.3;IGSF3,intron_variant,,ENST00000369486,NM_001007237.2;IGSF3,intron_variant,,ENST00000318837,; A ENSG00000143061 ENST00000369483 Transcript intron_variant -/7326 -/3645 -/1214 rs59940339 1 -1 IGSF3 HGNC HGNC:5950 protein_coding YES CCDS30814.1 ENSP00000358495 O75054 UPI0000140437 NM_001542.3 7/11 A:0.0960 A:0.1271 A:0.1023 A:0.1200 A:0.0467 A:0.0757 MODIFIER 1 SNV 5 1 . GGA . . +IGSF3 0 . GRCh38 chr1 116600446 116600446 + Intron SNP A A G rs57198501 TUMOR NORMAL A A c.1685-101T>C ENST00000369483 29 20 9 22 22 0 IGSF3,intron_variant,,ENST00000369483,NM_001542.3;IGSF3,intron_variant,,ENST00000369486,NM_001007237.2;IGSF3,intron_variant,,ENST00000318837,; G ENSG00000143061 ENST00000369483 Transcript intron_variant -/7326 -/3645 -/1214 rs57198501 1 -1 IGSF3 HGNC HGNC:5950 protein_coding YES CCDS30814.1 ENSP00000358495 O75054 UPI0000140437 NM_001542.3 7/11 G:0.0960 G:0.1271 G:0.1023 G:0.1200 G:0.0467 G:0.0757 MODIFIER 1 SNV 5 1 . GAC . . +ENSR00000012648 0 . GRCh38 chr1 125167865 125167865 + IGR SNP T C C novel TUMOR NORMAL T T ENSR00000012648 1 0 1 4 4 0 ,regulatory_region_variant,,ENSR00000012648,; C ENSR00000012648 RegulatoryFeature regulatory_region_variant 1 TF_binding_site MODIFIER 1 SNV . TTC . . +ENSR00000012650 0 . 
GRCh38 chr1 143192820 143192820 + IGR SNP G A A novel TUMOR NORMAL G G ENSR00000012650 2 1 1 6 6 0 ,regulatory_region_variant,,ENSR00000012650,; A ENSR00000012650 RegulatoryFeature regulatory_region_variant 1 TF_binding_site MODIFIER 1 SNV . CGA . . +ENSR00000012650 0 . GRCh38 chr1 143192836 143192836 + IGR SNP C T T novel TUMOR NORMAL C C ENSR00000012650 2 1 1 6 6 0 ,regulatory_region_variant,,ENSR00000012650,; T ENSR00000012650 RegulatoryFeature regulatory_region_variant 1 TF_binding_site MODIFIER 1 SNV . ACG . . +LINC00624 0 . GRCh38 chr1 147517696 147517696 + RNA SNP C C T rs12046574 TUMOR NORMAL C C n.180G>A ENST00000621316 1/4 9 5 4 6 6 0 LINC00624,non_coding_transcript_exon_variant,,ENST00000621316,;LINC00624,non_coding_transcript_exon_variant,,ENST00000619867,; T ENSG00000278811 ENST00000621316 Transcript non_coding_transcript_exon_variant,non_coding_transcript_variant 180/3372 rs12046574 1 -1 LINC00624 HGNC HGNC:44254 antisense YES 1/4 T:0.3027 T:0.2042 T:0.4726 T:0.2321 T:0.3569 T:0.3323 MODIFIER 1 SNV 1 . CCT . . +LINC01138 0 . GRCh38 chr1 148359371 148359371 + 3'Flank SNP C C A novel TUMOR NORMAL C C ENST00000622328 15 12 3 8 8 0 LINC01138,downstream_gene_variant,,ENST00000622328,;RP6-74O6.6,upstream_gene_variant,,ENST00000609678,;RNVU1-1,downstream_gene_variant,,ENST00000384610,; A ENSG00000274020 ENST00000622328 Transcript downstream_gene_variant -/2212 1 3624 -1 LINC01138 HGNC HGNC:49454 lincRNA YES MODIFIER 1 SNV 2 . TCA . . +NBPF19 0 . GRCh38 chr1 149390829 149390829 + 5'UTR SNP C C A novel TUMOR NORMAL C C c.-1064C>A ENST00000621744 1/97 39 23 16 33 12 20 NBPF19,5_prime_UTR_variant,,ENST00000621744,;,regulatory_region_variant,,ENSR00000013370,; A ENSG00000271383 ENST00000621744 Transcript 5_prime_UTR_variant 207/14425 -/11532 -/3843 1 1 NBPF19 HGNC HGNC:31999 protein_coding YES ENSP00000478752 A0A087WUL8 UPI000387BE73 1/97 MODIFIER 1 SNV 5 . GCG . . +TMOD4 0 . 
GRCh38 chr1 151171163 151171163 + Intron SNP A G G novel TUMOR NORMAL A A c.727-100T>C ENST00000295314 1 0 1 2 2 0 TMOD4,intron_variant,,ENST00000295314,NM_013353.2;TMOD4,intron_variant,,ENST00000466891,;SCNM1,downstream_gene_variant,,ENST00000368905,NM_024041.3;SCNM1,downstream_gene_variant,,ENST00000368902,NM_001204856.1;SCNM1,downstream_gene_variant,,ENST00000602841,NM_001204848.1;TMOD4,downstream_gene_variant,,ENST00000441701,;SCNM1,downstream_gene_variant,,ENST00000461862,;SCNM1,downstream_gene_variant,,ENST00000497147,;TMOD4,downstream_gene_variant,,ENST00000601585,;SCNM1,downstream_gene_variant,,ENST00000459799,;SCNM1,downstream_gene_variant,,ENST00000471039,;TMOD4,downstream_gene_variant,,ENST00000488488,;VPS72,intron_variant,,ENST00000491094,;TMOD4,intron_variant,,ENST00000463543,; G ENSG00000163157 ENST00000295314 Transcript intron_variant -/1267 -/1038 -/345 1 -1 TMOD4 HGNC HGNC:11874 protein_coding YES CCDS988.1 ENSP00000295314 Q9NZQ9 UPI00000015C0 NM_013353.2 7/9 MODIFIER 1 SNV 1 . GAG . . +FLG 0 . GRCh38 chr1 152303901 152303901 + Missense_Mutation SNP C C G rs75235053 TUMOR NORMAL C C c.10985G>C p.Ser3662Thr p.S3662T ENST00000368799 3/3 2572 1975 595 563 445 117 FLG,missense_variant,p.Ser3662Thr,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; G ENSG00000143631 ENST00000368799 Transcript missense_variant 11021/12747 10985/12186 3662/4061 S/T aGt/aCt rs75235053 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 deleterious(0.01) unknown(0) 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571,Pfam_domain:PF03516 MODERATE 1 SNV 1 1 . ACT . . +FLG 0 . 
GRCh38 chr1 152303983 152303983 + Missense_Mutation SNP C C T rs75448155 TUMOR NORMAL C C c.10903G>A p.Asp3635Asn p.D3635N ENST00000368799 3/3 2851 2065 786 614 516 98 FLG,missense_variant,p.Asp3635Asn,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; T ENSG00000143631 ENST00000368799 Transcript missense_variant 10939/12747 10903/12186 3635/4061 D/N Gac/Aac rs75448155 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 deleterious(0) unknown(0) 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571,Pfam_domain:PF03516 T:0.0268 T:0.103 MODERATE 1 SNV 1 1 . TCT . . +FLG 0 . GRCh38 chr1 152304107 152304107 + Missense_Mutation SNP C C G rs12083389 TUMOR NORMAL C C c.10779G>C p.Glu3593Asp p.E3593D ENST00000368799 3/3 1708 1357 351 394 368 26 FLG,missense_variant,p.Glu3593Asp,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; G ENSG00000143631 ENST00000368799 Transcript missense_variant 10815/12747 10779/12186 3593/4061 E/D gaG/gaC rs12083389 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 tolerated(1) unknown(0) 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571 G:0.3614 G:0.4796 G:0.3213 G:0.4732 G:0.1233 G:0.3599 G:0.3795 G:0.0319 MODERATE 1 SNV 1 1 . TCT . . +FLG 0 . 
GRCh38 chr1 152304122 152304122 + Silent SNP G G A rs12742178 TUMOR NORMAL G G c.10764C>T p.= p.H3588= ENST00000368799 3/3 1605 1281 324 389 370 19 FLG,synonymous_variant,p.=,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; A ENSG00000143631 ENST00000368799 Transcript synonymous_variant 10800/12747 10764/12186 3588/4061 H caC/caT rs12742178 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571 A:0.0887 A:0.0227 A:0.0965 A:0.1984 A:0.0199 A:0.1299 LOW 1 SNV 1 1 . CGT . . +FLG 0 . GRCh38 chr1 152304150 152304150 + Missense_Mutation SNP G G C rs3126075 TUMOR NORMAL G G c.10736C>G p.Thr3579Arg p.T3579R ENST00000368799 3/3 1470 1215 255 433 348 85 FLG,missense_variant,p.Thr3579Arg,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; C ENSG00000143631 ENST00000368799 Transcript missense_variant 10772/12747 10736/12186 3579/4061 T/R aCg/aGg rs3126075 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 tolerated(0.57) unknown(0) 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571 C:0.4778 C:0.6074 C:0.4395 C:0.6498 C:0.1561 C:0.4836 C:0.2548 C:0.0174 MODERATE 1 SNV 1 1 . CGT . . +FLG 0 . 
GRCh38 chr1 152306079 152306079 + Missense_Mutation SNP T T C rs80221306 TUMOR NORMAL T T c.8807A>G p.Asp2936Gly p.D2936G ENST00000368799 3/3 3958 3206 750 578 408 170 FLG,missense_variant,p.Asp2936Gly,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; C ENSG00000143631 ENST00000368799 Transcript missense_variant 8843/12747 8807/12186 2936/4061 D/G gAc/gGc rs80221306 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 tolerated(0.38) benign(0.003) 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571 C:0.0734 C:0.0964 MODERATE 1 SNV 1 1 . GTC . . +FLG 0 . GRCh38 chr1 152306338 152306338 + Missense_Mutation SNP C C T rs2184952 TUMOR NORMAL C C c.8548G>A p.Gly2850Ser p.G2850S ENST00000368799 3/3 4985 4176 804 411 302 109 FLG,missense_variant,p.Gly2850Ser,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; T ENSG00000143631 ENST00000368799 Transcript missense_variant 8584/12747 8548/12186 2850/4061 G/S Ggc/Agc rs2184952 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 tolerated(0.85) possibly_damaging(0.579) 3/3 Low_complexity_(Seg):seg,Pfam_domain:PF03516 T:0.0768 T:0.0521 MODERATE 1 SNV 1 1 . CCG . . +FLG 0 . GRCh38 chr1 152306380 152306380 + Missense_Mutation SNP T T G rs11582087 TUMOR NORMAL T T c.8506A>C p.Ser2836Arg p.S2836R ENST00000368799 3/3 5056 4057 998 487 431 56 FLG,missense_variant,p.Ser2836Arg,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; G ENSG00000143631 ENST00000368799 Transcript missense_variant 8542/12747 8506/12186 2836/4061 S/R Agt/Cgt rs11582087 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 deleterious(0.04) possibly_damaging(0.766) 3/3 Low_complexity_(Seg):seg G:0.0314 G:0.1107 MODERATE 1 SNV 1 1 . CTT . 
. +FLG 0 . GRCh38 chr1 152307694 152307694 + Missense_Mutation SNP C C G rs71625201 TUMOR NORMAL C C c.7192G>C p.Glu2398Gln p.E2398Q ENST00000368799 3/3 2374 1465 909 537 412 125 FLG,missense_variant,p.Glu2398Gln,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; G ENSG00000143631 ENST00000368799 Transcript missense_variant 7228/12747 7192/12186 2398/4061 E/Q Gag/Cag rs71625201 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 tolerated(0.37) benign(0.403) 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571 G:0.3305 G:0.1135 G:0.4092 G:0.5665 G:0.1690 G:0.4908 G:0.1326 G:0.1657 MODERATE 1 SNV 1 1 . TCT . . +FLG 0 . GRCh38 chr1 152307789 152307789 + Missense_Mutation SNP C C G rs71625202 TUMOR NORMAL C C c.7097G>C p.Ser2366Thr p.S2366T ENST00000368799 3/3 2091 1480 609 574 431 142 FLG,missense_variant,p.Ser2366Thr,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; G ENSG00000143631 ENST00000368799 Transcript missense_variant 7133/12747 7097/12186 2366/4061 S/T aGt/aCt rs71625202 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 deleterious(0.01) benign(0.418) 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571,Pfam_domain:PF03516 G:0.0348 G:0.1329 MODERATE 1 SNV 1 1 . ACT . . +FLG 0 . 
GRCh38 chr1 152307871 152307871 + Missense_Mutation SNP C C T rs139476473 TUMOR NORMAL C C c.7015G>A p.Asp2339Asn p.D2339N ENST00000368799 3/3 1915 1498 415 445 416 27 FLG,missense_variant,p.Asp2339Asn,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; T ENSG00000143631 ENST00000368799 Transcript missense_variant 7051/12747 7015/12186 2339/4061 D/N Gac/Aac rs139476473 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 deleterious(0.02) possibly_damaging(0.822) 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571,Pfam_domain:PF03516 T:0.0185 T:0.0328 MODERATE 1 SNV 1 1 . TCT . . +FLG 0 . GRCh38 chr1 152307896 152307896 + Silent SNP G G A rs6664985 TUMOR NORMAL G G c.6990C>T p.= p.H2330= ENST00000368799 3/3 1801 1180 620 367 367 0 FLG,synonymous_variant,p.=,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; A ENSG00000143631 ENST00000368799 Transcript synonymous_variant 7026/12747 6990/12186 2330/4061 H caC/caT rs6664985 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571,Pfam_domain:PF03516 A:0.4535 A:0.4902 A:0.4524 A:0.6567 A:0.1730 A:0.4836 LOW 1 SNV 1 1 . CGT . . +FLG 0 . 
GRCh38 chr1 152307995 152307995 + Missense_Mutation SNP C C G rs78179835 TUMOR NORMAL C C c.6891G>C p.Glu2297Asp p.E2297D ENST00000368799 3/3 3096 2226 869 429 340 89 FLG,missense_variant,p.Glu2297Asp,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; G ENSG00000143631 ENST00000368799 Transcript missense_variant 6927/12747 6891/12186 2297/4061 E/D gaG/gaC rs78179835 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 tolerated(1) benign(0.217) 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571 G:0.0915 G:0.1071 MODERATE 1 SNV 1 1 . TCT . . +FLG 0 . GRCh38 chr1 152308083 152308083 + Missense_Mutation SNP T T C rs150122015 TUMOR NORMAL T T c.6803A>G p.His2268Arg p.H2268R ENST00000368799 3/3 2559 2246 311 422 374 46 FLG,missense_variant,p.His2268Arg,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; C ENSG00000143631 ENST00000368799 Transcript missense_variant 6839/12747 6803/12186 2268/4061 H/R cAt/cGt rs150122015 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 tolerated(0.28) benign(0.227) 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571 MODERATE 1 SNV 1 1 . ATG . . +FLG 0 . 
GRCh38 chr1 152308424 152308424 + Missense_Mutation SNP T T G rs74129452 TUMOR NORMAL T T c.6462A>C p.Gln2154His p.Q2154H ENST00000368799 3/3 3124 2189 933 422 345 77 FLG,missense_variant,p.Gln2154His,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,; G ENSG00000143631 ENST00000368799 Transcript missense_variant 6498/12747 6462/12186 2154/4061 Q/H caA/caC rs74129452 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 tolerated(1) benign(0.216) 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571 G:0.3438 G:0.3843 G:0.3487 G:0.4444 G:0.1541 G:0.3773 MODERATE 1 SNV 1 1 . CTT . . +FLG 0 . GRCh38 chr1 152308814 152308814 + Silent SNP A A G rs80353812 TUMOR NORMAL A A c.6072T>C p.= p.H2024= ENST00000368799 3/3 3521 2338 1182 424 296 127 FLG,synonymous_variant,p.=,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,;FLG-AS1,upstream_gene_variant,,ENST00000392688,; G ENSG00000143631 ENST00000368799 Transcript synonymous_variant 6108/12747 6072/12186 2024/4061 H caT/caC rs80353812 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 3/3 hmmpanther:PTHR22571:SF21,hmmpanther:PTHR22571,Pfam_domain:PF03516 G:0.3281 G:0.3850 G:0.3343 G:0.4355 G:0.1292 G:0.3405 G:0.2624 G:0.1262 LOW 1 SNV 1 1 . CAT . . +FLG 0 . 
GRCh38 chr1 152310807 152310807 + Missense_Mutation SNP C C T rs11586631 TUMOR NORMAL C C c.4079G>A p.Arg1360His p.R1360H ENST00000368799 3/3 2461 1314 1140 394 322 70 FLG,missense_variant,p.Arg1360His,ENST00000368799,NM_002016.1;FLG-AS1,intron_variant,,ENST00000420707,;FLG-AS1,intron_variant,,ENST00000593011,;FLG-AS1,upstream_gene_variant,,ENST00000392688,; T ENSG00000143631 ENST00000368799 Transcript missense_variant 4115/12747 4079/12186 1360/4061 R/H cGc/cAc rs11586631 1 -1 FLG HGNC HGNC:3748 protein_coding YES CCDS30860.1 ENSP00000357789 P20930 UPI0000470CB3 NM_002016.1 tolerated(0.83) possibly_damaging(0.812) 3/3 Pfam_domain:PF03516 T:0.2877 T:0.0144 T:0.3890 T:0.5823 T:0.1421 T:0.4315 T:0.037 T:0.1439 MODERATE 1 SNV 1 1 . GCG . . +SPTA1 0 . GRCh38 chr1 158607804 158607804 + 3'Flank SNP C C A novel TUMOR NORMAL C C ENST00000368147 10 8 2 8 8 0 SPTA1,downstream_gene_variant,,ENST00000368147,NM_003126.2;OR10Z1,downstream_gene_variant,,ENST00000361284,NM_001004478.1;SPTA1,downstream_gene_variant,,ENST00000485680,;SPTA1,downstream_gene_variant,,ENST00000481212,;SPTA1,downstream_gene_variant,,ENST00000498708,; A ENSG00000163554 ENST00000368147 Transcript downstream_gene_variant -/7999 -/7260 -/2419 1 2902 -1 SPTA1 HGNC HGNC:11272 protein_coding YES CCDS41423.1 ENSP00000357129 P02549 UPI0000458906 NM_003126.2 MODIFIER 1 SNV 1 1 . ACT . . +SDHC 0 . 
GRCh38 chr1 161363118 161363118 + 3'UTR SNP C C T rs138085670 TUMOR NORMAL C C c.*685C>T ENST00000367975 6/6 326 189 137 171 125 46 SDHC,3_prime_UTR_variant,,ENST00000367975,NM_003001.3;SDHC,3_prime_UTR_variant,,ENST00000342751,NM_001035511.1;CFAP126,downstream_gene_variant,,ENST00000367974,NM_001013625.3;SDHC,downstream_gene_variant,,ENST00000432287,NM_001035512.1;SDHC,downstream_gene_variant,,ENST00000392169,NM_001035513.1;SDHC,downstream_gene_variant,,ENST00000513009,NM_001278172.1;SDHC,upstream_gene_variant,,ENST00000437833,;SDHC,downstream_gene_variant,,ENST00000470743,;SDHC,downstream_gene_variant,,ENST00000504963,; T ENSG00000143252 ENST00000367975 Transcript 3_prime_UTR_variant 1344/13566 -/510 -/169 rs138085670 1 1 SDHC HGNC HGNC:10682 protein_coding YES CCDS1230.1 ENSP00000356953 Q99643 A0A0S2Z4B7 UPI0000001636 NM_003001.3 6/6 T:0.0204 T:0.0023 T:0.0014 T:0.0446 T:0.0000 T:0.0542 MODIFIER 1 SNV 1 1 . TCT . . +RP11-122G18.10 0 . GRCh38 chr1 161444478 161444478 + Intron SNP T T A rs3123349 TUMOR NORMAL T T n.55-16459A>T ENST00000637155 85 54 31 90 60 30 RP11-122G18.10,intron_variant,,ENST00000637155,;RP11-122G18.11,upstream_gene_variant,,ENST00000636824,; A ENSG00000283360 ENST00000637155 Transcript intron_variant,non_coding_transcript_variant -/863 rs3123349 1 -1 RP11-122G18.10 Clone_based_vega_gene lincRNA YES 1/4 MODIFIER 1 SNV . CTG . . +RP11-122G18.10 0 . GRCh38 chr1 161444486 161444486 + Intron SNP A A G rs540204255 TUMOR NORMAL A A n.55-16467T>C ENST00000637155 73 42 31 86 56 30 RP11-122G18.10,intron_variant,,ENST00000637155,;RP11-122G18.11,upstream_gene_variant,,ENST00000636824,; G ENSG00000283360 ENST00000637155 Transcript intron_variant,non_coding_transcript_variant -/863 rs540204255 1 -1 RP11-122G18.10 Clone_based_vega_gene lincRNA YES 1/4 G:0.0004 G:0.0000 G:0.0000 G:0.0000 G:0.0010 G:0.0010 MODIFIER 1 SNV . CAC . . +RP11-122G18.10 0 . 
GRCh38 chr1 161444488 161444488 + Intron SNP A A G rs12076971 TUMOR NORMAL A A n.55-16469T>C ENST00000637155 74 43 31 88 57 31 RP11-122G18.10,intron_variant,,ENST00000637155,;RP11-122G18.11,upstream_gene_variant,,ENST00000636824,; G ENSG00000283360 ENST00000637155 Transcript intron_variant,non_coding_transcript_variant -/863 rs12076971 1 -1 RP11-122G18.10 Clone_based_vega_gene lincRNA YES 1/4 G:0.0004 G:0.0000 G:0.0000 G:0.0000 G:0.0010 G:0.0010 MODIFIER 1 SNV . CAC . . +RP11-122G18.10 0 . GRCh38 chr1 161444514 161444514 + Intron SNP T T A rs765889351 TUMOR NORMAL T T n.55-16495A>T ENST00000637155 70 35 35 99 70 29 RP11-122G18.10,intron_variant,,ENST00000637155,;RP11-122G18.11,upstream_gene_variant,,ENST00000636824,; A ENSG00000283360 ENST00000637155 Transcript intron_variant,non_coding_transcript_variant -/863 rs765889351 1 -1 RP11-122G18.10 Clone_based_vega_gene lincRNA YES 1/4 MODIFIER 1 SNV . CTC . . +RP11-122G18.10 0 . GRCh38 chr1 161444527 161444527 + Intron SNP C C G novel TUMOR NORMAL C C n.55-16508G>C ENST00000637155 74 53 21 125 103 22 RP11-122G18.10,intron_variant,,ENST00000637155,;RP11-122G18.11,upstream_gene_variant,,ENST00000636824,; G ENSG00000283360 ENST00000637155 Transcript intron_variant,non_coding_transcript_variant -/863 1 -1 RP11-122G18.10 Clone_based_vega_gene lincRNA YES 1/4 MODIFIER 1 SNV . TCT . . +RALGPS2 0 . 
GRCh38 chr1 178725267 178725267 + 5'UTR SNP G G A novel TUMOR NORMAL G G c.-236G>A ENST00000367635 1/20 10 7 3 10 10 0 RALGPS2,5_prime_UTR_variant,,ENST00000367634,NM_001286247.1;RALGPS2,5_prime_UTR_variant,,ENST00000367635,NM_152663.4;RALGPS2,upstream_gene_variant,,ENST00000324778,;RP11-428K3.1,non_coding_transcript_exon_variant,,ENST00000608517,;RALGPS2,non_coding_transcript_exon_variant,,ENST00000495034,;,regulatory_region_variant,,ENSR00000016375,; A ENSG00000116191 ENST00000367635 Transcript 5_prime_UTR_variant 103/5834 -/1752 -/583 1 1 RALGPS2 HGNC HGNC:30279 protein_coding YES CCDS1325.1 ENSP00000356607 Q86X27 UPI000000DBE1 NM_152663.4 1/20 MODIFIER 1 SNV 1 . CGG . . +ADORA1 0 . GRCh38 chr1 203166324 203166324 + 3'UTR SNP C C T rs16851030 TUMOR NORMAL C C c.*424C>T ENST00000367236 3/3 8 6 2 6 6 0 ADORA1,3_prime_UTR_variant,,ENST00000367236,NM_001048230.1;ADORA1,3_prime_UTR_variant,,ENST00000337894,NM_000674.2;ADORA1,3_prime_UTR_variant,,ENST00000309502,;ADORA1,3_prime_UTR_variant,,ENST00000367235,;MYBPH,downstream_gene_variant,,ENST00000255416,NM_004997.2;MYBPH,downstream_gene_variant,,ENST00000621380,;ADORA1,downstream_gene_variant,,ENST00000618295,;ADORA1,non_coding_transcript_exon_variant,,ENST00000472535,;ADORA1,downstream_gene_variant,,ENST00000467253,;ADORA1,downstream_gene_variant,,ENST00000464019,;,regulatory_region_variant,,ENSR00000018401,; T ENSG00000163485 ENST00000367236 Transcript 3_prime_UTR_variant 2326/3407 -/981 -/326 rs16851030 1 1 ADORA1 HGNC HGNC:262 protein_coding YES CCDS1434.1 ENSP00000356205 P30542 UPI00000503E1 NM_001048230.1 3/3 T:0.1532 T:0.1142 T:0.1009 T:0.3958 T:0.0368 T:0.1125 20520601,21886579,22462821,19019667,24003382 MODIFIER 1 SNV 1 . CCC . . +CICP13 0 . 
GRCh38 chr1 222468242 222468242 + RNA SNP T C C rs4103697 TUMOR NORMAL T T n.149T>C ENST00000422015 1/1 84 8 76 61 56 5 CICP13,non_coding_transcript_exon_variant,,ENST00000422015,; C ENSG00000234419 ENST00000422015 Transcript non_coding_transcript_exon_variant,non_coding_transcript_variant 149/2789 rs4103697 1 1 CICP13 HGNC HGNC:37907 processed_pseudogene YES 1/1 T:0.0385 C:0.9418 C:0.9539 C:0.9663 C:0.9742 C:0.9755 MODIFIER 1 SNV . ATG . . +GTF2IP20 0 . GRCh38 chr1 223947556 223947556 + 3'Flank SNP G G A rs61825417 TUMOR NORMAL G G ENST00000634905 65 45 20 91 71 20 GTF2IP20,downstream_gene_variant,,ENST00000634905,;GTF2IP20,downstream_gene_variant,,ENST00000608760,;CICP5,upstream_gene_variant,,ENST00000424045,; A ENSG00000272645 ENST00000634905 Transcript downstream_gene_variant -/2966 rs61825417 1 3923 -1 GTF2IP20 HGNC HGNC:51732 processed_transcript YES A:0.2630 A:0.3154 A:0.2637 A:0.0794 A:0.4125 A:0.2270 MODIFIER 1 SNV 5 . CGA . . +GTF2IP20 0 . GRCh38 chr1 223947588 223947588 + 3'Flank SNP G T T rs573572074 TUMOR NORMAL G G ENST00000634905 87 63 24 117 103 14 GTF2IP20,downstream_gene_variant,,ENST00000634905,;GTF2IP20,downstream_gene_variant,,ENST00000608760,;CICP5,upstream_gene_variant,,ENST00000424045,; T ENSG00000272645 ENST00000634905 Transcript downstream_gene_variant -/2966 rs573572074 1 3891 -1 GTF2IP20 HGNC HGNC:51732 processed_transcript YES T:0.0974 T:0.0787 T:0.1023 T:0.0208 T:0.1958 T:0.0971 MODIFIER 1 SNV 5 . TGC . . +GTF2IP20 0 . GRCh38 chr1 223947978 223947978 + 3'Flank SNP G G T rs61825418 TUMOR NORMAL G G ENST00000634905 430 225 205 254 191 63 GTF2IP20,downstream_gene_variant,,ENST00000634905,;GTF2IP20,downstream_gene_variant,,ENST00000608760,;CICP5,non_coding_transcript_exon_variant,,ENST00000424045,; T ENSG00000272645 ENST00000634905 Transcript downstream_gene_variant -/2966 rs61825418 1 3501 -1 GTF2IP20 HGNC HGNC:51732 processed_transcript YES T:0.2616 T:0.2912 T:0.2651 T:0.1002 T:0.4125 T:0.2301 MODIFIER 1 SNV 5 . AGT . . +GTF2IP20 0 . 
GRCh38 chr1 223948062 223948062 + 3'Flank SNP G G A rs61825419 TUMOR NORMAL G G ENST00000634905 1124 586 537 304 198 104 GTF2IP20,downstream_gene_variant,,ENST00000634905,;GTF2IP20,downstream_gene_variant,,ENST00000608760,;CICP5,non_coding_transcript_exon_variant,,ENST00000424045,; A ENSG00000272645 ENST00000634905 Transcript downstream_gene_variant -/2966 rs61825419 1 3417 -1 GTF2IP20 HGNC HGNC:51732 processed_transcript YES A:0.3319 A:0.4024 A:0.3112 A:0.1627 A:0.4473 A:0.3067 MODIFIER 1 SNV 5 . CGC . . +GTF2IP20 0 . GRCh38 chr1 223948558 223948558 + 3'Flank SNP C C T rs78538264 TUMOR NORMAL C C ENST00000634905 154 74 80 40 35 5 GTF2IP20,downstream_gene_variant,,ENST00000634905,;GTF2IP20,downstream_gene_variant,,ENST00000608760,;CICP5,non_coding_transcript_exon_variant,,ENST00000424045,; T ENSG00000272645 ENST00000634905 Transcript downstream_gene_variant -/2966 rs78538264 1 2921 -1 GTF2IP20 HGNC HGNC:51732 processed_transcript YES MODIFIER 1 SNV 5 . ACG . . +GTF2IP20 0 . GRCh38 chr1 223949699 223949699 + 3'Flank SNP T T C rs3991993 TUMOR NORMAL T T ENST00000634905 370 206 164 189 189 0 GTF2IP20,downstream_gene_variant,,ENST00000634905,;GTF2IP20,downstream_gene_variant,,ENST00000608760,;CICP5,non_coding_transcript_exon_variant,,ENST00000424045,; C ENSG00000272645 ENST00000634905 Transcript downstream_gene_variant -/2966 rs3991993 1 1780 -1 GTF2IP20 HGNC HGNC:51732 processed_transcript YES MODIFIER 1 SNV 5 . GTG . . +GTF2IP20 0 . GRCh38 chr1 223950486 223950486 + 3'Flank SNP C C T rs3991971 TUMOR NORMAL C C ENST00000634905 115 25 90 79 18 61 GTF2IP20,downstream_gene_variant,,ENST00000634905,;GTF2IP20,downstream_gene_variant,,ENST00000608760,;CICP5,downstream_gene_variant,,ENST00000424045,; T ENSG00000272645 ENST00000634905 Transcript downstream_gene_variant -/2966 rs3991971 1 993 -1 GTF2IP20 HGNC HGNC:51732 processed_transcript YES MODIFIER 1 SNV 5 . ACG . . +CICP26 0 . 
GRCh38 chr1 227976709 227976709 + RNA SNP T C C rs61825104 TUMOR NORMAL T T n.1485A>G ENST00000425003 1/3 91 4 86 10 10 0 CICP26,non_coding_transcript_exon_variant,,ENST00000425003,;SEPT14P17,upstream_gene_variant,,ENST00000617978,; C ENSG00000233003 ENST00000425003 Transcript non_coding_transcript_exon_variant,non_coding_transcript_variant 1485/2448 rs61825104 1 -1 CICP26 HGNC HGNC:48834 unprocessed_pseudogene YES 1/3 MODIFIER 1 SNV . CTA . . +CICP26 0 . GRCh38 chr1 227977309 227977309 + RNA SNP A A G rs9662308 TUMOR NORMAL A A n.885T>C ENST00000425003 1/3 461 236 225 170 131 38 CICP26,non_coding_transcript_exon_variant,,ENST00000425003,;SEPT14P17,upstream_gene_variant,,ENST00000617978,; G ENSG00000233003 ENST00000425003 Transcript non_coding_transcript_exon_variant,non_coding_transcript_variant 885/2448 rs9662308 1 -1 CICP26 HGNC HGNC:48834 unprocessed_pseudogene YES 1/3 G:0.2290 G:0.3714 G:0.1787 G:0.2708 G:0.0517 G:0.2117 MODIFIER 1 SNV . CAT . . +RP11-443B7.3 0 . GRCh38 chr1 234978406 234978406 + 3'Flank SNP C G G novel TUMOR NORMAL C C ENST00000549744 1 0 1 2 2 0 RP11-443B7.3,downstream_gene_variant,,ENST00000549744,; G ENSG00000258082 ENST00000549744 Transcript downstream_gene_variant -/846 1 1241 -1 RP11-443B7.3 Clone_based_vega_gene lincRNA YES MODIFIER 1 SNV 3 . CCA . . +RP11-443B7.3 0 . GRCh38 chr1 234978426 234978426 + 3'Flank SNP C G G novel TUMOR NORMAL C C ENST00000549744 1 0 1 2 2 0 RP11-443B7.3,downstream_gene_variant,,ENST00000549744,; G ENSG00000258082 ENST00000549744 Transcript downstream_gene_variant -/846 1 1221 -1 RP11-443B7.3 Clone_based_vega_gene lincRNA YES MODIFIER 1 SNV 3 . GCA . . +RP11-443B7.3 0 . GRCh38 chr1 234978429 234978429 + 3'Flank SNP T C C novel TUMOR NORMAL T T ENST00000549744 1 0 1 2 2 0 RP11-443B7.3,downstream_gene_variant,,ENST00000549744,; C ENSG00000258082 ENST00000549744 Transcript downstream_gene_variant -/846 1 1218 -1 RP11-443B7.3 Clone_based_vega_gene lincRNA YES MODIFIER 1 SNV 3 . CTC . . +FMN2 0 . 
GRCh38 chr1 240207797 240207797 + Silent SNP G G A rs71646827 TUMOR NORMAL G G c.2985G>A p.= p.A995= ENST00000319653 5/18 245 180 64 161 133 26 FMN2,synonymous_variant,p.=,ENST00000319653,NM_020066.4,NM_001305424.1;FMN2,downstream_gene_variant,,ENST00000447095,; A ENSG00000155816 ENST00000319653 Transcript synonymous_variant 3215/6434 2985/5169 995/1722 A gcG/gcA rs71646827 1 1 FMN2 HGNC HGNC:14074 protein_coding YES CCDS31069.2 ENSP00000318884 Q9NZ56 UPI00015FA087 NM_020066.4,NM_001305424.1 5/18 Pfam_domain:PF06346,Prints_domain:PR01217,Low_complexity_(Seg):seg,SMART_domains:SM00498 LOW 1 SNV 5 1 . CGG . . +FMN2 0 . GRCh38 chr1 240207800 240207800 + Silent SNP C C A rs71646887 TUMOR NORMAL C C c.2988C>A p.= p.G996= ENST00000319653 5/18 225 167 53 152 126 22 FMN2,synonymous_variant,p.=,ENST00000319653,NM_020066.4,NM_001305424.1;FMN2,downstream_gene_variant,,ENST00000447095,; A ENSG00000155816 ENST00000319653 Transcript synonymous_variant 3218/6434 2988/5169 996/1722 G ggC/ggA rs71646887 1 1 FMN2 HGNC HGNC:14074 protein_coding YES CCDS31069.2 ENSP00000318884 Q9NZ56 UPI00015FA087 NM_020066.4,NM_001305424.1 5/18 Pfam_domain:PF06346,Prints_domain:PR01217,Low_complexity_(Seg):seg,SMART_domains:SM00498 LOW 1 SNV 5 1 . GCA . . +FMN2 0 . GRCh38 chr1 240207806 240207806 + Silent SNP C C T rs11586155 TUMOR NORMAL C C c.2994C>T p.= p.P998= ENST00000319653 5/18 216 164 51 144 122 21 FMN2,synonymous_variant,p.=,ENST00000319653,NM_020066.4,NM_001305424.1;FMN2,downstream_gene_variant,,ENST00000447095,; T ENSG00000155816 ENST00000319653 Transcript synonymous_variant 3224/6434 2994/5169 998/1722 P ccC/ccT rs11586155 1 1 FMN2 HGNC HGNC:14074 protein_coding YES CCDS31069.2 ENSP00000318884 Q9NZ56 UPI00015FA087 NM_020066.4,NM_001305424.1 5/18 Pfam_domain:PF06346,Prints_domain:PR01217,Low_complexity_(Seg):seg,SMART_domains:SM00498 LOW 1 SNV 5 1 . CCC . . +FMN2 0 . 
GRCh38 chr1 240207812 240207812 + Silent SNP G T A rs71646889 TUMOR NORMAL G T c.3000G>A p.= p.P1000= ENST00000319653 5/18 185 16 31 124 37 12 FMN2,synonymous_variant,p.=,ENST00000319653,NM_020066.4,NM_001305424.1;FMN2,downstream_gene_variant,,ENST00000447095,; A ENSG00000155816 ENST00000319653 Transcript synonymous_variant 3230/6434 3000/5169 1000/1722 P ccG/ccA rs71646889 1 1 FMN2 HGNC HGNC:14074 protein_coding YES CCDS31069.2 ENSP00000318884 Q9NZ56 UPI00015FA087 NM_020066.4,NM_001305424.1 5/18 Pfam_domain:PF06346,Prints_domain:PR01217,Low_complexity_(Seg):seg,SMART_domains:SM00498 T:0.2678 T:0.3570 T:0.1873 T:0.2927 T:0.2008 T:0.2474 LOW 1 SNV 5 1 . CGC . . +FMN2 0 . GRCh38 chr1 240208175 240208175 + Silent SNP T T G rs200682272 TUMOR NORMAL T T c.3363T>G p.= p.P1121= ENST00000319653 5/18 146 111 30 125 101 17 FMN2,synonymous_variant,p.=,ENST00000319653,NM_020066.4,NM_001305424.1;FMN2,downstream_gene_variant,,ENST00000447095,; G ENSG00000155816 ENST00000319653 Transcript synonymous_variant 3593/6434 3363/5169 1121/1722 P ccT/ccG rs200682272 1 1 FMN2 HGNC HGNC:14074 protein_coding YES CCDS31069.2 ENSP00000318884 Q9NZ56 UPI00015FA087 NM_020066.4,NM_001305424.1 5/18 Pfam_domain:PF06346,hmmpanther:PTHR13037,Low_complexity_(Seg):seg,SMART_domains:SM00498 LOW 1 SNV 5 1 . CTC . . +FMN2 0 . GRCh38 chr1 240472659 240472659 + Intron SNP G G T rs1953603 TUMOR NORMAL G G c.5142+206G>T ENST00000319653 7 5 2 6 6 0 FMN2,intron_variant,,ENST00000319653,NM_020066.4,NM_001305424.1;FMN2,intron_variant,,ENST00000543681,;FMN2,intron_variant,,ENST00000545751,;FMN2,intron_variant,,ENST00000496950,; T ENSG00000155816 ENST00000319653 Transcript intron_variant -/6434 -/5169 -/1722 rs1953603 1 1 FMN2 HGNC HGNC:14074 protein_coding YES CCDS31069.2 ENSP00000318884 Q9NZ56 UPI00015FA087 NM_020066.4,NM_001305424.1 17/17 T:0.2965 T:0.0923 T:0.4092 T:0.4702 T:0.3280 T:0.2812 MODIFIER 1 SNV 5 1 . AGA . . +Unknown 0 . 
GRCh38 chr1 242810191 242810191 + IGR SNP T T G rs2780803 TUMOR NORMAL T T 171 99 72 71 56 15 G intergenic_variant rs2780803 1 MODIFIER 1 SNV . TTT . . +Unknown 0 . GRCh38 chr1 242810198 242810198 + IGR SNP T T G rs865863353 TUMOR NORMAL T T 177 95 82 79 58 21 G intergenic_variant rs865863353 1 MODIFIER 1 SNV . TTT . . +LINC01347 0 . GRCh38 chr1 243052907 243052907 + 3'Flank SNP C C T rs375705608 TUMOR NORMAL C C ENST00000627498 17 11 6 7 7 0 LINC01347,upstream_gene_variant,,ENST00000437691,;LINC01347,downstream_gene_variant,,ENST00000627498,;RP11-261C10.8,downstream_gene_variant,,ENST00000611420,;RP11-261C10.7,downstream_gene_variant,,ENST00000517560,; T ENSG00000214837 ENST00000627498 Transcript downstream_gene_variant -/3841 rs375705608 1 3400 -1 LINC01347 HGNC HGNC:50566 processed_transcript YES MODIFIER 1 SNV 1 . CCG . . +Unknown 0 . GRCh38 chr1 244891249 244891249 + IGR SNP C C T rs528047174 TUMOR NORMAL C C 7 5 2 6 6 0 T intergenic_variant rs528047174 1 T:0.3173 T:0.3321 T:0.3084 T:0.3284 T:0.3241 T:0.2853 MODIFIER 1 SNV . TCT . . +LINC01250 0 . GRCh38 chr2 3035112 3035112 + Intron SNP T T C rs6749589 TUMOR NORMAL T T n.439-68437N>G ENST00000457478 559 401 157 263 202 59 LINC01250,intron_variant,,ENST00000457478,; C ENSG00000234423 ENST00000457478 Transcript intron_variant,non_coding_transcript_variant -/2344 rs6749589 1 -1 LINC01250 HGNC HGNC:49844 lincRNA YES 3/6 MODIFIER 1 SNV 2 . GTG . . +TSSC1 0 . 
GRCh38 chr2 3329166 3329166 + Intron SNP G G C rs71279311 TUMOR NORMAL G G c.259+8851N>G ENST00000382125 64 47 13 66 54 12 TSSC1,intron_variant,,ENST00000398659,;TSSC1,intron_variant,,ENST00000443925,;TSSC1,intron_variant,,ENST00000382125,NM_003310.2;TSSC1,intron_variant,,ENST00000441271,;TSSC1,intron_variant,,ENST00000444776,;TSSC1,intron_variant,,ENST00000463662,;TSSC1,intron_variant,,ENST00000455162,;TSSC1,intron_variant,,ENST00000406835,;TSSC1,intron_variant,,ENST00000435721,; C ENSG00000032389 ENST00000382125 Transcript intron_variant -/1766 -/1164 -/387 rs71279311 1 -1 TSSC1 HGNC HGNC:12383 protein_coding YES CCDS1651.1 ENSP00000371559 Q53HC9 UPI000006DFE1 NM_003310.2 3/8 MODIFIER 1 SNV 1 . AGG . . +TSSC1 0 . GRCh38 chr2 3329223 3329223 + Intron SNP G G C rs74188699 TUMOR NORMAL G G c.259+8794N>G ENST00000382125 36 22 7 34 27 7 TSSC1,intron_variant,,ENST00000398659,;TSSC1,intron_variant,,ENST00000443925,;TSSC1,intron_variant,,ENST00000382125,NM_003310.2;TSSC1,intron_variant,,ENST00000441271,;TSSC1,intron_variant,,ENST00000444776,;TSSC1,intron_variant,,ENST00000463662,;TSSC1,intron_variant,,ENST00000455162,;TSSC1,intron_variant,,ENST00000406835,;TSSC1,intron_variant,,ENST00000435721,; C ENSG00000032389 ENST00000382125 Transcript intron_variant -/1766 -/1164 -/387 rs74188699 1 -1 TSSC1 HGNC HGNC:12383 protein_coding YES CCDS1651.1 ENSP00000371559 Q53HC9 UPI000006DFE1 NM_003310.2 3/8 MODIFIER 1 SNV 1 . AGG . . +TSSC1 0 . 
GRCh38 chr2 3329230 3329230 + Intron SNP G G A rs13004833 TUMOR NORMAL G G c.259+8787N>T ENST00000382125 31 18 12 30 21 7 TSSC1,intron_variant,,ENST00000398659,;TSSC1,intron_variant,,ENST00000443925,;TSSC1,intron_variant,,ENST00000382125,NM_003310.2;TSSC1,intron_variant,,ENST00000441271,;TSSC1,intron_variant,,ENST00000444776,;TSSC1,intron_variant,,ENST00000463662,;TSSC1,intron_variant,,ENST00000455162,;TSSC1,intron_variant,,ENST00000406835,;TSSC1,intron_variant,,ENST00000435721,; A ENSG00000032389 ENST00000382125 Transcript intron_variant -/1766 -/1164 -/387 rs13004833 1 -1 TSSC1 HGNC HGNC:12383 protein_coding YES CCDS1651.1 ENSP00000371559 Q53HC9 UPI000006DFE1 NM_003310.2 3/8 MODIFIER 1 SNV 1 . AGT . . +TSSC1 0 . GRCh38 chr2 3329243 3329243 + Intron SNP A A G rs113258696 TUMOR NORMAL A A c.259+8774N>C ENST00000382125 31 20 11 29 27 2 TSSC1,intron_variant,,ENST00000398659,;TSSC1,intron_variant,,ENST00000443925,;TSSC1,intron_variant,,ENST00000382125,NM_003310.2;TSSC1,intron_variant,,ENST00000441271,;TSSC1,intron_variant,,ENST00000444776,;TSSC1,intron_variant,,ENST00000463662,;TSSC1,intron_variant,,ENST00000455162,;TSSC1,intron_variant,,ENST00000406835,;TSSC1,intron_variant,,ENST00000435721,; G ENSG00000032389 ENST00000382125 Transcript intron_variant -/1766 -/1164 -/387 rs113258696 1 -1 TSSC1 HGNC HGNC:12383 protein_coding YES CCDS1651.1 ENSP00000371559 Q53HC9 UPI000006DFE1 NM_003310.2 3/8 G:0.3021 G:0.3351 G:0.1643 G:0.4514 G:0.2167 G:0.2894 MODIFIER 1 SNV 1 . CAC . . +DCDC2C 0 . GRCh38 chr2 3775131 3775131 + Intron SNP G G C rs371674744 TUMOR NORMAL G G c.955-3685N>C ENST00000399143 366 254 112 172 129 43 DCDC2C,intron_variant,,ENST00000399143,NM_001287444.1;DCDC2C,intron_variant,,ENST00000423741,;DCDC2C,intron_variant,,ENST00000537457,; C ENSG00000214866 ENST00000399143 Transcript intron_variant -/1480 -/1095 -/364 rs371674744 1 1 DCDC2C HGNC HGNC:32696 protein_coding YES CCDS74481.1 ENSP00000382097 A8MYV0 UPI0002742D44 NM_001287444.1 8/10 MODIFIER 1 SNV 5 . 
TGT . . +CYS1 0 . GRCh38 chr2 10080680 10080680 + 5'Flank SNP C C T novel TUMOR NORMAL C C ENST00000381813 5 3 2 6 6 0 CYS1,upstream_gene_variant,,ENST00000381813,NM_001037160.2;AC104794.4,upstream_gene_variant,,ENST00000425235,;CYS1,intron_variant,,ENST00000477304,; T ENSG00000205795 ENST00000381813 Transcript upstream_gene_variant -/2738 -/477 -/158 1 269 -1 CYS1 HGNC HGNC:18525 protein_coding YES CCDS33145.1 ENSP00000371234 Q717R9 UPI00001D6281 NM_001037160.2 MODIFIER 1 SNV 1 . CCC . . +WDR35 0 . GRCh38 chr2 19985338 19985338 + Intron SNP C C T novel TUMOR NORMAL C C c.143-2804N>A ENST00000345530 32 20 11 41 23 16 WDR35,intron_variant,,ENST00000345530,NM_001006657.1;WDR35,intron_variant,,ENST00000281405,NM_020779.3;AC079145.4,upstream_gene_variant,,ENST00000416575,;WDR35,intron_variant,,ENST00000414212,; T ENSG00000118965 ENST00000345530 Transcript intron_variant -/6960 -/3546 -/1181 1 -1 WDR35 HGNC HGNC:29250 protein_coding YES CCDS33152.1 ENSP00000314444 Q9P2L0 UPI000034E5D3 NM_001006657.1 2/27 MODIFIER 1 SNV 1 1 . CCA . . +MAP4K3 0 . GRCh38 chr2 39404606 39404606 + Intron SNP C C T rs374072817 TUMOR NORMAL C C c.97-26483N>A ENST00000263881 10 6 4 6 6 0 MAP4K3,intron_variant,,ENST00000263881,NM_003618.3;MAP4K3,intron_variant,,ENST00000341681,NM_001270425.1;MAP4K3,intron_variant,,ENST00000484274,;MAP4K3,intron_variant,,ENST00000429397,;MAP4K3,intron_variant,,ENST00000437968,; T ENSG00000011566 ENST00000263881 Transcript intron_variant -/4362 -/2685 -/894 rs374072817 1 -1 MAP4K3 HGNC HGNC:6865 protein_coding YES CCDS1803.1 ENSP00000263881 Q8IVH8 UPI00000747E6 NM_003618.3 1/33 MODIFIER 1 SNV 1 . TCT . . +MAP4K3 0 . 
GRCh38 chr2 39404609 39404609 + Intron SNP C C T rs113384156 TUMOR NORMAL C C c.97-26486N>A ENST00000263881 9 5 4 6 6 0 MAP4K3,intron_variant,,ENST00000263881,NM_003618.3;MAP4K3,intron_variant,,ENST00000341681,NM_001270425.1;MAP4K3,intron_variant,,ENST00000484274,;MAP4K3,intron_variant,,ENST00000429397,;MAP4K3,intron_variant,,ENST00000437968,; T ENSG00000011566 ENST00000263881 Transcript intron_variant -/4362 -/2685 -/894 rs113384156 1 -1 MAP4K3 HGNC HGNC:6865 protein_coding YES CCDS1803.1 ENSP00000263881 Q8IVH8 UPI00000747E6 NM_003618.3 1/33 MODIFIER 1 SNV 1 . TCT . . +CNRIP1 0 . GRCh38 chr2 68306124 68306124 + Intron SNP C C A novel TUMOR NORMAL C C c.330+11033N>T ENST00000263655 8 6 2 8 8 0 CNRIP1,intron_variant,,ENST00000263655,NM_015463.2;CNRIP1,intron_variant,,ENST00000409559,NM_001111101.1;CNRIP1,intron_variant,,ENST00000481714,; A ENSG00000119865 ENST00000263655 Transcript intron_variant -/1953 -/495 -/164 1 -1 CNRIP1 HGNC HGNC:24546 protein_coding YES CCDS1886.1 ENSP00000263655 Q96F85 UPI0000070FBE NM_015463.2 2/2 MODIFIER 1 SNV 1 . ACA . . +RGPD1 0 . GRCh38 chr2 86942193 86942193 + Intron SNP T T G rs202230148 TUMOR NORMAL T T c.49-9103N>G ENST00000559485 771 514 256 279 225 54 RGPD1,intron_variant,,ENST00000559485,NM_001024457.3;RGPD1,intron_variant,,ENST00000409776,;RGPD1,intron_variant,,ENST00000398193,;,regulatory_region_variant,,ENSR00000119786,; G ENSG00000187627 ENST00000559485 Transcript intron_variant -/6697 -/5247 -/1748 rs202230148 1 1 RGPD1 HGNC HGNC:32414 protein_coding YES CCDS46358.2 ENSP00000453170 P0DJD0 UPI00018815D1 NM_001024457.3 1/22 G:0.2778 G:0.0734 G:0.1988 G:0.4593 G:0.3161 G:0.3834 MODIFIER 1 SNV 1 . CTG . . +RGPD1 0 . 
GRCh38 chr2 86942348 86942348 + Intron SNP C C G rs564118497 TUMOR NORMAL C C c.49-8948N>G ENST00000559485 265 187 78 141 112 29 RGPD1,intron_variant,,ENST00000559485,NM_001024457.3;RGPD1,intron_variant,,ENST00000409776,;RGPD1,intron_variant,,ENST00000398193,;,regulatory_region_variant,,ENSR00000119786,; G ENSG00000187627 ENST00000559485 Transcript intron_variant -/6697 -/5247 -/1748 rs564118497 1 1 RGPD1 HGNC HGNC:32414 protein_coding YES CCDS46358.2 ENSP00000453170 P0DJD0 UPI00018815D1 NM_001024457.3 1/22 G:0.2756 MODIFIER 1 SNV 1 . CCG . . +RGPD1 0 . GRCh38 chr2 86942392 86942392 + Intron SNP C C G novel TUMOR NORMAL C C c.49-8904N>G ENST00000559485 41 32 9 21 16 5 RGPD1,intron_variant,,ENST00000559485,NM_001024457.3;RGPD1,intron_variant,,ENST00000409776,;RGPD1,intron_variant,,ENST00000398193,;,regulatory_region_variant,,ENSR00000119786,; G ENSG00000187627 ENST00000559485 Transcript intron_variant -/6697 -/5247 -/1748 1 1 RGPD1 HGNC HGNC:32414 protein_coding YES CCDS46358.2 ENSP00000453170 P0DJD0 UPI00018815D1 NM_001024457.3 1/22 MODIFIER 1 SNV 1 . CCG . . +RP4-614C10.3 0 . GRCh38 chr2 89625699 89625699 + Intron SNP G G A novel TUMOR NORMAL G G n.869+9N>T ENST00000636037 393 217 176 201 154 47 RP4-614C10.3,intron_variant,,ENST00000636037,; A ENSG00000283132 ENST00000636037 Transcript intron_variant,non_coding_transcript_variant -/1257 1 -1 RP4-614C10.3 Clone_based_vega_gene unprocessed_pseudogene YES 6/8 MODIFIER 1 SNV . AGC . .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/vep_annotate_results.vcf Tue Jul 03 04:38:21 2018 -0400 @@ -0,0 +1,102 @@ +##fileformat=VCFv4.1 +##fileDate=20180622 +##phasing=none +##reference=file:///export/galaxy/database/files/000/dataset_3.dat +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=IGT,Number=1,Type=String,Description="Genotype when called independently (only filled if called in joint prior mode)"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth"> +##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"> +##FORMAT=<ID=BCOUNT,Number=4,Type=Integer,Description="Occurrence count for each base at this site (A,C,G,T)"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> +##FORMAT=<ID=JGQ,Number=1,Type=Integer,Description="Joint genotype quality (only filled if called in join prior mode)"> +##FORMAT=<ID=VAQ,Number=1,Type=Integer,Description="Variant allele quality"> +##FORMAT=<ID=BQ,Number=.,Type=Integer,Description="Average base quality"> +##FORMAT=<ID=MQ,Number=1,Type=Integer,Description="Average mapping quality across all reads"> +##FORMAT=<ID=AMQ,Number=.,Type=Integer,Description="Average mapping quality for each allele present in the genotype"> +##FORMAT=<ID=SS,Number=1,Type=Integer,Description="Variant status relative to non-adjacent Normal, 0=wildtype,1=germline,2=somatic,3=LOH,4=unknown"> +##FORMAT=<ID=SSC,Number=1,Type=Integer,Description="Somatic Score"> +##VEP=v86 cache=/export/tool_deps/_conda/envs/mulled-v1-c5544a4fa88c522f4b1beb0a65a70220b74d71ea68381cf90426c17c3730f692/vep_cache/homo_sapiens/86_GRCh38 db=. COSMIC=77 genebuild=2014-07 polyphen=2.2.2 assembly=GRCh38.p7 ESP=20141103 ClinVar=201607 sift=sift5.2.2 regbuild=14 gencode=GENCODE 25 dbSNP=147 HGMD-PUBLIC=20162 +##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. 
Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR +chr1 133129 . G A . . CSQ=A|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|CICP27|ENSG00000233750|Transcript|ENST00000442987|processed_pseudogene|1/1||||2105|||||||1||HGNC|HGNC:48835,A|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000453576|lincRNA||1/1||||||||||-1||Clone_based_vega_gene|,A|upstream_gene_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000471248|lincRNA|||||||||||3956|-1||Clone_based_vega_gene|,A|upstream_gene_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000477740|lincRNA|||||||||||3912|-1||Clone_based_vega_gene|,A|downstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|Transcript|ENST00000494149|processed_pseudogene|||||||||||2012|-1||Clone_based_vega_gene|,A|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|Transcript|ENST00000595919|processed_pseudogene|||||||||||4553|-1||Clone_based_vega_gene|,A|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000610542|lincRNA||1/3||||||||||-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:376:187,124,29,36:65,0,311,0:161:.:0:32:5:4:0:. 0/1:0/1:907:361,249,237,60:296,1,610,0:214:.:214:29,32:24:22,25:2:218 +chr1 133160 . G A . . 
CSQ=A|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|CICP27|ENSG00000233750|Transcript|ENST00000442987|processed_pseudogene|1/1||||2136|||||||1||HGNC|HGNC:48835,A|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000453576|lincRNA||1/1||||||||||-1||Clone_based_vega_gene|,A|upstream_gene_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000471248|lincRNA|||||||||||3987|-1||Clone_based_vega_gene|,A|upstream_gene_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000477740|lincRNA|||||||||||3943|-1||Clone_based_vega_gene|,A|downstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|Transcript|ENST00000494149|processed_pseudogene|||||||||||1981|-1||Clone_based_vega_gene|,A|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|Transcript|ENST00000595919|processed_pseudogene|||||||||||4522|-1||Clone_based_vega_gene|,A|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000610542|lincRNA||1/3||||||||||-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:351:158,147,16,30:46,0,305,0:145:.:0:33:13:12:0:. 0/1:0/1:871:370,354,61,86:145,0,724,2:30:.:30:32,33:35:36,35:2:57 +chr1 133483 . G T . . 
CSQ=T|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|CICP27|ENSG00000233750|Transcript|ENST00000442987|processed_pseudogene|1/1||||2459|||||||1||HGNC|HGNC:48835,T|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000453576|lincRNA|1/2||||84|||||||-1||Clone_based_vega_gene|,T|upstream_gene_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000471248|lincRNA|||||||||||4310|-1||Clone_based_vega_gene|,T|upstream_gene_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000477740|lincRNA|||||||||||4266|-1||Clone_based_vega_gene|,T|downstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|Transcript|ENST00000494149|processed_pseudogene|||||||||||1658|-1||Clone_based_vega_gene|,T|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|Transcript|ENST00000595919|processed_pseudogene|||||||||||4199|-1||Clone_based_vega_gene|,T|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|RP11-34P13.7|ENSG00000238009|Transcript|ENST00000610542|lincRNA|1/4||||241|||||||-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:388:203,111,46,28:2,0,314,72:1:.:0:29:1:0:0:. 0/1:0/1:1101:577,245,160,119:1,0,822,278:41:.:41:29,30:11:10,14:2:71 +chr1 1250144 . T C . . 
CSQ=C|upstream_gene_variant|MODIFIER|FAM132A|ENSG00000184163|Transcript|ENST00000330388|protein_coding|||||||||||3422|-1||HGNC|HGNC:32308,C|downstream_gene_variant|MODIFIER|UBE2J2|ENSG00000160087|Transcript|ENST00000347370|protein_coding|||||||||||3770|-1||HGNC|HGNC:19268,C|downstream_gene_variant|MODIFIER|UBE2J2|ENSG00000160087|Transcript|ENST00000349431|protein_coding|||||||||||4326|-1||HGNC|HGNC:19268,C|downstream_gene_variant|MODIFIER|UBE2J2|ENSG00000160087|Transcript|ENST00000400929|protein_coding|||||||||||4841|-1||HGNC|HGNC:19268,C|downstream_gene_variant|MODIFIER|UBE2J2|ENSG00000160087|Transcript|ENST00000450390|nonsense_mediated_decay|||||||||||3765|-1||HGNC|HGNC:19268,C|downstream_gene_variant|MODIFIER|UBE2J2|ENSG00000160087|Transcript|ENST00000464036|nonsense_mediated_decay|||||||||||4904|-1||HGNC|HGNC:19268,C|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|RP5-902P8.12|ENSG00000260179|Transcript|ENST00000565563|lincRNA|1/1||||1191|||||||-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:10:4,6,0,0:0,0,0,10:57:.:0:29:22:22:0:. 0/1:0/1:18:9,6,0,3:0,3,0,15:6:.:6:33,29:52:35,55:2:28 +chr1 2280653 . A G . . CSQ=G|intron_variant|MODIFIER|SKI|ENSG00000157933|Transcript|ENST00000378536|protein_coding||1/6||||||||||1||HGNC|HGNC:10896,G|intron_variant&non_coding_transcript_variant|MODIFIER|SKI|ENSG00000157933|Transcript|ENST00000478223|processed_transcript||1/2||||||||||1||HGNC|HGNC:10896,G|intron_variant&non_coding_transcript_variant|MODIFIER|SKI|ENSG00000157933|Transcript|ENST00000508416|processed_transcript||1/1||||||||||1||HGNC|HGNC:10896 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:55:24,16,2,13:40,0,15,0:55:.:0:25:15:19:0:. 0/1:0/1:70:15,19,8,28:34,0,36,0:118:.:118:25,30:28:41,15:2:28 +chr1 2280736 . A G . . 
CSQ=G|intron_variant|MODIFIER|SKI|ENSG00000157933|Transcript|ENST00000378536|protein_coding||1/6||||||||||1||HGNC|HGNC:10896,G|intron_variant&non_coding_transcript_variant|MODIFIER|SKI|ENSG00000157933|Transcript|ENST00000478223|processed_transcript||1/2||||||||||1||HGNC|HGNC:10896,G|intron_variant&non_coding_transcript_variant|MODIFIER|SKI|ENSG00000157933|Transcript|ENST00000508416|processed_transcript||1/1||||||||||1||HGNC|HGNC:10896 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:8:2,6,0,0:8,0,0,0:51:.:0:31:18:18:0:. 0/1:0/1:8:2,3,0,3:5,0,3,0:19:.:19:27,31:25:20,34:2:24 +chr1 11522650 . G A . . CSQ=A|intron_variant|MODIFIER|DISP3|ENSG00000204624|Transcript|ENST00000294484|protein_coding||10/20||||||||||1||HGNC|HGNC:29251 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:4,2,0,0:0,0,6,0:18:.:0:30:1:1:0:. 1/1:1/1:2:0,1,0,1:1,0,1,0:3:.:3:33:35:40:2:18 +chr1 57636120 . C A . . CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|DAB1|ENSG00000173406|Transcript|ENST00000485760|processed_transcript||7/20||||||||||-1||HGNC|HGNC:2661 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:34:14,12,8,0:8,26,0,0:55:.:0:21:54:60:0:. 0/1:0/1:44:16,12,16,0:16,28,0,0:1:.:1:14,21:56:51,59:2:25 +chr1 72135390 . C A . . CSQ=A|intron_variant|MODIFIER|NEGR1|ENSG00000172260|Transcript|ENST00000357731|protein_coding||1/6||||||||||-1||HGNC|HGNC:17302 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:53:36,7,0,10:10,43,0,0:53:.:0:24:27:26:0:. 0/1:0/1:90:60,4,3,23:26,64,0,0:57:.:57:23,24:51:45,54:2:26 +chr1 116600445 . G A . . 
CSQ=A|intron_variant|MODIFIER|IGSF3|ENSG00000143061|Transcript|ENST00000318837|protein_coding||6/10||||||||||-1||HGNC|HGNC:5950,A|intron_variant|MODIFIER|IGSF3|ENSG00000143061|Transcript|ENST00000369483|protein_coding||7/11||||||||||-1||HGNC|HGNC:5950,A|intron_variant|MODIFIER|IGSF3|ENSG00000143061|Transcript|ENST00000369486|protein_coding||6/10||||||||||-1||HGNC|HGNC:5950 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:24:0,22,0,2:2,0,22,0:55:.:0:33:32:34:0:. 0/1:0/1:30:0,20,0,10:10,0,20,0:42:.:42:32,33:48:27,58:2:28 +chr1 116600446 . A G . . CSQ=G|intron_variant|MODIFIER|IGSF3|ENSG00000143061|Transcript|ENST00000318837|protein_coding||6/10||||||||||-1||HGNC|HGNC:5950,G|intron_variant|MODIFIER|IGSF3|ENSG00000143061|Transcript|ENST00000369483|protein_coding||7/11||||||||||-1||HGNC|HGNC:5950,G|intron_variant|MODIFIER|IGSF3|ENSG00000143061|Transcript|ENST00000369486|protein_coding||6/10||||||||||-1||HGNC|HGNC:5950 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:22:0,22,0,0:22,0,0,0:93:.:0:30:34:34:0:. 0/1:0/1:29:0,20,0,9:20,0,9,0:28:.:28:30,32:48:58,25:2:55 +chr1 125167865 . T C . . CSQ=C|intergenic_variant|MODIFIER|||||||||||||||||||| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:4:3,1,0,0:0,0,0,4:14:.:0:29:0:0:0:. 1/1:1/1:1:0,0,0,1:0,1,0,0:22:.:22:34:22:22:2:16 +chr1 143192820 . G A . . CSQ=A|intergenic_variant|MODIFIER|||||||||||||||||||| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:4,2,0,0:0,0,6,0:18:.:0:30:0:0:0:. 1/1:1/1:2:0,1,1,0:1,0,1,0:26:.:26:32:20:40:2:19 +chr1 143192836 . C T . . CSQ=T|intergenic_variant|MODIFIER|||||||||||||||||||| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:4,2,0,0:0,6,0,0:18:.:0:30:0:0:0:. 1/1:1/1:2:0,1,1,0:0,1,0,1:25:.:25:31:20:40:2:18 +chr1 147517696 . C T . . 
CSQ=T|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|LINC00624|ENSG00000278811|Transcript|ENST00000619867|antisense|1/6||||180|||||||-1||HGNC|HGNC:44254,T|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|LINC00624|ENSG00000278811|Transcript|ENST00000621316|antisense|1/4||||180|||||||-1||HGNC|HGNC:44254 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:0,6,0,0:0,6,0,0:45:.:0:33:60:60:0:. 0/1:0/1:9:1,4,1,3:0,5,0,4:63:.:63:33,32:60:60,60:2:18 +chr1 148359371 . C A . . CSQ=A|downstream_gene_variant|MODIFIER|RNVU1-1|ENSG00000207340|Transcript|ENST00000384610|snRNA|||||||||||2999|-1||HGNC|HGNC:10133,A|upstream_gene_variant|MODIFIER|RP6-74O6.6|ENSG00000272824|Transcript|ENST00000609678|lincRNA|||||||||||685|-1||Clone_based_vega_gene|,A|downstream_gene_variant|MODIFIER|LINC01138|ENSG00000274020|Transcript|ENST00000622328|lincRNA|||||||||||3624|-1||HGNC|HGNC:49454 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:8:8,0,0,0:0,8,0,0:51:.:0:32:38:38:0:. 0/1:0/1:15:12,0,0,3:3,12,0,0:3:.:3:29,33:55:60,54:2:23 +chr1 149390829 . C A . . CSQ=A|5_prime_UTR_variant|MODIFIER|NBPF19|ENSG00000271383|Transcript|ENST00000621744|protein_coding|1/97||||207|||||||1||HGNC|HGNC:31999 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:33:0,12,3,18:20,12,0,1:2:.:0:27:3:8:0:. 0/1:0/1:39:1,22,1,15:16,23,0,0:20:.:20:31,25:33:12,48:2:25 +chr1 151171163 . A G . . 
CSQ=G|intron_variant|MODIFIER|TMOD4|ENSG00000163157|Transcript|ENST00000295314|protein_coding||7/9||||||||||-1||HGNC|HGNC:11874,G|downstream_gene_variant|MODIFIER|SCNM1|ENSG00000163156|Transcript|ENST00000368902|protein_coding|||||||||||2025|1||HGNC|HGNC:23136,G|downstream_gene_variant|MODIFIER|SCNM1|ENSG00000163156|Transcript|ENST00000368905|protein_coding|||||||||||866|1||HGNC|HGNC:23136,G|downstream_gene_variant|MODIFIER|TMOD4|ENSG00000163157|Transcript|ENST00000441701|protein_coding|||||||||||1166|-1|cds_end_NF|HGNC|HGNC:11874,G|downstream_gene_variant|MODIFIER|SCNM1|ENSG00000163156|Transcript|ENST00000459799|processed_transcript|||||||||||2030|1||HGNC|HGNC:23136,G|downstream_gene_variant|MODIFIER|SCNM1|ENSG00000163156|Transcript|ENST00000461862|processed_transcript|||||||||||3184|1||HGNC|HGNC:23136,G|intron_variant&NMD_transcript_variant|MODIFIER|TMOD4|ENSG00000163157|Transcript|ENST00000463543|nonsense_mediated_decay||7/9||||||||||-1||HGNC|HGNC:11874,G|intron_variant|MODIFIER|TMOD4|ENSG00000163157|Transcript|ENST00000466891|protein_coding||2/3||||||||||-1|cds_start_NF|HGNC|HGNC:11874,G|downstream_gene_variant|MODIFIER|SCNM1|ENSG00000163156|Transcript|ENST00000471039|processed_transcript|||||||||||4213|1||HGNC|HGNC:23136,G|downstream_gene_variant|MODIFIER|TMOD4|ENSG00000163157|Transcript|ENST00000488488|processed_transcript|||||||||||270|-1||HGNC|HGNC:11874,G|intron_variant&non_coding_transcript_variant|MODIFIER|VPS72|ENSG00000163159|Transcript|ENST00000491094|retained_intron||10/12||||||||||-1||HGNC|HGNC:11644,G|downstream_gene_variant|MODIFIER|SCNM1|ENSG00000163156|Transcript|ENST00000497147|processed_transcript|||||||||||2052|1||HGNC|HGNC:23136,G|downstream_gene_variant|MODIFIER|TMOD4|ENSG00000163157|Transcript|ENST00000601585|processed_transcript|||||||||||4454|-1||HGNC|HGNC:11874,G|downstream_gene_variant|MODIFIER|SCNM1|ENSG00000163156|Transcript|ENST00000602841|protein_coding|||||||||||2030|1||HGNC|HGNC:23136 
GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:2:0,2,0,0:2,0,0,0:33:.:0:33:60:60:0:. 1/1:1/1:1:0,0,0,1:0,0,1,0:30:.:30:32:60:60:2:16 +chr1 152303901 . C G . . CSQ=G|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||11021|10985|3662|S/T|aGt/aCt|||-1||HGNC|HGNC:3748,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:563:229,216,52,66:1,445,117,0:11:.:0:32:0:0:0:. 0/1:0/1:2572:939,1036,377,220:2,1975,595,0:93:.:93:32,32:15:16,12:2:120 +chr1 152303983 . C T . . CSQ=T|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||10939|10903|3635|D/N|Gac/Aac|||-1||HGNC|HGNC:3748,T|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,T|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:614:273,243,43,55:0,516,0,98:73:.:0:33:3:3:0:. 0/1:0/1:2851:1013,1052,312,474:0,2065,0,786:228:.:228:33,31:28:29,27:2:247 +chr1 152304107 . C G . . 
CSQ=G|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||10815|10779|3593|E/D|gaG/gaC|||-1||HGNC|HGNC:3748,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:394:195,173,13,13:0,368,26,0:255:.:0:33:18:20:0:. 0/1:0/1:1708:624,733,200,151:0,1357,351,0:155:.:155:33,32:41:47,18:2:182 +chr1 152304122 . G A . . CSQ=A|synonymous_variant|LOW|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||10800|10764|3588|H|caC/caT|||-1||HGNC|HGNC:3748,A|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,A|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:389:186,184,5,14:19,0,370,0:255:.:0:29:23:25:0:. 0/1:0/1:1605:602,679,166,158:324,0,1281,0:114:.:114:30,29:47:16,55:2:141 +chr1 152304150 . G C . . CSQ=C|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||10772|10736|3579|T/R|aCg/aGg|||-1||HGNC|HGNC:3748,C|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,C|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:433:164,184,41,44:0,85,348,0:218:.:0:30:24:29:0:. 0/1:0/1:1470:533,682,135,120:0,255,1215,0:116:.:116:33,30:53:22,59:2:143 +chr1 152306079 . T C . . 
CSQ=C|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||8843|8807|2936|D/G|gAc/gGc|||-1||HGNC|HGNC:3748,C|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,C|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:578:205,203,83,87:0,170,0,408:2:.:0:31:0:0:0:. 0/1:0/1:3958:1466,1740,451,301:0,750,2,3206:203:.:203:33,31:14:19,12:2:63 +chr1 152306338 . C T . . CSQ=T|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||8584|8548|2850|G/S|Ggc/Agc|||-1||HGNC|HGNC:3748,T|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,T|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:411:137,165,68,41:0,302,0,109:135:.:0:30:22:27:0:. 0/1:0/1:4985:2106,2070,462,347:3,4176,2,804:228:.:228:30,32:50:54,31:2:108 +chr1 152306380 . T G . . CSQ=G|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||8542|8506|2836|S/R|Agt/Cgt|||-1||HGNC|HGNC:3748,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:487:206,225,32,24:0,0,56,431:203:.:0:31:10:9:0:. 0/1:0/1:5056:2043,2014,517,482:1,0,998,4057:228:.:228:29,32:36:39,35:2:228 +chr1 152307694 . C G . . 
CSQ=G|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||7228|7192|2398|E/Q|Gag/Cag|||-1||HGNC|HGNC:3748,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:537:220,192,50,75:0,412,125,0:57:.:0:33:1:1:0:. 0/1:0/1:2374:820,645,307,602:0,1465,909,0:228:.:228:32,31:22:19,27:2:145 +chr1 152307789 . C G . . CSQ=G|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||7133|7097|2366|S/T|aGt/aCt|||-1||HGNC|HGNC:3748,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:574:227,204,65,78:1,431,142,0:7:.:0:32:0:0:0:. 0/1:0/1:2091:717,763,159,452:2,1480,609,0:228:.:228:32,31:14:13,17:2:111 +chr1 152307871 . C T . . CSQ=T|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||7051|7015|2339|D/N|Gac/Aac|||-1||HGNC|HGNC:3748,T|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,T|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:445:209,207,16,13:2,416,0,27:80:.:0:33:6:6:0:. 0/1:0/1:1915:846,652,315,102:1,1498,1,415:228:.:228:33,30:28:29,23:2:250 +chr1 152307896 . G A . . 
CSQ=A|synonymous_variant|LOW|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||7026|6990|2330|H|caC/caT|||-1||HGNC|HGNC:3748,A|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,A|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:367:184,183,0,0:0,0,367,0:114:.:0:29:9:9:0:. 0/1:0/1:1801:593,587,615,6:620,0,1180,1:228:.:228:31,29:34:26,39:2:250 +chr1 152307995 . C G . . CSQ=G|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||6927|6891|2297|E/D|gaG/gaC|||-1||HGNC|HGNC:3748,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:429:173,167,41,48:0,340,89,0:135:.:0:33:15:17:0:. 0/1:0/1:3096:1236,990,353,517:1,2226,869,0:228:.:228:33,32:44:49,34:2:108 +chr1 152308083 . T C . . CSQ=C|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||6839|6803|2268|H/R|cAt/cGt|||-1||HGNC|HGNC:3748,C|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,C|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:422:192,182,18,30:2,46,0,374:255:.:0:31:36:41:0:. 0/1:0/1:2559:1011,1235,84,229:2,311,0,2246:64:.:64:28,31:55:26,59:2:91 +chr1 152308424 . T G . . 
CSQ=G|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||6498|6462|2154|Q/H|caA/caC|||-1||HGNC|HGNC:3748,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:422:170,175,45,32:0,0,77,345:64:.:0:32:37:39:0:. 0/1:0/1:3124:1059,1130,507,428:0,2,933,2189:228:.:228:29,31:55:48,58:2:37 +chr1 152308814 . A G . . CSQ=G|synonymous_variant|LOW|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||6108|6072|2024|H|caT/caC|||-1||HGNC|HGNC:3748,G|upstream_gene_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000392688|antisense|||||||||||4645|1||HGNC|HGNC:27913,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,G|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:424:122,174,86,42:296,1,127,0:88:.:0:31:21:27:0:. 0/1:0/1:3521:1103,1235,588,595:2338,1,1182,0:228:.:228:30,29:52:58,40:2:61 +chr1 152310807 . C T . . 
CSQ=T|missense_variant|MODERATE|FLG|ENSG00000143631|Transcript|ENST00000368799|protein_coding|3/3||||4115|4079|1360|R/H|cGc/cAc|||-1||HGNC|HGNC:3748,T|upstream_gene_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000392688|antisense|||||||||||2652|1||HGNC|HGNC:27913,T|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000420707|antisense||4/8||||||||||1||HGNC|HGNC:27913,T|intron_variant&non_coding_transcript_variant|MODIFIER|FLG-AS1|ENSG00000237975|Transcript|ENST00000593011|antisense||2/3||||||||||1||HGNC|HGNC:27913 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:394:172,150,22,50:2,322,0,70:51:.:0:30:40:40:0:. 0/1:0/1:2461:670,644,573,574:4,1314,3,1140:228:.:228:30,30:53:60,46:2:24 +chr1 158607804 . C A . . CSQ=A|downstream_gene_variant|MODIFIER|OR10Z1|ENSG00000198967|Transcript|ENST00000361284|protein_coding|||||||||||424|1||HGNC|HGNC:14996,A|downstream_gene_variant|MODIFIER|SPTA1|ENSG00000163554|Transcript|ENST00000368147|protein_coding|||||||||||2902|-1||HGNC|HGNC:11272,A|downstream_gene_variant|MODIFIER|SPTA1|ENSG00000163554|Transcript|ENST00000481212|retained_intron|||||||||||4945|-1||HGNC|HGNC:11272,A|downstream_gene_variant|MODIFIER|SPTA1|ENSG00000163554|Transcript|ENST00000485680|processed_transcript|||||||||||3404|-1||HGNC|HGNC:11272,A|downstream_gene_variant|MODIFIER|SPTA1|ENSG00000163554|Transcript|ENST00000498708|retained_intron|||||||||||4979|-1||HGNC|HGNC:11272 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:8:0,8,0,0:0,8,0,0:51:.:0:33:60:60:0:. 0/1:0/1:10:1,7,1,1:2,8,0,0:6:.:6:31,33:60:60,60:2:23 +chr1 161363118 . C T . . 
CSQ=T|3_prime_UTR_variant|MODIFIER|SDHC|ENSG00000143252|Transcript|ENST00000342751|protein_coding|5/5||||1056|||||||1||HGNC|HGNC:10682,T|downstream_gene_variant|MODIFIER|CFAP126|ENSG00000188931|Transcript|ENST00000367974|protein_coding|||||||||||1613|-1||HGNC|HGNC:32325,T|3_prime_UTR_variant|MODIFIER|SDHC|ENSG00000143252|Transcript|ENST00000367975|protein_coding|6/6||||1344|||||||1||HGNC|HGNC:10682,T|downstream_gene_variant|MODIFIER|SDHC|ENSG00000143252|Transcript|ENST00000392169|protein_coding|||||||||||685|1||HGNC|HGNC:10682,T|downstream_gene_variant|MODIFIER|SDHC|ENSG00000143252|Transcript|ENST00000432287|protein_coding|||||||||||657|1||HGNC|HGNC:10682,T|upstream_gene_variant|MODIFIER|SDHC|ENSG00000143252|Transcript|ENST00000437833|processed_transcript|||||||||||4904|1||HGNC|HGNC:10682,T|downstream_gene_variant|MODIFIER|SDHC|ENSG00000143252|Transcript|ENST00000470743|nonsense_mediated_decay|||||||||||461|1|cds_start_NF|HGNC|HGNC:10682,T|downstream_gene_variant|MODIFIER|SDHC|ENSG00000143252|Transcript|ENST00000504963|nonsense_mediated_decay|||||||||||666|1||HGNC|HGNC:10682,T|downstream_gene_variant|MODIFIER|SDHC|ENSG00000143252|Transcript|ENST00000513009|protein_coding|||||||||||578|1||HGNC|HGNC:10682 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:171:24,101,8,38:0,125,0,46:77:.:0:33:10:9:0:. 0/1:0/1:326:37,152,29,108:0,189,0,137:228:.:228:33,31:35:33,37:2:50 +chr1 161444478 . T A . . CSQ=A|upstream_gene_variant|MODIFIER|RP11-122G18.11|ENSG00000283317|Transcript|ENST00000636824|lincRNA|||||||||||3482|-1||Clone_based_vega_gene|,A|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-122G18.10|ENSG00000283360|Transcript|ENST00000637155|lincRNA||1/4||||||||||-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:90:27,33,21,9:30,0,0,60:47:.:0:31:4:4:0:. 0/1:0/1:85:39,15,25,6:31,0,0,54:61:.:61:31,31:13:20,9:2:20 +chr1 161444486 . A G . . 
CSQ=G|upstream_gene_variant|MODIFIER|RP11-122G18.11|ENSG00000283317|Transcript|ENST00000636824|lincRNA|||||||||||3490|-1||Clone_based_vega_gene|,G|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-122G18.10|ENSG00000283360|Transcript|ENST00000637155|lincRNA||1/4||||||||||-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:86:23,33,21,9:56,0,30,0:43:.:0:30:4:5:0:. 0/1:0/1:73:29,13,25,6:42,0,31,0:74:.:74:29,26:15:12,20:2:16 +chr1 161444488 . A G . . CSQ=G|upstream_gene_variant|MODIFIER|RP11-122G18.11|ENSG00000283317|Transcript|ENST00000636824|lincRNA|||||||||||3492|-1||Clone_based_vega_gene|,G|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-122G18.10|ENSG00000283360|Transcript|ENST00000637155|lincRNA||1/4||||||||||-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:88:22,35,21,10:57,0,31,0:42:.:0:30:4:5:0:. 0/1:0/1:74:30,13,24,7:43,0,31,0:72:.:72:29,28:15:12,19:2:15 +chr1 161444514 . T A . . CSQ=A|upstream_gene_variant|MODIFIER|RP11-122G18.11|ENSG00000283317|Transcript|ENST00000636824|lincRNA|||||||||||3518|-1||Clone_based_vega_gene|,A|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-122G18.10|ENSG00000283360|Transcript|ENST00000637155|lincRNA||1/4||||||||||-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:99:10,60,19,10:29,0,0,70:54:.:0:29:3:3:0:. 0/1:0/1:70:10,25,22,13:35,0,0,35:98:.:153:29,29:17:21,12:2:33 +chr1 161444527 . C G . . CSQ=G|upstream_gene_variant|MODIFIER|RP11-122G18.11|ENSG00000283317|Transcript|ENST00000636824|lincRNA|||||||||||3531|-1||Clone_based_vega_gene|,G|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-122G18.10|ENSG00000283360|Transcript|ENST00000637155|lincRNA||1/4||||||||||-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:125:13,90,12,10:0,103,22,0:6:.:0:32:2:1:0:. 0/1:0/1:74:15,38,12,9:0,53,21,0:85:.:85:32,31:16:14,23:2:109 +chr1 178725267 . G A . . 
CSQ=A|upstream_gene_variant|MODIFIER|RALGPS2|ENSG00000116191|Transcript|ENST00000324778|protein_coding|||||||||||129|1|cds_end_NF|HGNC|HGNC:30279,A|5_prime_UTR_variant|MODIFIER|RALGPS2|ENSG00000116191|Transcript|ENST00000367634|protein_coding|1/19||||121|||||||1||HGNC|HGNC:30279,A|5_prime_UTR_variant|MODIFIER|RALGPS2|ENSG00000116191|Transcript|ENST00000367635|protein_coding|1/20||||103|||||||1||HGNC|HGNC:30279,A|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|RALGPS2|ENSG00000116191|Transcript|ENST00000495034|retained_intron|1/10||||103|||||||1||HGNC|HGNC:30279,A|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|RP11-428K3.1|ENSG00000273062|Transcript|ENST00000608517|antisense|1/1||||1019|||||||-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:10:10,0,0,0:0,0,10,0:57:.:0:28:56:56:0:. 0/1:0/1:10:7,0,1,2:3,0,7,0:25:.:25:28,28:60:60,60:2:30 +chr1 203166324 . C T . . CSQ=T|downstream_gene_variant|MODIFIER|MYBPH|ENSG00000133055|Transcript|ENST00000255416|protein_coding|||||||||||1487|-1||HGNC|HGNC:7552,T|3_prime_UTR_variant|MODIFIER|ADORA1|ENSG00000163485|Transcript|ENST00000309502|protein_coding|6/6||||1816|||||||1||HGNC|HGNC:262,T|3_prime_UTR_variant|MODIFIER|ADORA1|ENSG00000163485|Transcript|ENST00000337894|protein_coding|4/4||||1838|||||||1||HGNC|HGNC:262,T|3_prime_UTR_variant|MODIFIER|ADORA1|ENSG00000163485|Transcript|ENST00000367235|protein_coding|3/3||||1794|||||||1||HGNC|HGNC:262,T|3_prime_UTR_variant|MODIFIER|ADORA1|ENSG00000163485|Transcript|ENST00000367236|protein_coding|3/3||||2326|||||||1||HGNC|HGNC:262,T|downstream_gene_variant|MODIFIER|ADORA1|ENSG00000163485|Transcript|ENST00000464019|processed_transcript|||||||||||583|1||HGNC|HGNC:262,T|downstream_gene_variant|MODIFIER|ADORA1|ENSG00000163485|Transcript|ENST00000467253|processed_transcript|||||||||||209|1||HGNC|HGNC:262,T|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|ADORA1|ENSG00000163485|Transc
ript|ENST00000472535|processed_transcript|2/2||||1211|||||||1||HGNC|HGNC:262,T|downstream_gene_variant|MODIFIER|ADORA1|ENSG00000163485|Transcript|ENST00000618295|protein_coding|||||||||||262|1||HGNC|HGNC:262,T|downstream_gene_variant|MODIFIER|MYBPH|ENSG00000133055|Transcript|ENST00000621380|protein_coding|||||||||||1487|-1||HGNC|HGNC:7552 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:6,0,0,0:0,6,0,0:45:.:0:34:60:60:0:. 0/1:0/1:8:6,0,2,0:0,6,0,2:11:.:11:33,33:60:60,60:2:18 +chr1 222468242 . T C . . CSQ=C|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|CICP13|ENSG00000234419|Transcript|ENST00000422015|processed_pseudogene|1/1||||149|||||||1||HGNC|HGNC:37907 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:61:19,37,0,5:0,5,0,56:21:.:0:31:4:4:0:. 1/1:1/1:84:4,4,17,59:0,76,0,8:116:.:240:26:33:34:2:21 +chr1 223947556 . G A . . CSQ=A|upstream_gene_variant|MODIFIER|CICP5|ENSG00000233771|Transcript|ENST00000424045|unprocessed_pseudogene|||||||||||49|1||HGNC|HGNC:37754,A|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000608760|retained_intron|||||||||||3838|-1||HGNC|HGNC:51732,A|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000634905|processed_transcript|||||||||||3923|-1||HGNC|HGNC:51732 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:91:64,7,20,0:20,0,71,0:1:.:0:28:0:0:0:. 0/1:0/1:65:42,3,19,1:20,0,45,0:26:.:38:32,28:16:26,11:2:57 +chr1 223947588 . G T . . 
CSQ=T|upstream_gene_variant|MODIFIER|CICP5|ENSG00000233771|Transcript|ENST00000424045|unprocessed_pseudogene|||||||||||17|1||HGNC|HGNC:37754,T|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000608760|retained_intron|||||||||||3806|-1||HGNC|HGNC:51732,T|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000634905|processed_transcript|||||||||||3891|-1||HGNC|HGNC:51732 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:117:85,18,7,7:0,0,103,14:1:.:0:32:0:0:0:. 1/1:1/1:87:39,24,20,4:0,0,63,24:11:.:60:29:11:28:2:20 +chr1 223947978 . G T . . CSQ=T|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|CICP5|ENSG00000233771|Transcript|ENST00000424045|unprocessed_pseudogene|1/3||||374|||||||1||HGNC|HGNC:37754,T|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000608760|retained_intron|||||||||||3416|-1||HGNC|HGNC:51732,T|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000634905|processed_transcript|||||||||||3501|-1||HGNC|HGNC:51732 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:254:171,20,53,10:0,0,191,63:1:.:0:32:1:2:0:. 0/1:0/1:430:205,20,192,13:0,0,225,205:225:.:225:32,30:35:39,31:2:134 +chr1 223948062 . G A . . CSQ=A|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|CICP5|ENSG00000233771|Transcript|ENST00000424045|unprocessed_pseudogene|1/3||||458|||||||1||HGNC|HGNC:37754,A|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000608760|retained_intron|||||||||||3332|-1||HGNC|HGNC:51732,A|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000634905|processed_transcript|||||||||||3417|-1||HGNC|HGNC:51732 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:304:132,66,44,62:104,0,198,2:62:.:0:29:20:30:0:. 0/1:0/1:1124:380,206,360,178:537,0,586,1:228:.:228:31,29:47:36,58:2:35 +chr1 223948558 . C T . . 
CSQ=T|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|CICP5|ENSG00000233771|Transcript|ENST00000424045|unprocessed_pseudogene|1/3||||954|||||||1||HGNC|HGNC:37754,T|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000608760|retained_intron|||||||||||2836|-1||HGNC|HGNC:51732,T|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000634905|processed_transcript|||||||||||2921|-1||HGNC|HGNC:51732 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:40:8,27,1,4:0,35,0,5:1:.:0:28:1:1:0:. 0/1:0/1:154:10,64,21,59:0,74,0,80:176:.:220:28,31:22:23,22:2:49 +chr1 223949699 . T C . . CSQ=C|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|CICP5|ENSG00000233771|Transcript|ENST00000424045|unprocessed_pseudogene|2/3||||1941|||||||1||HGNC|HGNC:37754,C|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000608760|retained_intron|||||||||||1695|-1||HGNC|HGNC:51732,C|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000634905|processed_transcript|||||||||||1780|-1||HGNC|HGNC:51732 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:189:94,95,0,0:0,0,0,189:28:.:0:30:1:1:0:. 0/1:0/1:370:104,102,99,65:0,164,0,206:206:.:206:30,31:25:18,30:2:233 +chr1 223950486 . C T . . CSQ=T|downstream_gene_variant|MODIFIER|CICP5|ENSG00000233771|Transcript|ENST00000424045|unprocessed_pseudogene|||||||||||70|1||HGNC|HGNC:37754,T|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000608760|retained_intron|||||||||||908|-1||HGNC|HGNC:51732,T|downstream_gene_variant|MODIFIER|GTF2IP20|ENSG00000272645|Transcript|ENST00000634905|processed_transcript|||||||||||993|-1||HGNC|HGNC:51732 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:79:14,4,49,12:0,18,0,61:58:.:0:30:10:31:0:. 0/1:0/1:115:19,6,52,38:0,25,0,90:73:.:177:30,30:31:56,25:2:31 +chr1 227976709 . T C . . 
CSQ=C|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|CICP26|ENSG00000233003|Transcript|ENST00000425003|unprocessed_pseudogene|1/3||||1485|||||||-1||HGNC|HGNC:48834,C|upstream_gene_variant|MODIFIER|SEPT14P17|ENSG00000274886|Transcript|ENST00000617978|unprocessed_pseudogene|||||||||||3342|1||HGNC|HGNC:51703 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:10:8,2,0,0:0,0,0,10:22:.:0:25:1:1:0:. 1/1:1/1:91:3,1,82,5:0,86,1,4:25:.:25:32:3:2:2:20 +chr1 227977309 . A G . . CSQ=G|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|CICP26|ENSG00000233003|Transcript|ENST00000425003|unprocessed_pseudogene|1/3||||885|||||||-1||HGNC|HGNC:48834,G|upstream_gene_variant|MODIFIER|SEPT14P17|ENSG00000274886|Transcript|ENST00000617978|unprocessed_pseudogene|||||||||||2742|1||HGNC|HGNC:51703 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:170:101,30,27,12:131,0,38,1:116:.:0:31:9:11:0:. 0/1:0/1:461:185,51,194,31:236,0,225,0:228:.:228:31,28:38:48,27:2:113 +chr1 234978406 . C G . . CSQ=G|downstream_gene_variant|MODIFIER|RP11-443B7.3|ENSG00000258082|Transcript|ENST00000549744|lincRNA|||||||||||1241|-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:2:2,0,0,0:0,2,0,0:33:.:0:32:60:60:0:. 1/1:1/1:1:0,0,1,0:0,0,1,0:12:.:12:12:60:60:2:15 +chr1 234978426 . C G . . CSQ=G|downstream_gene_variant|MODIFIER|RP11-443B7.3|ENSG00000258082|Transcript|ENST00000549744|lincRNA|||||||||||1221|-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:2:2,0,0,0:0,2,0,0:33:.:0:31:60:60:0:. 1/1:1/1:1:0,0,1,0:0,0,1,0:30:.:30:35:60:60:2:16 +chr1 234978429 . T C . . CSQ=C|downstream_gene_variant|MODIFIER|RP11-443B7.3|ENSG00000258082|Transcript|ENST00000549744|lincRNA|||||||||||1218|-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:2:2,0,0,0:0,0,0,2:33:.:0:34:60:60:0:. 1/1:1/1:1:0,0,1,0:0,1,0,0:25:.:25:25:60:60:2:16 +chr1 240207797 . G A . . 
CSQ=A|synonymous_variant|LOW|FMN2|ENSG00000155816|Transcript|ENST00000319653|protein_coding|5/18||||3215|2985|995|A|gcG/gcA|||1||HGNC|HGNC:14074,A|downstream_gene_variant|MODIFIER|FMN2|ENSG00000155816|Transcript|ENST00000447095|protein_coding|||||||||||610|1|cds_end_NF|HGNC|HGNC:14074 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:161:61,72,12,16:26,2,133,0:59:.:0:28:20:18:0:. 0/1:0/1:245:67,113,13,52:64,1,180,0:34:.:34:30,28:38:47,35:2:32 +chr1 240207800 . C A . . CSQ=A|synonymous_variant|LOW|FMN2|ENSG00000155816|Transcript|ENST00000319653|protein_coding|5/18||||3218|2988|996|G|ggC/ggA|||1||HGNC|HGNC:14074,A|downstream_gene_variant|MODIFIER|FMN2|ENSG00000155816|Transcript|ENST00000447095|protein_coding|||||||||||613|1|cds_end_NF|HGNC|HGNC:14074 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:152:57,69,12,14:22,126,4,0:95:.:0:30:18:17:0:. 0/1:0/1:225:60,107,14,44:53,167,5,0:8:.:8:30,29:37:45,34:2:35 +chr1 240207806 . C T . . CSQ=T|synonymous_variant|LOW|FMN2|ENSG00000155816|Transcript|ENST00000319653|protein_coding|5/18||||3224|2994|998|P|ccC/ccT|||1||HGNC|HGNC:14074,T|downstream_gene_variant|MODIFIER|FMN2|ENSG00000155816|Transcript|ENST00000447095|protein_coding|||||||||||619|1|cds_end_NF|HGNC|HGNC:14074 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:144:57,65,10,12:1,122,0,21:114:.:0:32:15:14:0:. 0/1:0/1:216:59,105,12,40:1,164,0,51:13:.:13:31,29:34:31,45:2:40 +chr1 240207812 . G A,T . . 
CSQ=A|synonymous_variant|LOW|FMN2|ENSG00000155816|Transcript|ENST00000319653|protein_coding|5/18||||3230|3000|1000|P|ccG/ccA|||1||HGNC|HGNC:14074,T|synonymous_variant|LOW|FMN2|ENSG00000155816|Transcript|ENST00000319653|protein_coding|5/18||||3230|3000|1000|P|ccG/ccT|||1||HGNC|HGNC:14074,A|downstream_gene_variant|MODIFIER|FMN2|ENSG00000155816|Transcript|ENST00000447095|protein_coding|||||||||||625|1|cds_end_NF|HGNC|HGNC:14074,T|downstream_gene_variant|MODIFIER|FMN2|ENSG00000155816|Transcript|ENST00000447095|protein_coding|||||||||||625|1|cds_end_NF|HGNC|HGNC:14074 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/2:0/2:124:22,15,33,54:12,8,37,67:37:.:69:21,26:10:5,12:1:. 1/2:1/2:185:12,4,50,119:31,10,16,128:95:.:165:27,26:28:38,27:2:69 +chr1 240208175 . T G . . CSQ=G|synonymous_variant|LOW|FMN2|ENSG00000155816|Transcript|ENST00000319653|protein_coding|5/18||||3593|3363|1121|P|ccT/ccG|||1||HGNC|HGNC:14074,G|downstream_gene_variant|MODIFIER|FMN2|ENSG00000155816|Transcript|ENST00000447095|protein_coding|||||||||||988|1|cds_end_NF|HGNC|HGNC:14074 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:125:51,50,10,14:0,7,17,101:155:.:0:28:31:36:0:. 0/1:0/1:146:64,47,12,23:0,5,30,111:2:.:2:29,27:47:24,53:2:29 +chr1 240472659 . G T . . CSQ=T|intron_variant|MODIFIER|FMN2|ENSG00000155816|Transcript|ENST00000319653|protein_coding||17/17||||||||||1||HGNC|HGNC:14074,T|intron_variant&non_coding_transcript_variant|MODIFIER|FMN2|ENSG00000155816|Transcript|ENST00000496950|processed_transcript||2/2||||||||||1||HGNC|HGNC:14074,T|intron_variant|MODIFIER|FMN2|ENSG00000155816|Transcript|ENST00000543681|protein_coding||2/2||||||||||1||HGNC|HGNC:14074,T|intron_variant|MODIFIER|FMN2|ENSG00000155816|Transcript|ENST00000545751|protein_coding||10/10||||||||||1||HGNC|HGNC:14074 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:0,6,0,0:0,0,6,0:45:.:0:25:60:60:0:. 0/1:0/1:7:0,5,0,2:0,0,5,2:8:.:8:28,30:60:60,60:2:18 +chr1 242810191 . T G . . 
CSQ=G|intergenic_variant|MODIFIER|||||||||||||||||||| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:71:26,30,3,12:0,0,15,56:88:.:0:30:18:22:0:. 0/1:0/1:171:59,40,54,18:0,0,72,99:219:.:219:31,30:39:35,42:2:61 +chr1 242810198 . T G . . CSQ=G|intergenic_variant|MODIFIER|||||||||||||||||||| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:79:24,34,3,18:0,0,21,58:76:.:0:30:16:21:0:. 0/1:0/1:177:57,38,54,28:0,0,82,95:228:.:228:30,30:39:35,42:2:49 +chr1 243052907 . C T . . CSQ=T|upstream_gene_variant|MODIFIER|LINC01347|ENSG00000231512|Transcript|ENST00000437691|lincRNA|||||||||||655|-1||EntrezGene|HGNC:50566,T|downstream_gene_variant|MODIFIER|RP11-261C10.7|ENSG00000253326|Transcript|ENST00000517560|processed_pseudogene|||||||||||1954|-1||Clone_based_vega_gene|,T|downstream_gene_variant|MODIFIER|RP11-261C10.8|ENSG00000278455|Transcript|ENST00000611420|processed_pseudogene|||||||||||1288|1||Clone_based_vega_gene|,T|downstream_gene_variant|MODIFIER|LINC01347|ENSG00000214837|Transcript|ENST00000627498|processed_transcript|||||||||||3400|-1||HGNC|HGNC:50566 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:7:0,7,0,0:0,7,0,0:15:.:0:25:0:0:0:. 0/1:0/1:17:0,11,0,6:0,11,0,6:23:.:23:28,31:25:26,24:2:21 +chr1 244891249 . C T . . CSQ=T|intergenic_variant|MODIFIER|||||||||||||||||||| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:0,6,0,0:0,6,0,0:45:.:0:30:60:60:0:. 0/1:0/1:7:0,5,0,2:0,5,0,2:4:.:4:22,28:54:60,40:2:18 +chr2 3035112 . T C . . CSQ=C|intron_variant&non_coding_transcript_variant|MODIFIER|LINC01250|ENSG00000234423|Transcript|ENST00000457478|lincRNA||3/6||||||||||-1||HGNC|HGNC:49844 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:263:82,120,30,31:0,59,2,202:64:.:0:29:49:57:0:. 0/1:0/1:559:136,265,60,98:0,157,1,401:167:.:167:27,30:53:35,60:2:37 +chr2 3329166 . G C . . 
CSQ=C|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000382125|protein_coding||3/8||||||||||-1||HGNC|HGNC:12383,C|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000398659|protein_coding||3/9||||||||||-1||HGNC|HGNC:12383,C|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000406835|nonsense_mediated_decay||4/5||||||||||-1||HGNC|HGNC:12383,C|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000435721|nonsense_mediated_decay||3/5||||||||||-1||HGNC|HGNC:12383,C|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000441271|protein_coding||1/4||||||||||-1|cds_end_NF|HGNC|HGNC:12383,C|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000443925|protein_coding||3/5||||||||||-1||HGNC|HGNC:12383,C|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000444776|protein_coding||3/4||||||||||-1|cds_end_NF|HGNC|HGNC:12383,C|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000455162|nonsense_mediated_decay||2/6||||||||||-1||HGNC|HGNC:12383,C|intron_variant&non_coding_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000463662|processed_transcript||3/5||||||||||-1||HGNC|HGNC:12383 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:66:24,30,6,6:0,12,54,0:73:.:0:31:28:32:0:. 0/1:0/1:64:19,28,10,7:0,13,47,4:19:.:19:28,31:39:23,47:2:43 +chr2 3329223 . G C . . 
CSQ=C|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000382125|protein_coding||3/8||||||||||-1||HGNC|HGNC:12383,C|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000398659|protein_coding||3/9||||||||||-1||HGNC|HGNC:12383,C|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000406835|nonsense_mediated_decay||4/5||||||||||-1||HGNC|HGNC:12383,C|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000435721|nonsense_mediated_decay||3/5||||||||||-1||HGNC|HGNC:12383,C|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000441271|protein_coding||1/4||||||||||-1|cds_end_NF|HGNC|HGNC:12383,C|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000443925|protein_coding||3/5||||||||||-1||HGNC|HGNC:12383,C|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000444776|protein_coding||3/4||||||||||-1|cds_end_NF|HGNC|HGNC:12383,C|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000455162|nonsense_mediated_decay||2/6||||||||||-1||HGNC|HGNC:12383,C|intron_variant&non_coding_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000463662|processed_transcript||3/5||||||||||-1||HGNC|HGNC:12383 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:34:11,16,5,2:0,7,27,0:51:.:0:32:34:43:0:. 0/1:0/1:36:9,13,6,8:0,7,22,7:0:.:76:30,31:42:29,51:2:27 +chr2 3329230 . G A . . 
CSQ=A|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000382125|protein_coding||3/8||||||||||-1||HGNC|HGNC:12383,A|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000398659|protein_coding||3/9||||||||||-1||HGNC|HGNC:12383,A|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000406835|nonsense_mediated_decay||4/5||||||||||-1||HGNC|HGNC:12383,A|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000435721|nonsense_mediated_decay||3/5||||||||||-1||HGNC|HGNC:12383,A|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000441271|protein_coding||1/4||||||||||-1|cds_end_NF|HGNC|HGNC:12383,A|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000443925|protein_coding||3/5||||||||||-1||HGNC|HGNC:12383,A|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000444776|protein_coding||3/4||||||||||-1|cds_end_NF|HGNC|HGNC:12383,A|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000455162|nonsense_mediated_decay||2/6||||||||||-1||HGNC|HGNC:12383,A|intron_variant&non_coding_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000463662|processed_transcript||3/5||||||||||-1||HGNC|HGNC:12383 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:30:9,12,7,2:7,0,21,2:44:.:0:32:35:46:0:. 0/1:0/1:31:8,10,5,8:12,0,18,1:116:.:116:30,31:44:31,52:2:17 +chr2 3329243 . A G . . 
CSQ=G|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000382125|protein_coding||3/8||||||||||-1||HGNC|HGNC:12383,G|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000398659|protein_coding||3/9||||||||||-1||HGNC|HGNC:12383,G|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000406835|nonsense_mediated_decay||4/5||||||||||-1||HGNC|HGNC:12383,G|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000435721|nonsense_mediated_decay||3/5||||||||||-1||HGNC|HGNC:12383,G|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000441271|protein_coding||1/4||||||||||-1|cds_end_NF|HGNC|HGNC:12383,G|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000443925|protein_coding||3/5||||||||||-1||HGNC|HGNC:12383,G|intron_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000444776|protein_coding||3/4||||||||||-1|cds_end_NF|HGNC|HGNC:12383,G|intron_variant&NMD_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000455162|nonsense_mediated_decay||2/6||||||||||-1||HGNC|HGNC:12383,G|intron_variant&non_coding_transcript_variant|MODIFIER|TSSC1|ENSG00000032389|Transcript|ENST00000463662|processed_transcript||3/5||||||||||-1||HGNC|HGNC:12383 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:29:13,14,0,2:27,0,2,0:68:.:0:30:34:36:0:. 0/1:0/1:31:9,11,2,9:20,0,11,0:59:.:59:30,31:39:48,22:2:41 +chr2 3775131 . G C . . CSQ=C|intron_variant|MODIFIER|DCDC2C|ENSG00000214866|Transcript|ENST00000399143|protein_coding||8/10||||||||||1||HGNC|HGNC:32696,C|intron_variant|MODIFIER|DCDC2C|ENSG00000214866|Transcript|ENST00000423741|protein_coding||8/8||||||||||1|cds_start_NF&cds_end_NF|HGNC|HGNC:32696,C|intron_variant&non_coding_transcript_variant|MODIFIER|DCDC2C|ENSG00000214866|Transcript|ENST00000537457|processed_transcript||5/7||||||||||1||HGNC|HGNC:32696 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:172:79,50,25,18:0,43,129,0:117:.:0:30:33:42:0:. 
0/1:0/1:366:184,70,41,71:0,112,254,0:48:.:48:32,30:39:21,47:2:75 +chr2 10080680 . C T . . CSQ=T|upstream_gene_variant|MODIFIER|CYS1|ENSG00000205795|Transcript|ENST00000381813|protein_coding|||||||||||269|-1||HGNC|HGNC:18525,T|upstream_gene_variant|MODIFIER|AC104794.4|ENSG00000233502|Transcript|ENST00000425235|lincRNA|||||||||||3101|1||Clone_based_vega_gene|,T|intron_variant&non_coding_transcript_variant|MODIFIER|CYS1|ENSG00000205795|Transcript|ENST00000477304|processed_transcript||1/2||||||||||-1||HGNC|HGNC:18525 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:6,0,0,0:0,6,0,0:45:.:0:34:60:60:0:. 0/1:0/1:5:3,0,2,0:0,3,0,2:18:.:18:34,32:60:60,60:2:18 +chr2 19985338 . C T . . CSQ=T|intron_variant|MODIFIER|WDR35|ENSG00000118965|Transcript|ENST00000281405|protein_coding||2/26||||||||||-1||HGNC|HGNC:29250,T|intron_variant|MODIFIER|WDR35|ENSG00000118965|Transcript|ENST00000345530|protein_coding||2/27||||||||||-1||HGNC|HGNC:29250,T|intron_variant&NMD_transcript_variant|MODIFIER|WDR35|ENSG00000118965|Transcript|ENST00000414212|nonsense_mediated_decay||2/27||||||||||-1||HGNC|HGNC:29250,T|upstream_gene_variant|MODIFIER|AC079145.4|ENSG00000227210|Transcript|ENST00000416575|antisense|||||||||||4879|1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:41:18,5,0,18:2,23,0,16:45:.:0:32:17:24:0:. 0/1:0/1:32:15,5,0,12:1,20,0,11:14:.:14:32,28:23:27,15:2:18 +chr2 39404606 . C T . . 
CSQ=T|intron_variant|MODIFIER|MAP4K3|ENSG00000011566|Transcript|ENST00000263881|protein_coding||1/33||||||||||-1||HGNC|HGNC:6865,T|intron_variant|MODIFIER|MAP4K3|ENSG00000011566|Transcript|ENST00000341681|protein_coding||1/32||||||||||-1||HGNC|HGNC:6865,T|intron_variant&NMD_transcript_variant|MODIFIER|MAP4K3|ENSG00000011566|Transcript|ENST00000429397|nonsense_mediated_decay||1/6||||||||||-1||HGNC|HGNC:6865,T|intron_variant&NMD_transcript_variant|MODIFIER|MAP4K3|ENSG00000011566|Transcript|ENST00000437968|nonsense_mediated_decay||1/3||||||||||-1||HGNC|HGNC:6865,T|intron_variant|MODIFIER|MAP4K3|ENSG00000011566|Transcript|ENST00000484274|protein_coding||1/2||||||||||-1||HGNC|HGNC:6865 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:0,6,0,0:0,6,0,0:45:.:0:24:40:40:0:. 0/1:0/1:10:0,6,0,4:0,6,0,4:30:.:30:28,26:50:57,39:2:18 +chr2 39404609 . C T . . CSQ=T|intron_variant|MODIFIER|MAP4K3|ENSG00000011566|Transcript|ENST00000263881|protein_coding||1/33||||||||||-1||HGNC|HGNC:6865,T|intron_variant|MODIFIER|MAP4K3|ENSG00000011566|Transcript|ENST00000341681|protein_coding||1/32||||||||||-1||HGNC|HGNC:6865,T|intron_variant&NMD_transcript_variant|MODIFIER|MAP4K3|ENSG00000011566|Transcript|ENST00000429397|nonsense_mediated_decay||1/6||||||||||-1||HGNC|HGNC:6865,T|intron_variant&NMD_transcript_variant|MODIFIER|MAP4K3|ENSG00000011566|Transcript|ENST00000437968|nonsense_mediated_decay||1/3||||||||||-1||HGNC|HGNC:6865,T|intron_variant|MODIFIER|MAP4K3|ENSG00000011566|Transcript|ENST00000484274|protein_coding||1/2||||||||||-1||HGNC|HGNC:6865 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:6:0,6,0,0:0,6,0,0:45:.:0:31:40:40:0:. 0/1:0/1:9:0,5,0,4:0,5,0,4:7:.:7:32,17:49:56,39:2:18 +chr2 68306124 . C A . . 
CSQ=A|intron_variant|MODIFIER|CNRIP1|ENSG00000119865|Transcript|ENST00000263655|protein_coding||2/2||||||||||-1||HGNC|HGNC:24546,A|intron_variant|MODIFIER|CNRIP1|ENSG00000119865|Transcript|ENST00000409559|protein_coding||2/2||||||||||-1||HGNC|HGNC:24546,A|intron_variant&non_coding_transcript_variant|MODIFIER|CNRIP1|ENSG00000119865|Transcript|ENST00000481714|processed_transcript||3/3||||||||||-1||HGNC|HGNC:24546 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:8:8,0,0,0:0,8,0,0:51:.:0:32:60:60:0:. 0/1:0/1:8:6,0,0,2:2,6,0,0:3:.:3:29,32:57:50,59:2:23 +chr2 86942193 . T G . . CSQ=G|intron_variant|MODIFIER|RGPD1|ENSG00000187627|Transcript|ENST00000398193|protein_coding||1/22||||||||||1||HGNC|HGNC:32414,G|intron_variant|MODIFIER|RGPD1|ENSG00000187627|Transcript|ENST00000409776|protein_coding||1/22||||||||||1||HGNC|HGNC:32414,G|intron_variant|MODIFIER|RGPD1|ENSG00000187627|Transcript|ENST00000559485|protein_coding||1/22||||||||||1||HGNC|HGNC:32414 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:279:133,92,27,27:0,0,54,225:31:.:0:32:2:2:0:. 0/1:0/1:771:252,262,121,136:0,1,256,514:228:.:228:31,31:17:21,16:2:213 +chr2 86942348 . C G . . CSQ=G|intron_variant|MODIFIER|RGPD1|ENSG00000187627|Transcript|ENST00000398193|protein_coding||1/22||||||||||1||HGNC|HGNC:32414,G|intron_variant|MODIFIER|RGPD1|ENSG00000187627|Transcript|ENST00000409776|protein_coding||1/22||||||||||1||HGNC|HGNC:32414,G|intron_variant|MODIFIER|RGPD1|ENSG00000187627|Transcript|ENST00000559485|protein_coding||1/22||||||||||1||HGNC|HGNC:32414 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:141:12,100,4,25:0,112,29,0:12:.:0:26:2:2:0:. 0/1:0/1:265:11,176,5,73:0,187,78,0:11:.:11:26,32:26:25,27:2:38 +chr2 86942392 . C G . . 
CSQ=G|intron_variant|MODIFIER|RGPD1|ENSG00000187627|Transcript|ENST00000398193|protein_coding||1/22||||||||||1||HGNC|HGNC:32414,G|intron_variant|MODIFIER|RGPD1|ENSG00000187627|Transcript|ENST00000409776|protein_coding||1/22||||||||||1||HGNC|HGNC:32414,G|intron_variant|MODIFIER|RGPD1|ENSG00000187627|Transcript|ENST00000559485|protein_coding||1/22||||||||||1||HGNC|HGNC:32414 GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:21:0,16,0,5:0,16,5,0:1:.:0:23:0:0:0:. 0/1:0/1:41:0,32,0,9:0,32,9,0:12:.:12:27,29:19:15,31:2:21 +chr2 89625699 . G A . . CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|RP4-614C10.3|ENSG00000283132|Transcript|ENST00000636037|unprocessed_pseudogene||6/8||||||||||-1||Clone_based_vega_gene| GT:IGT:DP:DP4:BCOUNT:GQ:JGQ:VAQ:BQ:MQ:AMQ:SS:SSC 0/0:0/0:201:108,46,32,15:47,0,154,0:1:.:0:32:0:0:0:. 0/1:0/1:393:143,74,110,66:176,0,217,0:141:.:146:30,32:8:11,7:2:125
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vcf2maf-macros.xml Tue Jul 03 04:38:21 2018 -0400 @@ -0,0 +1,92 @@ + <macros> + <macro name="list-vcf2maf"> + <param name="species" type="select" label="choose species"> + <option value="ailuropoda_melanoleuca --ncbi-build ailMel1">ailuropoda_melanoleuca --ncbi-build ailMel1</option> + <option value="anas_platyrhynchos --ncbi-build BGI_duck_1.0">anas_platyrhynchos --ncbi-build BGI_duck_1.0</option> + <option value="anolis_carolinensis --ncbi-build AnoCar2.0">anolis_carolinensis --ncbi-build AnoCar2.0</option> + <option value="astyanax_mexicanus --ncbi-build AstMex102">astyanax_mexicanus --ncbi-build AstMex102</option> + <option value="bos_taurus --ncbi-build UMD3.1">bos_taurus --ncbi-build UMD3.1</option> + <option value="caenorhabditis_elegans --ncbi-build WBcel235">caenorhabditis_elegans --ncbi-build WBcel235</option> + <option value="callithrix_jacchus --ncbi-build C_jacchus3.2.1">callithrix_jacchus --ncbi-build C_jacchus3.2.1</option> + <option value="canis_familiaris --ncbi-build CanFam3.1">canis_familiaris --ncbi-build CanFam3.1</option> + <option value="cavia_porcellus --ncbi-build cavPor3">cavia_porcellus --ncbi-build cavPor3</option> + <option value="chlorocebus_sabaeus --ncbi-build ChlSab1.1">chlorocebus_sabaeus --ncbi-build ChlSab1.1</option> + <option value="choloepus_hoffmanni --ncbi-build choHof1">choloepus_hoffmanni --ncbi-build choHof1</option> + <option value="ciona_intestinalis --ncbi-build KH">ciona_intestinalis --ncbi-build KH</option> + <option value="ciona_savignyi --ncbi-build CSAV2.0">ciona_savignyi --ncbi-build CSAV2.0</option> + <option value="danio_rerio --ncbi-build GRCz10">danio_rerio --ncbi-build GRCz10</option> + <option value="dasypus_novemcinctus --ncbi-build Dasnov3.0">dasypus_novemcinctus --ncbi-build Dasnov3.0</option> + <option value="dipodomys_ordii --ncbi-build dipOrd1">dipodomys_ordii --ncbi-build dipOrd1</option> + <option value="drosophila_melanogaster --ncbi-build 
BDGP6">drosophila_melanogaster --ncbi-build BDGP6</option> + <option value="echinops_telfairi --ncbi-build TENREC">echinops_telfairi --ncbi-build TENREC</option> + <option value="equus_caballus --ncbi-build EquCab2">equus_caballus --ncbi-build EquCab2</option> + <option value="erinaceus_europaeus --ncbi-build HEDGEHOG">erinaceus_europaeus --ncbi-build HEDGEHOG</option> + <option value="felis_catus --ncbi-build Felis_catus_6.2">felis_catus --ncbi-build Felis_catus_6.2</option> + <option value="ficedula_albicollis --ncbi-build FicAlb_1.4">ficedula_albicollis --ncbi-build FicAlb_1.4</option> + <option value="gadus_morhua --ncbi-build gadMor1">gadus_morhua --ncbi-build gadMor1</option> + <option value="gallus_gallus --ncbi-build Gallus_gallus_5.0">gallus_gallus --ncbi-build Gallus_gallus_5.0</option> + <option value="gasterosteus_aculeatus --ncbi-build BROADS1">gasterosteus_aculeatus --ncbi-build BROADS1</option> + <option value="gorilla_gorilla --ncbi-build gorGor3.1">gorilla_gorilla --ncbi-build gorGor3.1</option> + <option value="homo_sapiens --ncbi-build GRCh37">homo_sapiens --ncbi-build GRCh37</option> + <option value="homo_sapiens --ncbi-build GRCh38" selected="true">homo_sapiens --ncbi-build GRCh38</option> + <option value="ictidomys_tridecemlineatus --ncbi-build spetri2">ictidomys_tridecemlineatus --ncbi-build spetri2</option> + <option value="latimeria_chalumnae --ncbi-build LatCha1">latimeria_chalumnae --ncbi-build LatCha1</option> + <option value="lepisosteus_oculatus --ncbi-build LepOcu1">lepisosteus_oculatus --ncbi-build LepOcu1</option> + <option value="loxodonta_africana --ncbi-build loxAfr3">loxodonta_africana --ncbi-build loxAfr3</option> + <option value="macaca_mulatta --ncbi-build Mmul_8.0.1">macaca_mulatta --ncbi-build Mmul_8.0.1</option> + <option value="macropus_eugenii --ncbi-build Meug_1.0">macropus_eugenii --ncbi-build Meug_1.0</option> + <option value="meleagris_gallopavo --ncbi-build UMD2">meleagris_gallopavo --ncbi-build UMD2</option> + 
<option value="microcebus_murinus --ncbi-build Mmur_2.0">microcebus_murinus --ncbi-build Mmur_2.0</option> + <option value="monodelphis_domestica --ncbi-build BROADO5">monodelphis_domestica --ncbi-build BROADO5</option> + <option value="mus_musculus_129s1svimj --ncbi-build 129S1_SvImJ_v1">mus_musculus_129s1svimj --ncbi-build 129S1_SvImJ_v1</option> + <option value="mus_musculus_aj --ncbi-build A_J_v1">mus_musculus_aj --ncbi-build A_J_v1</option> + <option value="mus_musculus_akrj --ncbi-build AKR_J_v1">mus_musculus_akrj --ncbi-build AKR_J_v1</option> + <option value="mus_musculus_balbcj --ncbi-build BALB_cJ_v1">mus_musculus_balbcj --ncbi-build BALB_cJ_v1</option> + <option value="mus_musculus_c3hhej --ncbi-build C3H_HeJ_v1">mus_musculus_c3hhej --ncbi-build C3H_HeJ_v1</option> + <option value="mus_musculus_c57bl6nj --ncbi-build C57BL_6NJ_v1">mus_musculus_c57bl6nj --ncbi-build C57BL_6NJ_v1</option> + <option value="mus_musculus_casteij --ncbi-build CAST_EiJ_v1">mus_musculus_casteij --ncbi-build CAST_EiJ_v1</option> + <option value="mus_musculus_cbaj --ncbi-build CBA_J_v1">mus_musculus_cbaj --ncbi-build CBA_J_v1</option> + <option value="mus_musculus_dba2j --ncbi-build DBA_2J_v1">mus_musculus_dba2j --ncbi-build DBA_2J_v1</option> + <option value="mus_musculus_fvbnj --ncbi-build FVB_NJ_v1">mus_musculus_fvbnj --ncbi-build FVB_NJ_v1</option> + <option value="mus_musculus_lpj --ncbi-build LP_J_v1">mus_musculus_lpj --ncbi-build LP_J_v1</option> + <option value="mus_musculus_nodshiltj --ncbi-build NOD_ShiLtJ_v1">mus_musculus_nodshiltj --ncbi-build NOD_ShiLtJ_v1</option> + <option value="mus_musculus_nzohlltj --ncbi-build NZO_HlLtJ_v1">mus_musculus_nzohlltj --ncbi-build NZO_HlLtJ_v1</option> + <option value="mus_musculus_pwkphj --ncbi-build PWK_PhJ_v1">mus_musculus_pwkphj --ncbi-build PWK_PhJ_v1</option> + <option value="mus_musculus --ncbi-build GRCm38">mus_musculus --ncbi-build GRCm38</option> + <option value="mus_musculus_wsbeij --ncbi-build 
WSB_EiJ_v1">mus_musculus_wsbeij --ncbi-build WSB_EiJ_v1</option> + <option value="mus_spretus_spreteij --ncbi-build SPRET_EiJ_v1">mus_spretus_spreteij --ncbi-build SPRET_EiJ_v1</option> + <option value="mustela_putorius_furo --ncbi-build MusPutFur1.0">mustela_putorius_furo --ncbi-build MusPutFur1.0</option> + <option value="myotis_lucifugus --ncbi-build Myoluc2.0">myotis_lucifugus --ncbi-build Myoluc2.0</option> + <option value="nomascus_leucogenys --ncbi-build Nleu1.0">nomascus_leucogenys --ncbi-build Nleu1.0</option> + <option value="ochotona_princeps --ncbi-build pika">ochotona_princeps --ncbi-build pika</option> + <option value="oreochromis_niloticus --ncbi-build Orenil1.0">oreochromis_niloticus --ncbi-build Orenil1.0</option> + <option value="ornithorhynchus_anatinus --ncbi-build OANA5">ornithorhynchus_anatinus --ncbi-build OANA5</option> + <option value="oryctolagus_cuniculus --ncbi-build OryCun2.0">oryctolagus_cuniculus --ncbi-build OryCun2.0</option> + <option value="oryzias_latipes --ncbi-build MEDAKA1">oryzias_latipes --ncbi-build MEDAKA1</option> + <option value="otolemur_garnettii --ncbi-build OtoGar3">otolemur_garnettii --ncbi-build OtoGar3</option> + <option value="ovis_aries --ncbi-build Oar_v3.1">ovis_aries --ncbi-build Oar_v3.1</option> + <option value="pan_troglodytes --ncbi-build CHIMP2.1.4">pan_troglodytes --ncbi-build CHIMP2.1.4</option> + <option value="papio_anubis --ncbi-build PapAnu2.0">papio_anubis --ncbi-build PapAnu2.0</option> + <option value="pelodiscus_sinensis --ncbi-build PelSin_1.0">pelodiscus_sinensis --ncbi-build PelSin_1.0</option> + <option value="petromyzon_marinus --ncbi-build Pmarinus_7.0">petromyzon_marinus --ncbi-build Pmarinus_7.0</option> + <option value="poecilia_formosa --ncbi-build PoeFor_5.1.2">poecilia_formosa --ncbi-build PoeFor_5.1.2</option> + <option value="pongo_abelii --ncbi-build PPYG2">pongo_abelii --ncbi-build PPYG2</option> + <option value="procavia_capensis --ncbi-build proCap1">procavia_capensis 
--ncbi-build proCap1</option> + <option value="pteropus_vampyrus --ncbi-build pteVam1">pteropus_vampyrus --ncbi-build pteVam1</option> + <option value="rattus_norvegicus --ncbi-build Rnor_6.0">rattus_norvegicus --ncbi-build Rnor_6.0</option> + <option value="saccharomyces_cerevisiae --ncbi-build R64 ">saccharomyces_cerevisiae --ncbi-build R64 </option> + <option value="sarcophilus_harrisii --ncbi-build DEVIL7.0">sarcophilus_harrisii --ncbi-build DEVIL7.0</option> + <option value="sorex_araneus --ncbi-build COMMON_SHREW1">sorex_araneus --ncbi-build COMMON_SHREW1</option> + <option value="sus_scrofa --ncbi-build Sscrofa10.2">sus_scrofa --ncbi-build Sscrofa10.2</option> + <option value="taeniopygia_guttata --ncbi-build taeGut3.2.4">taeniopygia_guttata --ncbi-build taeGut3.2.4</option> + <option value="takifugu_rubripes --ncbi-build FUGU4">takifugu_rubripes --ncbi-build FUGU4</option> + <option value="tarsius_syrichta --ncbi-build tarSyr1">tarsius_syrichta --ncbi-build tarSyr1</option> + <option value="tetraodon_nigroviridis --ncbi-build TETRAODON8">tetraodon_nigroviridis --ncbi-build TETRAODON8</option> + <option value="tupaia_belangeri --ncbi-build TREESHREW">tupaia_belangeri --ncbi-build TREESHREW</option> + <option value="tursiops_truncatus --ncbi-build turTru1">tursiops_truncatus --ncbi-build turTru1</option> + <option value="vicugna_pacos --ncbi-build vicPac1">vicugna_pacos --ncbi-build vicPac1</option> + <option value="xenopus_tropicalis --ncbi-build JGI_4.2">xenopus_tropicalis --ncbi-build JGI_4.2</option> + <option value="xiphophorus_maculatus --ncbi-build Xipmac4.4.2">xiphophorus_maculatus --ncbi-build Xipmac4.4.2</option> + </param> + </macro> + </macros>
#!/usr/bin/env perl

# vcf2maf - Convert a VCF into a MAF by mapping each variant to only one of all possible gene isoforms

use strict;
use warnings;
use IO::File;
use Getopt::Long qw( GetOptions );
use Pod::Usage qw( pod2usage );
use File::Copy qw( move );
use File::Path qw( mkpath );
use Config;

# Set any default paths and constants
my ( $tumor_id, $normal_id ) = ( "TUMOR", "NORMAL" );
my ( $vep_path, $vep_data, $vep_forks, $buffer_size, $any_allele ) = ( "$ENV{HOME}/vep", "$ENV{HOME}/.vep", 4, 5000, 0 );
my ( $ref_fasta, $filter_vcf ) = ( "$ENV{HOME}/.vep/homo_sapiens/91_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz", "$ENV{HOME}/.vep/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz" );
my ( $species, $ncbi_build, $cache_version, $maf_center, $retain_info, $min_hom_vaf, $max_filter_ac ) = ( "homo_sapiens", "GRCh37", "", ".", "", 0.7, 10 );
my $perl_bin = $Config{perlpath};

# Find out if samtools and tabix are properly installed, and quit with an error if they are not
my ( $samtools ) = map{chomp; $_}`which samtools`;
( $samtools and -e $samtools ) or die "ERROR: Please install samtools, and make sure it's in your PATH\n";
my ( $tabix ) = map{chomp; $_}`which tabix`;
( $tabix and -e $tabix ) or die "ERROR: Please install tabix, and make sure it's in your PATH\n";

# Hash to convert 3-letter amino-acid codes to their 1-letter codes
my %aa3to1 = qw( Ala A Arg R Asn N Asp D Asx B Cys C Glu E Gln Q Glx Z Gly G His H Ile I Leu L
    Lys K Met M Phe F Pro P Ser S Thr T Trp W Tyr Y Val V Xxx X Ter * );

# Prioritize Sequence Ontology terms in order of severity, as estimated by Ensembl:
# http://useast.ensembl.org/info/genome/variation/predicted_data.html#consequences
{
    # Lookup table is built once and shared by every call, rather than rebuilt per lookup
    my %effect_priority = (
        'transcript_ablation' => 1, # A feature ablation whereby the deleted region includes a transcript feature
        'exon_loss_variant' => 1, # A sequence variant whereby an exon is lost from the transcript
        'splice_donor_variant' => 2, # A splice variant that changes the 2 base region at the 5' end of an intron
        'splice_acceptor_variant' => 2, # A splice variant that changes the 2 base region at the 3' end of an intron
        'stop_gained' => 3, # A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript
        'frameshift_variant' => 3, # A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three
        'stop_lost' => 3, # A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript
        'start_lost' => 4, # A codon variant that changes at least one base of the canonical start codon
        'initiator_codon_variant' => 4, # A codon variant that changes at least one base of the first codon of a transcript
        'disruptive_inframe_insertion' => 5, # An inframe increase in cds length that inserts one or more codons into the coding sequence within an existing codon
        'disruptive_inframe_deletion' => 5, # An inframe decrease in cds length that deletes bases from the coding sequence starting within an existing codon
        'inframe_insertion' => 5, # An inframe non synonymous variant that inserts bases into the coding sequence
        'inframe_deletion' => 5, # An inframe non synonymous variant that deletes bases from the coding sequence
        'protein_altering_variant' => 5, # A sequence variant which is predicted to change the protein encoded in the coding sequence
        'missense_variant' => 6, # A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved
        'conservative_missense_variant' => 6, # A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious
        'rare_amino_acid_variant' => 6, # A sequence variant whereby at least one base of a codon encoding a rare amino acid is changed, resulting in a different encoded amino acid
        'transcript_amplification' => 7, # A feature amplification of a region containing a transcript
        'splice_region_variant' => 8, # A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron
        'stop_retained_variant' => 9, # A sequence variant where at least one base in the terminator codon is changed, but the terminator remains
        'synonymous_variant' => 9, # A sequence variant where there is no resulting change to the encoded amino acid
        'incomplete_terminal_codon_variant' => 10, # A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed
        'coding_sequence_variant' => 11, # A sequence variant that changes the coding sequence
        'mature_miRNA_variant' => 11, # A transcript variant located with the sequence of the mature miRNA
        'exon_variant' => 11, # A sequence variant that changes exon sequence
        '5_prime_UTR_variant' => 12, # A UTR variant of the 5' UTR
        '5_prime_UTR_premature_start_codon_gain_variant' => 12, # snpEff-specific effect, creating a start codon in 5' UTR
        '3_prime_UTR_variant' => 12, # A UTR variant of the 3' UTR
        'non_coding_exon_variant' => 13, # A sequence variant that changes non-coding exon sequence
        'non_coding_transcript_exon_variant' => 13, # snpEff-specific synonym for non_coding_exon_variant
        'non_coding_transcript_variant' => 14, # A transcript variant of a non coding RNA gene
        'nc_transcript_variant' => 14, # A transcript variant of a non coding RNA gene (older alias for non_coding_transcript_variant)
        'intron_variant' => 14, # A transcript variant occurring within an intron
        'intragenic_variant' => 14, # A variant that occurs within a gene but falls outside of all transcript features. This occurs when alternate transcripts of a gene do not share overlapping sequence
        'INTRAGENIC' => 14, # snpEff-specific synonym of intragenic_variant
        'NMD_transcript_variant' => 15, # A variant in a transcript that is the target of NMD
        'upstream_gene_variant' => 16, # A sequence variant located 5' of a gene
        'downstream_gene_variant' => 16, # A sequence variant located 3' of a gene
        'TFBS_ablation' => 17, # A feature ablation whereby the deleted region includes a transcription factor binding site
        'TFBS_amplification' => 17, # A feature amplification of a region containing a transcription factor binding site
        'TF_binding_site_variant' => 17, # A sequence variant located within a transcription factor binding site
        'regulatory_region_ablation' => 17, # A feature ablation whereby the deleted region includes a regulatory region
        'regulatory_region_amplification' => 17, # A feature amplification of a region containing a regulatory region
        'regulatory_region_variant' => 17, # A sequence variant located within a regulatory region
        'regulatory_region' => 17, # snpEff-specific effect that should really be regulatory_region_variant
        'feature_elongation' => 18, # A sequence variant that causes the extension of a genomic feature, with regard to the reference sequence
        'feature_truncation' => 18, # A sequence variant that causes the reduction of a genomic feature, with regard to the reference sequence
        'intergenic_variant' => 19, # A sequence variant located in the intergenic region, between genes
        'intergenic_region' => 19, # snpEff-specific effect that should really be intergenic_variant
        '' => 20
    );

    # Return the severity rank of a Sequence Ontology term (1 = most severe, 20 = least).
    # An undefined term is treated as the empty string; an unrecognized term triggers a
    # warning and gets the lowest priority (20).
    sub GetEffectPriority {
        my ( $effect ) = @_;
        $effect = '' unless( defined $effect );
        unless( defined $effect_priority{$effect} ) {
            warn "WARNING: Unrecognized effect \"$effect\". Assigning lowest priority!\n";
            return 20;
        }
        return $effect_priority{$effect};
    }
}

# Prioritize the transcript biotypes that variants are annotated to, based on disease significance:
# All possible biotypes are defined here: http://www.gencodegenes.org/gencode_biotypes.html
{
    # Lookup table is built once and shared by every call, rather than rebuilt per lookup
    my %biotype_priority = (
        'protein_coding' => 1, # Contains an open reading frame (ORF)
        'LRG_gene' => 2, # Gene in a "Locus Reference Genomic" region known to have disease-related sequence variations
        'IG_C_gene' => 2, # Immunoglobulin (Ig) variable chain genes imported or annotated according to the IMGT
        'IG_D_gene' => 2, # Immunoglobulin (Ig) variable chain genes imported or annotated according to the IMGT
        'IG_J_gene' => 2, # Immunoglobulin (Ig) variable chain genes imported or annotated according to the IMGT
        'IG_LV_gene' => 2, # Immunoglobulin (Ig) variable chain genes imported or annotated according to the IMGT
        'IG_V_gene' => 2, # Immunoglobulin (Ig) variable chain genes imported or annotated according to the IMGT
        'TR_C_gene' => 2, # T-cell receptor (TcR) genes imported or annotated according to the IMGT
        'TR_D_gene' => 2, # T-cell receptor (TcR) genes imported or annotated according to the IMGT
        'TR_J_gene' => 2, # T-cell receptor (TcR) genes imported or annotated according to the IMGT
        'TR_V_gene' => 2, # T-cell receptor (TcR) genes imported or annotated according to the IMGT
        'miRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
        'snRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
        'snoRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
        'ribozyme' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
        'tRNA' => 3, # Added by Y. Boursin
        'sRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
        'scaRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
        'rRNA' => 3, # Non-coding RNA predicted using sequences from RFAM and miRBase
        'lincRNA' => 3, # Long, intervening noncoding (linc) RNAs, that can be found in evolutionarily conserved, intergenic regions
        'bidirectional_promoter_lncrna' => 3, # A non-coding locus that originates from within the promoter region of a protein-coding gene, with transcription proceeding in the opposite direction on the other strand
        'bidirectional_promoter_lncRNA' => 3, # A non-coding locus that originates from within the promoter region of a protein-coding gene, with transcription proceeding in the opposite direction on the other strand
        'known_ncrna' => 4,
        'vaultRNA' => 4, # Short non coding RNA genes that form part of the vault ribonucleoprotein complex
        'macro_lncRNA' => 4, # unspliced lncRNAs that are several kb in size
        'Mt_tRNA' => 4, # Non-coding RNA predicted using sequences from RFAM and miRBase
        'Mt_rRNA' => 4, # Non-coding RNA predicted using sequences from RFAM and miRBase
        'antisense' => 5, # Has transcripts that overlap the genomic span (i.e. exon or introns) of a protein-coding locus on the opposite strand
        'antisense_RNA' => 5, # Alias for antisense (Y. Boursin)
        'sense_intronic' => 5, # Long non-coding transcript in introns of a coding gene that does not overlap any exons
        'sense_overlapping' => 5, # Long non-coding transcript that contains a coding gene in its intron on the same strand
        '3prime_overlapping_ncrna' => 5, # Transcripts where ditag and/or published experimental data strongly supports the existence of short non-coding transcripts transcribed from the 3'UTR
        '3prime_overlapping_ncRNA' => 5, # Transcripts where ditag and/or published experimental data strongly supports the existence of short non-coding transcripts transcribed from the 3'UTR
        'misc_RNA' => 5, # Non-coding RNA predicted using sequences from RFAM and miRBase
        'non_coding' => 5, # Transcript which is known from the literature to not be protein coding
        'regulatory_region' => 6, # A region of sequence that is involved in the control of a biological process
        'disrupted_domain' => 6, # Otherwise viable coding region omitted from this alternatively spliced transcript because the splice variation affects a region coding for a protein domain
        'processed_transcript' => 6, # Doesn't contain an ORF
        'TEC' => 6, # To be Experimentally Confirmed. This is used for non-spliced EST clusters that have polyA features. This category has been specifically created for the ENCODE project to highlight regions that could indicate the presence of protein coding genes that require experimental validation, either by 5' RACE or RT-PCR to extend the transcripts, or by confirming expression of the putatively-encoded peptide with specific antibodies
        'TF_binding_site' => 7, # A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex
        'CTCF_binding_site' => 7, # A transcription factor binding site with consensus sequence CCGCGNGGNGGCAG, bound by CCCTF-binding factor
        'promoter_flanking_region' => 7, # A region immediately adjacent to a promoter which may or may not contain transcription factor binding sites
        'enhancer' => 7, # A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter
        'promoter' => 7, # A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery
        'open_chromatin_region' => 7, # A DNA sequence that in the normal state of the chromosome corresponds to an unfolded, un-complexed stretch of double-stranded DNA
        'retained_intron' => 7, # Alternatively spliced transcript believed to contain intronic sequence relative to other, coding, variants
        'nonsense_mediated_decay' => 7, # If the coding sequence (following the appropriate reference) of a transcript finishes >50bp from a downstream splice site then it is tagged as NMD. If the variant does not cover the full reference coding sequence then it is annotated as NMD if NMD is unavoidable i.e. no matter what the exon structure of the missing portion is the transcript will be subject to NMD
        'non_stop_decay' => 7, # Transcripts that have polyA features (including signal) without a prior stop codon in the CDS, i.e. a non-genomic polyA tail attached directly to the CDS without 3' UTR. These transcripts are subject to degradation
        'ambiguous_orf' => 7, # Transcript believed to be protein coding, but with more than one possible open reading frame
        'pseudogene' => 8, # Have homology to proteins but generally suffer from a disrupted coding sequence and an active homologous gene can be found at another locus. Sometimes these entries have an intact coding sequence or an open but truncated ORF, in which case there is other evidence used (for example genomic polyA stretches at the 3' end) to classify them as a pseudogene. Can be further classified as one of the following
        'processed_pseudogene' => 8, # Pseudogene that lack introns and is thought to arise from reverse transcription of mRNA followed by reinsertion of DNA into the genome
        'polymorphic_pseudogene' => 8, # Pseudogene owing to a SNP/DIP but in other individuals/haplotypes/strains the gene is translated
        'retrotransposed' => 8, # Pseudogene owing to a reverse transcribed and re-inserted sequence
        'translated_processed_pseudogene' => 8, # Pseudogenes that have mass spec data suggesting that they are also translated
        'translated_unprocessed_pseudogene' => 8, # Pseudogenes that have mass spec data suggesting that they are also translated
        'transcribed_processed_pseudogene' => 8, # Pseudogene where protein homology or genomic structure indicates a pseudogene, but the presence of locus-specific transcripts indicates expression
        'transcribed_unprocessed_pseudogene' => 8, # Pseudogene where protein homology or genomic structure indicates a pseudogene, but the presence of locus-specific transcripts indicates expression
        'transcribed_unitary_pseudogene' => 8, # Pseudogene where protein homology or genomic structure indicates a pseudogene, but the presence of locus-specific transcripts indicates expression
        'unitary_pseudogene' => 8, # A species specific unprocessed pseudogene without a parent gene, as it has an active orthologue in another species
        'unprocessed_pseudogene' => 8, # Pseudogene that can contain introns since produced by gene duplication
        'Mt_tRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
        'tRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
        'snoRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
        'snRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
        'scRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
        'rRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
        'misc_RNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
        'miRNA_pseudogene' => 8, # Non-coding RNAs predicted to be pseudogenes by the Ensembl pipeline
        'IG_C_pseudogene' => 8, # Inactivated immunoglobulin gene
        'IG_D_pseudogene' => 8, # Inactivated immunoglobulin gene
        'IG_J_pseudogene' => 8, # Inactivated immunoglobulin gene
        'IG_V_pseudogene' => 8, # Inactivated immunoglobulin gene
        'TR_J_pseudogene' => 8, # Inactivated T-cell receptor gene
        'TR_V_pseudogene' => 8, # Inactivated T-cell receptor gene
        'artifact' => 9, # Used to tag mistakes in the public databases (Ensembl/SwissProt/Trembl)
        '' => 10
    );

    # Return the priority rank of a transcript biotype (1 = highest, 10 = lowest).
    # An undefined biotype is treated as the empty string; an unrecognized biotype triggers
    # a warning and gets the lowest priority (10).
    sub GetBiotypePriority {
        my ( $biotype ) = @_;
        $biotype = '' unless( defined $biotype );
        unless( defined $biotype_priority{$biotype} ) {
            warn "WARNING: Unrecognized biotype \"$biotype\". Assigning lowest priority!\n";
            return 10;
        }
        return $biotype_priority{$biotype};
    }
}

# Check for missing or crappy arguments
unless( @ARGV and $ARGV[0] =~ m/^-/ ) {
    pod2usage( -verbose => 0, -message => "$0: Missing or invalid arguments!\n", -exitval => 2 );
}

# Parse options and print usage if there is a syntax error, or if usage was explicitly requested
my ( $man, $help ) = ( 0, 0 );
my ( $input_vcf, $output_maf, $tmp_dir, $custom_enst_file );
my ( $vcf_tumor_id, $vcf_normal_id, $remap_chain );
GetOptions(
    'help!' => \$help,
    'man!' => \$man,
    'input-vcf=s' => \$input_vcf,
    'output-maf=s' => \$output_maf,
    'tmp-dir=s' => \$tmp_dir,
    'tumor-id=s' => \$tumor_id,
    'normal-id=s' => \$normal_id,
    'vcf-tumor-id=s' => \$vcf_tumor_id,
    'vcf-normal-id=s' => \$vcf_normal_id,
    'custom-enst=s' => \$custom_enst_file,
    'vep-path=s' => \$vep_path,
    'vep-data=s' => \$vep_data,
    'vep-forks=s' => \$vep_forks,
    'buffer-size=i' => \$buffer_size,
    'any-allele!' => \$any_allele,
    'ref-fasta=s' => \$ref_fasta,
    'species=s' => \$species,
    'ncbi-build=s' => \$ncbi_build,
    'cache-version=s' => \$cache_version,
    'maf-center=s' => \$maf_center,
    'retain-info=s' => \$retain_info,
    'min-hom-vaf=s' => \$min_hom_vaf,
    'remap-chain=s' => \$remap_chain,
    'filter-vcf=s' => \$filter_vcf,
    'max-filter-ac=i' => \$max_filter_ac
) or pod2usage( -verbose => 1, -input => \*DATA, -exitval => 2 );
pod2usage( -verbose => 1, -input => \*DATA, -exitval => 0 ) if( $help );
pod2usage( -verbose => 2, -input => \*DATA, -exitval => 0 ) if( $man );

# Check if required arguments are missing or problematic
( defined $input_vcf and defined $output_maf ) or die "ERROR: Both input-vcf and output-maf must be defined!\n";
( -s $input_vcf ) or die "ERROR: Provided --input-vcf is missing or empty: $input_vcf\n";
( -s $ref_fasta ) or die "ERROR: Provided --ref-fasta is missing or empty: $ref_fasta\n";
( $input_vcf !~ m/\.(gz|bz2|bcf)$/ ) or die "ERROR: Unfortunately, --input-vcf cannot be in a compressed format\n";

# Unless specified, assume that the VCF uses the same sample IDs that the MAF will contain
$vcf_tumor_id = $tumor_id unless( $vcf_tumor_id );
$vcf_normal_id = $normal_id unless( $vcf_normal_id );

# Load up the custom isoform overrides if provided:
# NOTE(review): several file paths in this script are interpolated unquoted into shell
# commands, so paths containing spaces or shell metacharacters will misbehave
my %custom_enst;
if( $custom_enst_file ) {
    ( -s $custom_enst_file ) or die "ERROR: Provided --custom-enst file is missing or empty: $custom_enst_file\n";
    %custom_enst = map{chomp; ( $_, 1 )}`grep -v ^# $custom_enst_file | cut -f1`;
}

# Create a folder for the intermediate VCFs if user-defined, or default to the input VCF's folder
if( defined $tmp_dir ) {
    mkpath( $tmp_dir ) unless( -d $tmp_dir );
}
else {
    $tmp_dir = substr( $input_vcf, 0, rindex( $input_vcf, "/" )) if( $input_vcf =~ m/\// );
    $tmp_dir = "." unless( $tmp_dir ); # In case the input VCF is in the current working directory
}

# Also figure out the base name of the input VCF, cuz we'll be naming a lot of files based on that
my $input_name = substr( $input_vcf, rindex( $input_vcf, "/" ) + 1 );
$input_name =~ s/(\.vcf)*$//;

# If the VCF contains SVs, split the breakpoints into separate lines before passing to VEP
my $split_svs = 0;
my $orig_vcf_fh = IO::File->new( $input_vcf ) or die "ERROR: Couldn't open --input-vcf: $input_vcf!\n";
my $split_vcf_fh = IO::File->new( "$tmp_dir/$input_name.split.vcf", "w" ) or die "ERROR: Couldn't open VCF: $tmp_dir/$input_name.split.vcf!\n";
while( my $line = $orig_vcf_fh->getline ) {
    # If the file uses Mac OS 9 newlines, quit with an error
    # NOTE(review): `$` also matches just before a trailing "\n", so this pattern matches the
    # CR of a CRLF ending too and CRLF files are rejected despite the message - confirm intent
    ( $line !~ m/\r$/ ) or die "ERROR: Your VCF uses CR line breaks, which we can't support. Please use LF or CRLF.\n";

    if( $line =~ m/^#/ ) {
        $split_vcf_fh->print( $line ); # Write header lines unchanged
        next;
    }

    chomp( $line );
    my @cols = split( "\t", $line );
    # Parse INFO into key/value pairs; flags without "=" get the value "1"
    my %info = map {( m/=/ ? ( split( /=/, $_, 2 )) : ( $_, "1" ))} split( /\;/, $cols[7] );
    if( $info{SVTYPE} ){
        # Remove SVTYPE tag if REF/ALT alleles are defined, or VEP won't report transcript effects
        if( $cols[3]=~m/^[ACGTN]+$/i and $cols[4]=~m/^[ACGTN,]+$/i ) {
            $cols[7]=~s/(SVTYPE=\w+;|;SVTYPE=\w+|SVTYPE=\w+)//;
            $split_vcf_fh->print( join( "\t", @cols ), "\n" );
        }
        # For legit SVs except insertions, split them into two separate breakpoint events
        elsif( $info{SVTYPE}=~m/^(BND|TRA|DEL|DUP|INV)$/ ) {
            $split_svs = 1;
            # Don't tell VEP it's an SV, by removing the SVTYPE tag
            $cols[7]=~s/(SVTYPE=\w+;|;SVTYPE=\w+|SVTYPE=\w+)//;
            # Rename two SV specific INFO keys to something friendlier
            $cols[7]=~s/CT=([35]to[35])/Frame=$1/;
            $cols[7]=~s/SVMETHOD=([\w.]+)/Method=$1/;
            $cols[4] = "<" . $info{SVTYPE} . ">";
            # Fetch the REF allele at the second breakpoint using samtools faidx
            my $ref2 = `$samtools faidx $ref_fasta $info{CHR2}:$info{END}-$info{END} | grep -v ^\\>`;
            chomp( $ref2 );
            $split_vcf_fh->print( join( "\t", $info{CHR2}, $info{END}, $cols[2], ( $ref2 ? $ref2 : $cols[3] ), @cols[4..$#cols] ), "\n" );
            $split_vcf_fh->print( join( "\t", @cols ), "\n" );
        }
        # NOTE(review): a record with SVTYPE that matches neither branch above (e.g. a
        # symbolic insertion) is not written to the split VCF at all - confirm this is intended
        $input_vcf = "$tmp_dir/$input_name.split.vcf";
    }
    else {
        $split_vcf_fh->print( join( "\t", @cols ), "\n" );
    }
}
$split_vcf_fh->close;
$orig_vcf_fh->close;

# Delete the split.vcf created above if we didn't find any variants with the SVTYPE tag
unlink( "$tmp_dir/$input_name.split.vcf" ) if( $input_vcf ne "$tmp_dir/$input_name.split.vcf" );

# If a liftOver chain was provided, remap and switch the input VCF before annotation
my ( %remap );
if( $remap_chain ) {
    # Find out if liftOver is properly installed, and quit with an error if it's not
    my $liftover = `which liftOver`;
    chomp( $liftover );
    ( $liftover and -e $liftover ) or die "ERROR: Please install liftOver, and make sure it's in your PATH\n";

    # Make a BED file from the VCF, run liftOver on it, and create a hash mapping old to new loci
    `grep -v ^# $input_vcf | cut -f1,2 | awk '{OFS="\\t"; print \$1,\$2-1,\$2,\$1":"\$2}' > $tmp_dir/$input_name.bed`;
    %remap = map{chomp; my @c=split("\t"); ($c[3], "$c[0]:$c[2]")}`$liftover $tmp_dir/$input_name.bed $remap_chain /dev/stdout /dev/null 2> /dev/null`;
    unlink( "$tmp_dir/$input_name.bed" );

    # Create a new VCF in the temp folder, with remapped loci on which we'll run annotation
    my $orig_vcf_fh = IO::File->new( $input_vcf ) or die "ERROR: Couldn't open --input-vcf: $input_vcf!\n";
    my $remap_vcf_fh = IO::File->new( "$tmp_dir/$input_name.remap.vcf", "w" ) or die "ERROR: Couldn't open VCF: $tmp_dir/$input_name.remap.vcf!\n";
    while( my $line = $orig_vcf_fh->getline ) {
        if( $line =~ m/^#/ ) {
            $remap_vcf_fh->print( $line ); # Write header lines unchanged
        }
        else {
            chomp( $line );
            my @cols = split( "\t", $line );
            my $locus = $cols[0] . ":" . $cols[1];
            if( defined $remap{$locus} ) {
                # Retain original variant under INFO, so we can append it later to the output MAF
                # The emptiness test is grouped explicitly: "or" binds looser than "?:", so the
                # ungrouped form evaluated to 1 (giving "1REMAPPED_POS=...") for an empty INFO
                my $info_prefix = ( !defined $cols[7] or $cols[7] eq "" or $cols[7] eq "." ) ? "" : "$cols[7];";
                $cols[7] = $info_prefix . "REMAPPED_POS=" . join( ":", @cols[0,1,3,4] );
                @cols[0,1] = split( ":", $remap{$locus} );
                $remap_vcf_fh->print( join( "\t", @cols ), "\n" );
            }
            else {
                warn "WARNING: Skipping variant at $locus; Unable to liftOver using $remap_chain\n";
            }
        }
    }
    $remap_vcf_fh->close;
    $orig_vcf_fh->close;
    $input_vcf = "$tmp_dir/$input_name.remap.vcf";
}

# Before running annotation, let's pull flanking reference bps for each variant to do some checks,
# and we'll also pull out overlapping calls from the filter VCF
my $vcf_fh = IO::File->new( $input_vcf ) or die "ERROR: Couldn't open --input-vcf: $input_vcf!\n";
my ( %ref_bps, @ref_regions, %uniq_loci, %uniq_regions, %flanking_bps, %filter_data );
while( my $line = $vcf_fh->getline ) {
    # Skip header lines, and pull variant loci to pass to samtools later
    next if( $line =~ m/^#/ );
    chomp( $line );
    my ( $chr, $pos, undef, $ref ) = split( "\t", $line );
    # Create a region that spans the length of the reference allele and 1bp flanks around it
    my $region = "$chr:" . ( $pos - 1 ) . "-" . ( $pos + length( $ref ));
    $ref_bps{$region} = $ref;
    push( @ref_regions, $region );
    $uniq_regions{$region} = 1;
    $uniq_loci{"$chr:$pos-$pos"} = 1;
}
$vcf_fh->close;

# samtools runs faster when passed many loci at a time, but limited to around 125k args, at least
# on CentOS 6. If there are too many loci, split them into 50k chunks and run separately
my ( $lines, @regions_split ) = ( "", ());
my @regions = keys %uniq_regions;
# Must be computed before the splice loop below, which empties @regions
my $chr_prefix_in_use = ( @regions and $regions[0] =~ m/^chr/ ? 1 : 0 );
push( @regions_split, [ splice( @regions, 0, 50000 ) ] ) while @regions;
foreach my $chunk ( @regions_split ) {
    my $region = join( " ", @{$chunk} );
    $lines .= `$samtools faidx $ref_fasta $region`;
}
foreach my $line ( grep( length, split( ">", $lines ))) {
    # Carefully split this FASTA entry, properly chomping newlines for long indels
    my ( $region, $bps ) = split( "\n", $line, 2 );
    $bps =~ s/\r|\n//g;
    if( $bps ){
        $bps = uc( $bps );
        $flanking_bps{$region} = $bps;
    }
}

# If flanking_bps is entirely empty, then it's most likely that the user chose the wrong ref-fasta
# Or it's also possible that an outdated samtools was unable to parse the gzipped FASTA files
# ::NOTE:: If input had no variants, don't break here, so we can continue to create an empty MAF
# @regions was emptied by the splice loop above, so test @regions_split to know whether any
# regions were actually queried; testing @regions made this check impossible to trigger
( !@regions_split or %flanking_bps ) or die "ERROR: You're either using an outdated samtools, or --ref-fasta is not the same genome build as your --input-vcf.";

# Skip filtering if not handling GRCh37, and filter-vcf is pointing to the default GRCh37 ExAC VCF
if(( $species eq "homo_sapiens" and $ncbi_build eq "GRCh37" and $filter_vcf ) or ( $filter_vcf and $filter_vcf ne "$ENV{HOME}/.vep/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz" )) {
    ( -s $filter_vcf ) or die "ERROR: Provided --filter-vcf is missing or empty: $filter_vcf\n";
    # Query each variant locus on the filter VCF, using tabix, just like we used samtools earlier
    ( $lines, @regions_split ) = ( "", ());
    my @regions = keys %uniq_loci;
    push( @regions_split, [ splice( @regions, 0, 50000 ) ] ) while @regions;
    # ::NOTE:: chr-prefix removal works safely here because ExAC is limited to 1..22, X, Y
    foreach my $chunk ( @regions_split ) {
        my $loci = join( " ", map{s/^chr//; $_} @{$chunk} );
        $lines .= `$tabix $filter_vcf $loci`;
    }
    foreach my $line ( split( "\n", $lines )) {
        my ( $chr, $pos, undef, $ref, $alt, undef, $filter, $info_line ) = split( "\t", $line );
        # Parse out data from info column, and store it for later, along with REF, ALT, and FILTER
        my $locus = ( $chr_prefix_in_use ? "chr$chr:$pos" : "$chr:$pos" );
        %{$filter_data{$locus}} = map {( m/=/ ? ( split( /=/, $_, 2 )) : ( $_, "1" ))} split( /\;/, $info_line );
        $filter_data{$locus}{REF} = $ref;
        $filter_data{$locus}{ALT} = $alt;
        $filter_data{$locus}{FILTER} = $filter;
    }
}

# For each variant locus and reference allele in the input VCF, report any problems
foreach my $region ( @ref_regions ) {
    my $ref = $ref_bps{$region};
    # The region starts 1bp before the variant, so add 1 to its start to recover the variant locus
    my ( $locus ) = map{ my ( $chr, $pos ) = split( ":" ); ++$pos; "$chr:$pos" } split( "-", $region );
    if( !defined $flanking_bps{$region} ) {
        warn "WARNING: Couldn't retrieve bps around $locus from reference FASTA: $ref_fasta\n";
    }
    elsif( $flanking_bps{$region} !~ m/^[ACGTN]+$/ ) {
        warn "WARNING: Retrieved invalid bps " . $flanking_bps{$region} . " around $locus from reference FASTA: $ref_fasta\n";
    }
    elsif( $ref ne substr( $flanking_bps{$region}, 1, length( $ref ))) {
        warn "WARNING: Reference allele $ref at $locus doesn't match " .
            substr( $flanking_bps{$region}, 1, length( $ref )) . " (flanking bps: " .
            $flanking_bps{$region} . ") from reference FASTA: $ref_fasta\n";
    }
}

# Annotate variants in given VCF to all possible transcripts
my $output_vcf = ( $remap_chain ? "$tmp_dir/$input_name.remap.vep.vcf" : "$tmp_dir/$input_name.vep.vcf" );
# Skip running VEP if an annotated VCF already exists
unless( -s $output_vcf ) {
    warn "STATUS: Running VEP and writing to: $output_vcf\n";
    # Make sure we can find the VEP script
    my $vep_script = ( -s "$vep_path/vep" ? "$vep_path/vep" : "$vep_path/variant_effect_predictor.pl" );
    ( -s $vep_script ) or die "ERROR: Cannot find VEP script in path: $vep_path\n";

    # Construct VEP command using some default options and run it
    my $vep_cmd = "$perl_bin $vep_script --species $species --assembly $ncbi_build --offline --no_progress --no_stats --buffer_size $buffer_size --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --uniprot --tsl --pubmed --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --vcf --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --dir $vep_data --fasta $ref_fasta --format vcf --input_file $input_vcf --output_file $output_vcf";
    # VEP barks if --fork is set to 1. So don't use this argument unless it's >1
    $vep_cmd .= " --fork $vep_forks" if( $vep_forks > 1 );
    # Require allele match for co-located variants unless user-rejected or we're using a newer VEP
    $vep_cmd .= " --check_allele" unless( $any_allele or $vep_script =~ m/vep$/ );
    # Add --cache_version only if the user specifically asked for a version
    $vep_cmd .= " --cache_version $cache_version" if( $cache_version );
    # Add options that only work on human variants
    if( $species eq "homo_sapiens" ) {
        # Slight change in these arguments if using the newer VEP
        $vep_cmd .= " --polyphen b " . ( $vep_script =~ m/vep$/ ? "--af --af_1kg --af_esp --af_gnomad" : "--gmaf --maf_1kg --maf_esp" );
    }
    # Add options that work for most species, except a few we know about
    $vep_cmd .= " --regulatory" unless( $species eq "canis_familiaris" );

    # Make sure it ran without error codes
    system( $vep_cmd ) == 0 or die "\nERROR: Failed to run the VEP annotator! Command: $vep_cmd\n";
    ( -s $output_vcf ) or warn "WARNING: VEP-annotated VCF file is missing or empty: $output_vcf\n";
}

# Define default MAF Header (https://wiki.nci.nih.gov/x/eJaPAQ) with our vcf2maf additions
my @maf_header = qw(
    Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand
    Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2
    dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode
    Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2
    Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status
    Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score
    BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID HGVSc HGVSp HGVSp_Short Transcript_ID
    Exon_Number t_depth t_ref_count t_alt_count n_depth n_ref_count n_alt_count all_effects
);

# Add extra annotation columns to the MAF in a consistent order
my @ann_cols = qw( Allele Gene Feature Feature_type Consequence cDNA_position CDS_position
    Protein_position Amino_acids Codons Existing_variation ALLELE_NUM DISTANCE STRAND_VEP SYMBOL
    SYMBOL_SOURCE HGNC_ID BIOTYPE CANONICAL CCDS ENSP SWISSPROT TREMBL UNIPARC RefSeq SIFT PolyPhen
    EXON INTRON DOMAINS AF AFR_AF AMR_AF ASN_AF EAS_AF EUR_AF SAS_AF AA_AF EA_AF CLIN_SIG SOMATIC
    PUBMED MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE IMPACT PICK VARIANT_CLASS TSL
    HGVS_OFFSET PHENO MINIMISED ExAC_AF ExAC_AF_AFR ExAC_AF_AMR ExAC_AF_EAS ExAC_AF_FIN ExAC_AF_NFE
    ExAC_AF_OTH ExAC_AF_SAS GENE_PHENO FILTER flanking_bps variant_id variant_qual ExAC_AF_Adj
    ExAC_AC_AN_Adj ExAC_AC_AN ExAC_AC_AN_AFR ExAC_AC_AN_AMR ExAC_AC_AN_EAS ExAC_AC_AN_FIN
    ExAC_AC_AN_NFE ExAC_AC_AN_OTH ExAC_AC_AN_SAS ExAC_FILTER gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF
    gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF );

my @ann_cols_format;
# To store the actual order of VEP data, that may differ between runs +push( @maf_header, @ann_cols ); + +# If the user has INFO fields they want to retain, create additional columns for those +my @addl_info_cols = (); +if( $retain_info or $remap_chain or $split_svs ) { + # But let's not overwrite existing columns with the same name + my %maf_cols = map{ my $c = lc; ( $c, 1 )} @maf_header; + @addl_info_cols = grep{ my $c = lc; !$maf_cols{$c}} split( ",", $retain_info ); + # If a remap-chain was used, add a column to retain the original chr:pos:ref:alt + push( @addl_info_cols, "REMAPPED_POS" ) if( $remap_chain ); + # If we had to split some SVs earlier, add some columns with some useful info about SVs + push( @addl_info_cols, qw( Fusion Method Frame CONSENSUS )) if( $split_svs ); + push( @maf_header, @addl_info_cols ); +} + +# Locate and load the file mapping ENSG IDs to Entrez IDs +my ( $script_dir ) = $0 =~ m/^(.*)\/vcf2maf/; +$script_dir = "." unless( $script_dir ); + +my $entrez_id_file = "$script_dir/data/ensg_to_entrez_id_map_ensembl_feb2014.tsv"; +my %entrez_id_map = (); +if( -s $entrez_id_file ) { + %entrez_id_map = map{chomp; split("\t")} `grep -hv ^# $entrez_id_file`; +} + +# Parse through each variant in the annotated VCF, pull out CSQ/ANN from the INFO column, and choose +# one transcript per variant whose annotation will be used in the MAF +my $maf_fh = IO::File->new( $output_maf, ">" ) or die "ERROR: Couldn't open --output-maf: $output_maf!\n"; +$maf_fh->print( "#version 2.4\n" . 
join( "\t", @maf_header ), "\n" ); # Print MAF header +( -s $output_vcf ) or exit; # Warnings on this were printed earlier, but quit here, only after a blank MAF is created +my $annotated_vcf_fh = IO::File->new( $output_vcf ) or die "ERROR: Couldn't open annotated VCF: $output_vcf!\n"; +my ( $vcf_tumor_idx, $vcf_normal_idx, %sv_pair ); +while( my $line = $annotated_vcf_fh->getline ) { + + # Parse out the VEP CSQ/ANN format, which seems to differ between runs + if( $line =~ m/^##INFO=<ID=(CSQ|ANN).*Format: (\S+)">$/ ) { + # Use this as the expected column order of VEP annotation, unless we already got it from CSQ + @ann_cols_format = split( /\|/, $2 ) unless( @ann_cols_format and $1 eq "ANN" ); + } + # Skip all other header lines + next if( $line =~ m/^##/ ); + + chomp( $line ); + my ( $chrom, $pos, $var_id, $ref, $alt, $var_qual, $filter, $info_line, $format_line, @rest ) = split( "\t", $line ); + + # Set ID, QUAL, and FILTER to "." unless defined and non-empty + $var_id = "." unless( defined $var_id and $var_id ne "" ); + $var_qual = "." unless( defined $var_qual and $var_qual ne "" ); + $filter = "." unless( defined $filter and $filter ne "" ); + + # If FORMATted genotype fields are available, find the sample with the variant, and matched normal + if( $line =~ m/^#CHROM/ ) { + if( $format_line and scalar( @rest ) > 0 ) { + for( my $i = 0; $i <= $#rest; ++$i ) { + $vcf_tumor_idx = $i if( $rest[$i] eq $vcf_tumor_id ); + $vcf_normal_idx = $i if( $rest[$i] eq $vcf_normal_id ); + } + ( defined $vcf_tumor_idx ) or warn "WARNING: No genotype column for $vcf_tumor_id in VCF!\n"; + ( defined $vcf_normal_idx ) or warn "WARNING: No genotype column for $vcf_normal_id in VCF!\n"; + } + next; + } + + # Parse out the data in the info column, and store into a hash + my %info = map {( m/=/ ? ( split( /=/, $_, 2 )) : ( $_, "1" ))} split( /\;/, $info_line ); + + # By default, the variant allele is the first (usually the only) allele listed under ALT. 
If + # there are >1 alleles in ALT, choose the first non-REF allele listed under tumor GT, that is + # also not seen under normal GT. If tumor GT is undefined or ambiguous, choose the tumor allele + # with the most supporting read depth, if available. + my @alleles = ( $ref, split( /,/, $alt )); + my $var_allele_idx = 1; + + # Parse out info from the normal genotype field + my ( %nrm_info, @nrm_depths ); + if( defined $vcf_normal_idx ) { + my @format_keys = split( /\:/, $format_line ); + my $idx = 0; + %nrm_info = map {( $format_keys[$idx++], $_ )} split( /\:/, $rest[$vcf_normal_idx] ); + } + + # Parse out info from the tumor genotype field + my ( %tum_info, @tum_depths ); + if( defined $vcf_tumor_idx ) { + my @format_keys = split( /\:/, $format_line ); + my $idx = 0; + %tum_info = map {( $format_keys[$idx++], $_ )} split( /\:/, $rest[$vcf_tumor_idx] ); + + # If possible, parse the tumor genotype to identify the variant allele + if( defined $tum_info{GT} and $tum_info{GT} ne "." and $tum_info{GT} ne "./." ) { + my @tum_gt = split( /[\/|]/, $tum_info{GT} ); + # Default to the first non-REF allele seen in tumor GT + ( $var_allele_idx ) = grep {$_ ne "0"} @tum_gt; + # If possible, choose the first non-REF tumor allele that is also not in normal GT + if( defined $nrm_info{GT} and $nrm_info{GT} ne "." and $nrm_info{GT} ne "./." 
) { + my %nrm_gt = map {( $_, 1 )} split( /[\/|]/, $nrm_info{GT} ); + ( $var_allele_idx ) = grep {$_ ne "0" and !$nrm_gt{$_}} @tum_gt; + } + # If GT was unhelpful, default to the first ALT allele and set GT to undefined + if( !defined $var_allele_idx or $var_allele_idx !~ m/^\d+$/ or $var_allele_idx >= scalar( @alleles )) { + $var_allele_idx = 1; + $tum_info{GT} = "./."; + } + } + + # Standardize tumor AD and DP based on data in the genotype fields + FixAlleleDepths( \@alleles, $var_allele_idx, \%tum_info ); + @tum_depths = split( ",", $tum_info{AD} ); + + # If genotype is undefined, use the allele depths collected to choose the major variant allele + unless( defined $tum_info{GT} and $tum_info{GT} ne '.' and $tum_info{GT} ne "./." ) { + # The first depth listed belongs to the reference allele. Of the rest, find the largest + for( my $i = 1; $i <= $#tum_depths; ++$i ) { + $var_allele_idx = $i if( $tum_depths[$i] and $tum_depths[$i] > $tum_depths[$var_allele_idx] ); + } + $tum_info{GT} = "./."; + if( defined $tum_info{DP} and $tum_info{DP} ne '.' and $tum_info{DP} != 0 and defined $tum_depths[$var_allele_idx] ) { + my $vaf = $tum_depths[$var_allele_idx] / $tum_info{DP}; + $tum_info{GT} = ( $vaf < $min_hom_vaf ? "0/1" : "1/1" ); + } + } + } + + # Set the variant allele to whatever we selected above + my $var = $alleles[$var_allele_idx]; + + # Standardize normal AD and DP based on data in the genotype fields + if( defined $vcf_normal_idx ) { + FixAlleleDepths( \@alleles, $var_allele_idx, \%nrm_info ); + @nrm_depths = split( ",", $nrm_info{AD} ); + $nrm_info{GT} = "./." unless( defined $nrm_info{GT} and $nrm_info{GT} ne '.' 
); + } + + # Figure out the appropriate start/stop loci and variant type/allele to report in the MAF + my $start = my $stop = my $var_type = my $inframe = ""; + my ( $ref_length, $var_length ) = ( length( $ref ), length( $var )); + # Backup the VCF-style position and REF/ALT alleles, so we can use it later + my ( $vcf_pos, $vcf_ref, $vcf_var ) = ( $pos, $ref, $var ); + # Remove any prefixed reference bps from all alleles, using "-" for simple indels + while( $ref and $var and substr( $ref, 0, 1 ) eq substr( $var, 0, 1 ) and $ref ne $var ) { + ( $ref, $var, @alleles ) = map{$_ = substr( $_, 1 ); ( $_ ? $_ : "-" )} ( $ref, $var, @alleles ); + --$ref_length; --$var_length; ++$pos; + } + # Handle SNPs, DNPs, TNPs, or anything larger (ONP) + if( $ref_length == $var_length ) { + ( $start, $stop ) = ( $pos, $pos + $var_length - 1 ); + my %np_type = qw( 1 SNP 2 DNP 3 TNP ); + $var_type = ( $var_length > 3 ? "ONP" : $np_type{$var_length} ); + } + # Handle all indels, including those complex ones which contain substitutions + elsif( $ref_length != $var_length ) { + if( $ref_length < $var_length ) { # Handle insertions, and the special case for complex ones + ( $start, $stop ) = (( $ref eq "-" ? $pos - 1 : $pos ), ( $ref eq "-" ? $pos : $pos + $ref_length - 1 )); + $var_type = "INS"; + } + else { # Handle deletions + ( $start, $stop ) = ( $pos, $pos + $ref_length - 1 ); + $var_type = "DEL"; + } + $inframe = ( abs( $ref_length - $var_length ) % 3 == 0 ? 1 : 0 ); + } + + my @all_effects; # A list of effects of this variant on all possible transcripts + my $maf_effect; # A single effect per variant to report in the standard MAF columns + my %maf_line = map{( $_, '' )} @maf_header; # Initialize MAF fields with blank strings + + # VEP provides a comma-delimited list of consequences, with pipe-delim details per consequence + # It replaces ',' in details with '&'. 
We'll assume that all '&'s we see, were formerly commas + # "Consequence" might list multiple effects on the same transcript e.g. missense,splice_region + if( $info{CSQ} or $info{ANN} ) { + + my $ann_lines = ( $info{CSQ} ? $info{CSQ} : $info{ANN} ); + foreach my $ann_line ( split( /,/, $ann_lines )) { + my $idx = 0; + my %effect = map{s/\&/,/g; ( $ann_cols_format[$idx++], ( defined $_ ? $_ : '' ))} split( /\|/, $ann_line ); + + # Remove transcript ID from HGVS codon/protein changes, to make it easier on the eye + $effect{HGVSc} =~ s/^.*:// if( $effect{HGVSc} ); + $effect{HGVSp} =~ s/^.*:// if( $effect{HGVSp} ); + + # Remove the prefixed HGVSc code in HGVSp, if found + $effect{HGVSp} =~ s/^.*\((p\.\S+)\)/$1/ if( $effect{HGVSp} and $effect{HGVSp} =~ m/^c\./ ); + + # Sort consequences by decreasing order of severity, and pick the most severe one + $effect{Consequence} = join( ",", sort { GetEffectPriority($a) <=> GetEffectPriority($b) } split( ",", $effect{Consequence} )); + ( $effect{One_Consequence} ) = split( ",", $effect{Consequence} ); + + # When VEP fails to provide any value in Consequence, tag it as an intergenic variant + $effect{One_Consequence} = "intergenic_variant" unless( $effect{Consequence} ); + + # Create a shorter HGVS protein format using 1-letter codes + if( $effect{HGVSp} ) { + my $hgvs_p_short = $effect{HGVSp}; + while( $hgvs_p_short and my ( $find, $replace ) = each %aa3to1 ) { + eval "\$hgvs_p_short =~ s{$find}{$replace}g"; + } + $effect{HGVSp_Short} = $hgvs_p_short; + } + + # Fix HGVSp_Short, CDS_position, and Protein_position for splice acceptor/donor variants + if( $effect{One_Consequence} =~ m/^(splice_acceptor_variant|splice_donor_variant)$/ ) { + my ( $c_pos ) = $effect{HGVSc} =~ m/^c.(\d+)/; + if( defined $c_pos ) { + $c_pos = 1 if( $c_pos < 1 ); # Handle negative cDNA positions used in 5' UTRs + my $p_pos = sprintf( "%.0f", ( $c_pos + $c_pos % 3 ) / 3 ); + $effect{HGVSp_Short} = "p.X" . $p_pos . 
"_splice"; + $effect{CDS_position} =~ s/^-(\/\d+)$/$c_pos$1/; + $effect{Protein_position} =~ s/^-(\/\d+)$/$p_pos$1/; + } + } + + # Fix HGVSp_Short for Silent mutations, so it mentions the amino-acid and position + if( defined $effect{HGVSp_Short} and $effect{HGVSp_Short} eq "p.=" ) { + my ( $p_pos ) = $effect{Protein_position} =~ m/^(\d+)(-\d+)?\/\d+$/; + my $aa = $effect{Amino_acids}; + $effect{HGVSp_Short} = "p.$aa" . $p_pos . "="; + } + + # Copy VEP data into MAF fields that don't share the same identifier + $effect{Transcript_ID} = $effect{Feature}; + $effect{Exon_Number} = $effect{EXON}; + $effect{Hugo_Symbol} = ( $effect{SYMBOL} ? $effect{SYMBOL} : '' ); + + # If AF columns from the older VEP are found, rename to the newer ones for consistency + my %af_col = qw( GMAF AF AFR_MAF AFR_AF AMR_MAF AMR_AF ASN_MAF ASN_AF EAS_MAF EAS_AF + EUR_MAF EUR_AF SAS_MAF SAS_AF AA_MAF AA_AF EA_MAF EA_AF ); + map { $effect{$af_col{$_}} = $effect{$_} if( defined $effect{$_} )} keys %af_col; + + # If VEP couldn't find this variant in dbSNP/COSMIC/etc., we'll say it's "novel" + if( $effect{Existing_variation} ) { + # ::NOTE:: If seen in a DB other than dbSNP, this field will remain blank + $effect{dbSNP_RS} = join( ",", grep{m/^rs\d+$/} split( /,/, $effect{Existing_variation} )); + } + else { + $effect{dbSNP_RS} = "novel"; + } + + # Transcript_Length isn't separately reported, but can be parsed out from cDNA_position + ( $effect{Transcript_Length} ) = $effect{cDNA_position} =~ m/\/(\d+)$/; + $effect{Transcript_Length} = 0 unless( defined $effect{Transcript_Length} ); + + # Skip effects on other ALT alleles. If ALLELE_NUM is undefined (e.g. 
for INFO:SVTYPE), don't skip any + push( @all_effects, \%effect ) unless( $effect{ALLELE_NUM} and $effect{ALLELE_NUM} != $var_allele_idx ); + } + + # Sort effects first by transcript biotype, then by severity, and then by longest transcript + @all_effects = sort { + GetBiotypePriority( $a->{BIOTYPE} ) <=> GetBiotypePriority( $b->{BIOTYPE} ) || + GetEffectPriority( $a->{One_Consequence} ) <=> GetEffectPriority( $b->{One_Consequence} ) || + $b->{Transcript_Length} <=> $a->{Transcript_Length} + } @all_effects; + + # Find the highest priority effect with a gene symbol (usually the first one) + my ( $effect_with_gene_name ) = grep { $_->{SYMBOL} } @all_effects; + my $maf_gene = $effect_with_gene_name->{SYMBOL} if( $effect_with_gene_name ); + + # If the gene has user-defined custom isoform overrides, choose that instead + ( $maf_effect ) = grep { $_->{SYMBOL} and $_->{SYMBOL} eq $maf_gene and $_->{Transcript_ID} and $custom_enst{$_->{Transcript_ID}} } @all_effects; + + # Find the effect on the canonical transcript of that highest priority gene + ( $maf_effect ) = grep { $_->{SYMBOL} and $_->{SYMBOL} eq $maf_gene and $_->{CANONICAL} and $_->{CANONICAL} eq "YES" } @all_effects unless( $maf_effect ); + + # If that gene has no canonical transcript tagged, choose the highest priority canonical effect on any gene + ( $maf_effect ) = grep { $_->{CANONICAL} and $_->{CANONICAL} eq "YES" } @all_effects unless( $maf_effect ); + + # If none of the effects are tagged as canonical, then just report the top priority effect + $maf_effect = $all_effects[0] unless( $maf_effect ); + } + + # Construct the MAF columns from the $maf_effect hash + %maf_line = map{( $_, ( $maf_effect->{$_} ? $maf_effect->{$_} : '' ))} @maf_header; + $maf_line{Hugo_Symbol} = $maf_effect->{Transcript_ID} unless( $maf_effect->{Hugo_Symbol} ); + $maf_line{Hugo_Symbol} = 'Unknown' unless( $maf_effect->{Transcript_ID} ); + $maf_line{Entrez_Gene_Id} = ( defined $entrez_id_map{$maf_effect->{Gene}} ? 
$entrez_id_map{$maf_effect->{Gene}} : "0" ); + $maf_line{Center} = $maf_center; + $maf_line{NCBI_Build} = $ncbi_build; + $maf_line{Chromosome} = $chrom; + $maf_line{Start_Position} = $start; + $maf_line{End_Position} = $stop; + $maf_line{Strand} = '+'; # Per MAF definition, only the positive strand is an accepted value + $maf_line{STRAND_VEP} = $maf_effect->{STRAND}; # Renamed to avoid mixup with "Strand" above + $maf_line{Variant_Classification} = GetVariantClassification( $maf_effect->{One_Consequence}, $var_type, $inframe ); + $maf_line{Variant_Type} = $var_type; + $maf_line{Reference_Allele} = $ref; + # ::NOTE:: If tumor genotype is unavailable, then we'll assume it's ref/var heterozygous + $maf_line{Tumor_Seq_Allele1} = $ref; + $maf_line{Tumor_Seq_Allele2} = $var; + if( defined $tum_info{GT} and $tum_info{GT} ne "." and $tum_info{GT} ne "./." ) { + # ::NOTE:: MAF only supports biallelic sites. Tumor_Seq_Allele2 must always be the $var + # picked earlier. For Tumor_Seq_Allele1, pick the first non-var allele in GT (usually $ref) + my ( $idx1, $idx2 ) = split( /[\/|]/, $tum_info{GT} ); + # If GT was monoploid, then $idx2 will be undefined, and we should set it equal to $idx1 + $idx2 = $idx1 unless( defined $idx2 ); + $maf_line{Tumor_Seq_Allele1} = ( $alleles[$idx1] ne $var ? $alleles[$idx1] : $alleles[$idx2] ); + } + # ::NOTE:: If normal genotype is unavailable, then we'll assume it's ref/ref homozygous + $maf_line{Match_Norm_Seq_Allele1} = $ref; + $maf_line{Match_Norm_Seq_Allele2} = $ref; + if( defined $nrm_info{GT} and $nrm_info{GT} ne "." and $nrm_info{GT} ne "./." ) { + # ::NOTE:: MAF only supports biallelic sites. 
So choose the first two alleles listed in GT + my ( $idx1, $idx2 ) = split( /[\/|]/, $nrm_info{GT} ); + # If GT was monoploid, then $idx2 will be undefined, and we should set it equal to $idx1 + $idx2 = $idx1 unless( defined $idx2 ); + $maf_line{Match_Norm_Seq_Allele1} = $alleles[$idx1]; + $maf_line{Match_Norm_Seq_Allele2} = $alleles[$idx2]; + } + $maf_line{Tumor_Sample_Barcode} = $tumor_id; + $maf_line{Matched_Norm_Sample_Barcode} = $normal_id; + $maf_line{t_depth} = $tum_info{DP} if( defined $tum_info{DP} and $tum_info{DP} ne "." ); + ( $maf_line{t_ref_count}, $maf_line{t_alt_count} ) = @tum_depths[0,$var_allele_idx] if( @tum_depths ); + $maf_line{n_depth} = $nrm_info{DP} if( defined $nrm_info{DP} and $nrm_info{DP} ne "." ); + ( $maf_line{n_ref_count}, $maf_line{n_alt_count} ) = @nrm_depths[0,$var_allele_idx] if( @nrm_depths ); + + # Create a semicolon delimited list summarizing the prioritized effects in @all_effects + $maf_line{all_effects} = ""; + foreach my $effect ( @all_effects ) { + my $gene_name = $effect->{Hugo_Symbol}; + my $effect_type = $effect->{One_Consequence}; + my $protein_change = ( $effect->{HGVSp} ? $effect->{HGVSp} : '' ); + my $transcript_id = ( $effect->{Transcript_ID} ? $effect->{Transcript_ID} : '' ); + my $refseq_ids = ( $effect->{RefSeq} ? 
$effect->{RefSeq} : '' ); + $maf_line{all_effects} .= "$gene_name,$effect_type,$protein_change,$transcript_id,$refseq_ids;" if( $effect_type and $transcript_id ); + } + + # If this variant was seen in the ExAC VCF, let's report allele counts and frequencies + # ExAC merges and pads multiallelic sites, so we need to normalize each variant, (remove common + # suffixed bps) before we compare it to our variant allele + my $locus = "$chrom:$vcf_pos"; + if( defined $filter_data{$locus} ) { + my $idx = 0; + $maf_line{ExAC_FILTER} = $filter_data{$locus}{FILTER}; + foreach my $f_var ( split( ",", $filter_data{$locus}{ALT} )) { + my $f_ref = $filter_data{$locus}{REF}; + # De-pad suffixed bps that are identical between ref/var alleles + while( $f_ref and $f_var and substr( $f_ref, -1, 1 ) eq substr( $f_var, -1, 1 ) and $f_ref ne $f_var ) { + ( $f_ref, $f_var ) = map{substr( $_, 0, -1 )} ( $f_ref, $f_var ); + } + # If this normalized variant matches our input variant, report its allele counts + # ExAC reports MNPs as separate SNPs. So we'll need to report the ACs of the first SNP + if(( $vcf_ref eq $f_ref and $vcf_var eq $f_var ) or + (( $var_type eq "DNP" or $var_type eq "TNP" or $var_type eq "ONP") and + ( $vcf_ref =~ m/^$f_ref/ and $vcf_var =~ m/^$f_var/ ))) { + my @var_acs = split( ",", $filter_data{$locus}{AC} ); + my $var_ac = $var_acs[$idx]; + my $pop_an = $filter_data{$locus}{AN}; + $maf_line{ExAC_AF} = sprintf( "%.4g", ( $pop_an ? ( $var_ac / $pop_an ) : 0 )); + $maf_line{ExAC_AC_AN} = join( "/", $var_ac, $pop_an ); + # Do the same for AC/AN in each subpopulation, and the adjusted total AC/AN (Adj) + foreach my $subpop ( qw( AFR AMR EAS FIN NFE OTH SAS Adj )) { + @var_acs = split( ",", $filter_data{$locus}{"AC_$subpop"} ); + $var_ac = $var_acs[$idx]; + $pop_an = $filter_data{$locus}{"AN_$subpop"}; + $maf_line{"ExAC_AF_$subpop"} = sprintf( "%.4g", ( $pop_an ? 
( $var_ac / $pop_an ) : 0 ));
                    $maf_line{"ExAC_AC_AN_$subpop"} = join( "/", $var_ac, $pop_an );
                }
                last;
            }
            ++$idx;
        }
    }

    # Copy FILTER from input VCF, and tag calls with high allele counts in any ExAC subpopulation
    my $subpop_count = 0;
    # Remove existing common_variant tags from input, so it's redefined by our criteria here
    $filter = join( ";", grep{ $_ ne "common_variant" } split( /,|;/, $filter ));
    foreach my $subpop ( qw( AFR AMR EAS FIN NFE OTH SAS )) {
        if( $maf_line{"ExAC_AC_AN_$subpop"} ) {
            my ( $subpop_ac ) = split( "/", $maf_line{"ExAC_AC_AN_$subpop"} );
            # The AC can be missing (e.g. the filter VCF lacks AC_$subpop, leaving just "/"), so
            # only compare it numerically when it really is a count
            $subpop_count++ if( defined $subpop_ac and $subpop_ac =~ m/^\d+$/ and $subpop_ac > $max_filter_ac );
        }
    }
    if( $subpop_count > 0 ) {
        $filter = (( $filter eq "PASS" or $filter eq "." or !$filter ) ? "common_variant" : "$filter;common_variant" );
    }
    $maf_line{FILTER} = $filter;

    # Also add the reference allele flanking bps that we generated earlier with samtools
    my $region = "$chrom:" . ( $vcf_pos - 1 ) . "-" . ( $vcf_pos + length( $vcf_ref ));
    $maf_line{flanking_bps} = $flanking_bps{$region};

    # Add ID and QUAL from the input VCF into respective MAF columns
    $maf_line{variant_id} = $var_id;
    $maf_line{variant_qual} = $var_qual;

    # If there are additional INFO data to add, then add those
    foreach my $info_col ( @addl_info_cols ) {
        $maf_line{$info_col} = ( defined $info{$info_col} ? $info{$info_col} : "" );
    }

    # If this is an SV, pair up gene names from separate lines to backfill the Fusion column later
    # ::NOTE:: The alternation must be grouped, or else ^ only anchors "<BND" and $ only anchors
    # "INV>". An optional ":SUBTYPE" suffix is allowed, so alleles like <DUP:TANDEM> still match
    if( $split_svs and $var =~ m/^<(?:BND|DEL|DUP|INV)(?::[^<>]+)?>$/ ) {
        my $sv_key = "$var_id-$tumor_id";
        if( $sv_pair{$sv_key} ) {
            $sv_pair{$sv_key} = $sv_pair{$sv_key} . "-" . $maf_line{Hugo_Symbol} . " fusion";
        }
        else {
            $sv_pair{$sv_key} = $maf_line{Hugo_Symbol};
        }
    }

    # At this point, we've generated all we can about this variant, so write it to the MAF
    $maf_fh->print( join( "\t", map{( defined $maf_line{$_} ? $maf_line{$_} : "" )} @maf_header ) . "\n" );
}
$maf_fh->close;
$annotated_vcf_fh->close;

# If the MAF lists SVs, backfill the Fusion column with gene-pair names
if( $split_svs ) {
    my $output_name = substr( $output_maf, rindex( $output_maf, "/" ) + 1 );
    $output_name =~ s/(\.maf)*$//;
    my $tmp_output_maf = "$tmp_dir/$output_name.tmp.maf";

    my $in_maf_fh = IO::File->new( $output_maf ) or die "ERROR: Couldn't open: $output_maf!\n";
    my $out_maf_fh = IO::File->new( $tmp_output_maf, ">" ) or die "ERROR: Couldn't open: $tmp_output_maf!\n";
    my ( $tid_idx, $fusion_idx, $var_id_idx ) = ( 0, 0, 0 );
    while( my $line = $in_maf_fh->getline ) {
        chomp( $line );
        if( $line =~ m/^#/ ) {
            $out_maf_fh->print( "$line\n" ); # Copy comments unchanged
        }
        elsif( $line =~ m/^Hugo_Symbol/ ) {
            # Copy the header unchanged, after figuring out necessary column indexes
            foreach( split( /\t/, $line )) { last if( $_ eq "Tumor_Sample_Barcode" ); ++$tid_idx; }
            foreach( split( /\t/, $line )) { last if( $_ eq "Fusion" ); ++$fusion_idx; }
            foreach( split( /\t/, $line )) { last if( $_ eq "variant_id" ); ++$var_id_idx; }
            $out_maf_fh->print( "$line\n" ); # Copy header unchanged
        }
        else {
            # Write the gene-pair name into the Fusion column if it was backfilled earlier
            my @cols = split( /\t/, $line, -1 );
            my $sv_key = $cols[$var_id_idx] . "-" . $cols[$tid_idx];
            $cols[$fusion_idx] = $sv_pair{$sv_key} if( $sv_pair{$sv_key} );
            $out_maf_fh->print( join( "\t", @cols ) . "\n" );
        }
    }
    $out_maf_fh->close;
    $in_maf_fh->close;

    # Replace the MAF with the backfilled copy. Like the open() failures above, a failed move is
    # fatal, because the MAF would otherwise silently lack the Fusion annotations
    move( $tmp_output_maf, $output_maf ) or die "ERROR: Couldn't move $tmp_output_maf to $output_maf: $!\n";
}

# Converts Sequence Ontology variant types to MAF variant classifications
#   $effect   - a single Sequence Ontology consequence term (may be undefined/blank)
#   $var_type - the MAF Variant_Type of the variant; only DEL and INS change the result
#   $inframe  - true if the indel length is a multiple of 3
# Returns a MAF Variant_Classification string; "Targeted_Region" if nothing else applies
sub GetVariantClassification {
    my ( $effect, $var_type, $inframe ) = @_;
    # Variants without any CSQ/ANN annotation reach here with an undefined effect. Treat that as
    # an empty string, so it falls through to "Targeted_Region" without pattern-matching on undef
    $effect = '' unless( defined $effect );
    $var_type = '' unless( defined $var_type );
    return "Splice_Site" if( $effect =~ /^(splice_acceptor_variant|splice_donor_variant|transcript_ablation|exon_loss_variant)$/ );
    return "Nonsense_Mutation" if( $effect eq 'stop_gained' );
    return "Frame_Shift_Del" if(( $effect eq 'frameshift_variant' or ( $effect eq 'protein_altering_variant' and !$inframe )) and $var_type eq 'DEL' );
    return "Frame_Shift_Ins" if(( $effect eq 'frameshift_variant' or ( $effect eq 'protein_altering_variant' and !$inframe )) and $var_type eq 'INS' );
    return "Nonstop_Mutation" if( $effect eq 'stop_lost' );
    return "Translation_Start_Site" if( $effect =~ /^(initiator_codon_variant|start_lost)$/ );
    return "In_Frame_Ins" if( $effect =~ /^(inframe_insertion|disruptive_inframe_insertion)$/ or ( $effect eq 'protein_altering_variant' and $inframe and $var_type eq 'INS' ));
    return "In_Frame_Del" if( $effect =~ /^(inframe_deletion|disruptive_inframe_deletion)$/ or ( $effect eq 'protein_altering_variant' and $inframe and $var_type eq 'DEL' ));
    return "Missense_Mutation" if( $effect =~ /^(missense_variant|coding_sequence_variant|conservative_missense_variant|rare_amino_acid_variant)$/ );
    return "Intron" if ( $effect =~ /^(transcript_amplification|intron_variant|INTRAGENIC|intragenic_variant)$/ );
    return "Splice_Region" if( $effect eq 'splice_region_variant' );
    return "Silent" if( $effect =~ /^(incomplete_terminal_codon_variant|synonymous_variant|stop_retained_variant|NMD_transcript_variant)$/ );
    return "RNA" if( $effect =~ /^(mature_miRNA_variant|exon_variant|non_coding_exon_variant|non_coding_transcript_exon_variant|non_coding_transcript_variant|nc_transcript_variant)$/ );
    return "5'UTR" if( $effect =~ /^(5_prime_UTR_variant|5_prime_UTR_premature_start_codon_gain_variant)$/ );
    return "3'UTR" if( $effect eq '3_prime_UTR_variant' );
    return "IGR" if( $effect =~ /^(TF_binding_site_variant|regulatory_region_variant|regulatory_region|intergenic_variant|intergenic_region)$/ );
    return "5'Flank" if( $effect eq 'upstream_gene_variant' );
    return "3'Flank" if ( $effect eq 'downstream_gene_variant' );

    # Annotate everything else simply as a targeted region
    # TFBS_ablation, TFBS_amplification,regulatory_region_ablation, regulatory_region_amplification,
    # feature_elongation, feature_truncation
    return "Targeted_Region";
}

# Fix the AD and DP fields, given data from a FORMATted genotype string
#   $alleles_ref    - ref to the list of alleles (REF first, then ALTs); may be updated in place
#                     when Strelka reports "N" as the only ALT
#   $var_allele_idx - index into that list, of the variant allele chosen by the caller
#   $fmt_info_ref   - ref to a hash of FORMAT tag => value for one sample; updated in place
# On return, AD in the hash is a comma-delimited list with "." for unknown depths, and DP is
# reset/filled from the allele depths where needed. Always returns 1
sub FixAlleleDepths {
    my ( $alleles_ref, $var_allele_idx, $fmt_info_ref ) = @_;
    my %fmt_info = %{$fmt_info_ref};
    my @alleles = @{$alleles_ref};
    my @depths = ();

    # If AD is defined, then parse out all REF/ALT allele depths, or whatever is in it
    if( defined $fmt_info{AD} and $fmt_info{AD} ne "." ) {
        @depths = map{( m/^\d+$/ ? $_ : "" )}split( /,/, $fmt_info{AD} );
    }

    # Handle VarScan VCF lines where AD contains only 1 depth, and REF allele depth is in RD
    if( scalar( @depths ) == 1 and defined $fmt_info{RD} ) {
        @depths = map{""} @alleles;
        $depths[0] = $fmt_info{RD};
        $depths[$var_allele_idx] = $fmt_info{AD};
    }
    # Handle SomaticSniper VCF lines, where allele depths must be extracted from BCOUNT
    elsif( !defined $fmt_info{AD} and defined $fmt_info{BCOUNT} ) {
        my %b_idx = ( A=>0, C=>1, G=>2, T=>3 );
        my @bcount = split( /,/, $fmt_info{BCOUNT} );
        @depths = map{(( defined $b_idx{$_} and defined $bcount[$b_idx{$_}] ) ? $bcount[$b_idx{$_}] : "" )} @alleles;
    }
    # Handle VCF SNV lines by Strelka, where allele depths are in AU:CU:GU:TU
    elsif( !defined $fmt_info{AD} and scalar( grep{defined $fmt_info{$_}} qw/AU CU GU TU/ ) == 4 ) {
        # Strelka allele depths come in tiers 1,2. We'll use tier1 cuz it's stricter, and DP already is
        map{( $fmt_info{$_.'U'} ) = split( ",", $fmt_info{$_.'U'} )} qw( A C G T );

        # If the only ALT allele is N, then set it to the allele with the highest non-ref readcount
        if( scalar( @alleles ) == 2 and $alleles[1] eq "N" ) {
            my %acgt_depths = map{( defined $fmt_info{$_.'U'} ? ( $_, $fmt_info{$_.'U'} ) : ( $_, "" ))} qw( A C G T );
            # Break ties alphabetically, so the choice doesn't depend on Perl's random hash order
            my @deepest = sort {$acgt_depths{$b} <=> $acgt_depths{$a} or $a cmp $b} keys %acgt_depths;
            ( $alleles[1] ) = ( $deepest[0] ne $alleles[0] ? $deepest[0] : $deepest[1] );
        }
        @depths = map{( defined $fmt_info{$_.'U'} ? $fmt_info{$_.'U'} : "" )} @alleles;
    }
    # Handle VCF Indel lines by Strelka, where variant allele depth is in TIR
    elsif( !defined $fmt_info{AD} and $fmt_info{TIR} ) {
        # Reference allele depth is not provided by Strelka for indels, so we have to skip it
        @depths = ( "", ( split /,/, $fmt_info{TIR} )[0] );
    }
    # Handle VCF lines by CaVEMan, where allele depths are in FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ
    elsif( !defined $fmt_info{AD} and scalar( grep{defined $fmt_info{$_}} qw/FAZ FCZ FGZ FTZ RAZ RCZ RGZ RTZ/ ) == 8 ) {
        # Create tags for forward+reverse strand reads, and use those to determine REF/ALT depths
        map{ $fmt_info{$_} = $fmt_info{'F'.$_} + $fmt_info{'R'.$_} } qw( AZ CZ GZ TZ );
        @depths = map{( defined $fmt_info{$_.'Z'} ? $fmt_info{$_.'Z'} : "" )} @alleles;
    }
    # Handle VCF lines from the Ion Torrent Suite where ALT depths are in AO and REF depths are in RO
    elsif( !defined $fmt_info{AD} and defined $fmt_info{AO} and defined $fmt_info{RO} ) {
        @depths = ( $fmt_info{RO}, map{( m/^\d+$/ ? $_ : "" )}split( /,/, $fmt_info{AO} ));
    }
    # Handle VCF lines from Delly where REF/ALT SV junction read counts are in RR/RV respectively
    elsif( !defined $fmt_info{AD} and defined $fmt_info{RR} and defined $fmt_info{RV} ) {
        # Reference allele depth and depths for any other ALT alleles must be left undefined
        @depths = map{""} @alleles;
        $depths[0] = $fmt_info{RR};
        $depths[$var_allele_idx] = $fmt_info{RV};
    }
    # Handle VCF lines from cgpPindel, where ALT depth and total depth are in PP:NP:PR:NR
    elsif( !defined $fmt_info{AD} and scalar( grep{defined $fmt_info{$_}} qw/PP NP PR NR/ ) == 4 ) {
        # Reference allele depth and depths for any other ALT alleles must be left undefined
        @depths = map{""} @alleles;
        $depths[$var_allele_idx] = $fmt_info{PP} + $fmt_info{NP};
        $fmt_info{DP} = $fmt_info{PR} + $fmt_info{NR};
    }
    # Handle VCF lines with ALT allele fraction in FA, which needs to be multiplied by DP to get AD
    elsif( !defined $fmt_info{AD} and defined $fmt_info{FA} and defined $fmt_info{DP} and $fmt_info{DP} ne '.' ) {
        # Reference allele depth and depths for any other ALT alleles must be left undefined
        @depths = map{""} @alleles;
        $depths[$var_allele_idx] = sprintf( "%.0f", $fmt_info{FA} * $fmt_info{DP} );
    }
    # Handle VCF lines from mpileup/bcftools where DV contains the ALT allele depth
    elsif( !defined $fmt_info{AD} and defined $fmt_info{DV} and defined $fmt_info{DP} ) {
        # Reference allele depth and depths for any other ALT alleles must be left undefined
        @depths = map{""} @alleles;
        $depths[$var_allele_idx] = $fmt_info{DV};
    }
    # Handle VCF lines where AD contains only 1 value, that we can assume is the variant allele
    elsif( defined $fmt_info{AD} and @depths and scalar( @depths ) == 1 ) {
        # Reference allele depth and depths for any other ALT alleles must be left undefined
        @depths = map{""} @alleles;
        $depths[$var_allele_idx] = $fmt_info{AD};
    }
    # For all other lines where #depths is not equal to #alleles, blank out the depths
    elsif( @depths and scalar( @depths ) != scalar( @alleles )) {
        @depths = map{""} @alleles;
    }

    # Sanity check that REF/ALT allele depths are lower than the total depth
    if( defined $fmt_info{DP} and $fmt_info{DP} ne '.' and (( $depths[0] and $depths[0] > $fmt_info{DP} ) or
        ( $depths[$var_allele_idx] and $depths[$var_allele_idx] > $fmt_info{DP} ) or
        ( $depths[0] and $depths[$var_allele_idx] and $depths[0] + $depths[$var_allele_idx] > $fmt_info{DP} ))) {
        $fmt_info{DP} = 0;
        map{$fmt_info{DP} += $_ if($_ and $_ ne '.')} @depths;
    }

    # If we have REF/ALT allele depths, but no DP, then set DP equal to the sum of all ADs
    if(( defined $depths[0] and defined $depths[$var_allele_idx] ) and ( !defined $fmt_info{DP} or $fmt_info{DP} eq '.' )) {
        $fmt_info{DP} = 0;
        map{$fmt_info{DP} += $_ if($_ and $_ ne '.')} @depths;
    }

    # Put all our changes back into the hash/array references that were passed over
    # Unknown depths (blank or undefined) are written out as "."
    $fmt_info{AD} = join( ",", map{(( defined $_ and $_ ne "" ) ? $_ : "." )} @depths );
    %{$fmt_info_ref} = %fmt_info;
    @{$alleles_ref} = @alleles;

    return 1;
}

__DATA__

=head1 NAME

 vcf2maf.pl - Convert a VCF into a MAF by mapping each variant to only one of all possible gene isoforms

=head1 SYNOPSIS

 perl vcf2maf.pl --help
 perl vcf2maf.pl --input-vcf WD4086.vcf --output-maf WD4086.maf --tumor-id WD4086 --normal-id NB4086

=head1 OPTIONS

 --input-vcf      Path to input file in VCF format
 --output-maf     Path to output MAF file
 --tmp-dir        Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF]
 --tumor-id       Tumor_Sample_Barcode to report in the MAF [TUMOR]
 --normal-id      Matched_Norm_Sample_Barcode to report in the MAF [NORMAL]
 --vcf-tumor-id   Tumor sample ID used in VCF's genotype columns [--tumor-id]
 --vcf-normal-id  Matched normal ID used in VCF's genotype columns [--normal-id]
 --custom-enst    List of custom ENST IDs that override canonical selection
 --vep-path       Folder containing the vep script [~/vep]
 --vep-data       VEP's base cache/plugin directory [~/.vep]
 --vep-forks      Number of forked processes to use when running VEP [4]
 --buffer-size    Number of variants VEP loads at a time; Reduce this for low memory systems [5000]
 --any-allele     When reporting co-located variants, allow mismatched variant alleles too
 --ref-fasta      Reference FASTA file [~/.vep/homo_sapiens/91_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz]
 --filter-vcf     A VCF for FILTER tag common_variant. Set to 0 to disable [~/.vep/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz]
 --max-filter-ac  Use tag common_variant if the filter-vcf reports a subpopulation AC higher than this [10]
 --species        Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens]
 --ncbi-build     NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37]
 --cache-version  Version of offline cache to use with VEP (e.g. 75, 84, 91) [Default: Installed version]
 --maf-center     Variant calling center to report in MAF [.]
+ --retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] + --min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] + --remap-chain Chain file to remap variants to a different assembly before running VEP + --help Print a brief help message and quit + --man Print the detailed manual + +=head1 DESCRIPTION + +To convert a VCF into a MAF, each variant must be mapped to only one of all possible gene transcripts/isoforms that it might affect. This selection of a single effect per variant is often subjective, so this project is an attempt to make the selection criteria smarter, reproducible, and more configurable. + +This script needs VEP, a variant annotator that maps effects of a variant on all possible genes and transcripts. For more info, see the README. + +=head2 Relevant links: + + Homepage: https://github.com/ckandoth/vcf2maf + VCF format: http://samtools.github.io/hts-specs/ + MAF format: https://wiki.nci.nih.gov/x/eJaPAQ + VEP: http://ensembl.org/info/docs/tools/vep/index.html + VEP annotated VCF format: http://ensembl.org/info/docs/tools/vep/vep_formats.html#vcfout + +=head1 AUTHORS + + Cyriac Kandoth (ckandoth@gmail.com) + Shweta Chavan (chavan.shweta@gmail.com) + +=head1 LICENSE + + Apache-2.0 | Apache License, Version 2.0 | https://www.apache.org/licenses/LICENSE-2.0 + +=cut
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vep-annotate-macros.xml Tue Jul 03 04:38:21 2018 -0400
@@ -0,0 +1,92 @@
+<macros>
+  <macro name="list-cache-annotate"> <!-- Species/assembly choices for the annotate mode. Each option value is inserted verbatim after the species flag of the VEP command line, so it holds the species name followed by the ASSEMBLY flag and the assembly name. -->
+    <param name="species" type="select" label="choose cache">
+      <option value="ailuropoda_melanoleuca --ASSEMBLY ailMel1">ailuropoda_melanoleuca --ASSEMBLY ailMel1</option>
+      <option value="anas_platyrhynchos --ASSEMBLY BGI_duck_1.0">anas_platyrhynchos --ASSEMBLY BGI_duck_1.0</option>
+      <option value="anolis_carolinensis --ASSEMBLY AnoCar2.0">anolis_carolinensis --ASSEMBLY AnoCar2.0</option>
+      <option value="astyanax_mexicanus --ASSEMBLY AstMex102">astyanax_mexicanus --ASSEMBLY AstMex102</option>
+      <option value="bos_taurus --ASSEMBLY UMD3.1">bos_taurus --ASSEMBLY UMD3.1</option>
+      <option value="caenorhabditis_elegans --ASSEMBLY WBcel235">caenorhabditis_elegans --ASSEMBLY WBcel235</option>
+      <option value="callithrix_jacchus --ASSEMBLY C_jacchus3.2.1">callithrix_jacchus --ASSEMBLY C_jacchus3.2.1</option>
+      <option value="canis_familiaris --ASSEMBLY CanFam3.1">canis_familiaris --ASSEMBLY CanFam3.1</option>
+      <option value="cavia_porcellus --ASSEMBLY cavPor3">cavia_porcellus --ASSEMBLY cavPor3</option>
+      <option value="chlorocebus_sabaeus --ASSEMBLY ChlSab1.1">chlorocebus_sabaeus --ASSEMBLY ChlSab1.1</option>
+      <option value="choloepus_hoffmanni --ASSEMBLY choHof1">choloepus_hoffmanni --ASSEMBLY choHof1</option>
+      <option value="ciona_intestinalis --ASSEMBLY KH">ciona_intestinalis --ASSEMBLY KH</option>
+      <option value="ciona_savignyi --ASSEMBLY CSAV2.0">ciona_savignyi --ASSEMBLY CSAV2.0</option>
+      <option value="danio_rerio --ASSEMBLY GRCz10">danio_rerio --ASSEMBLY GRCz10</option>
+      <option value="dasypus_novemcinctus --ASSEMBLY Dasnov3.0">dasypus_novemcinctus --ASSEMBLY Dasnov3.0</option>
+      <option value="dipodomys_ordii --ASSEMBLY dipOrd1">dipodomys_ordii --ASSEMBLY dipOrd1</option>
+      <option value="drosophila_melanogaster --ASSEMBLY BDGP6">drosophila_melanogaster --ASSEMBLY BDGP6</option>
+      <option value="echinops_telfairi --ASSEMBLY TENREC">echinops_telfairi --ASSEMBLY TENREC</option>
+      <option value="equus_caballus --ASSEMBLY EquCab2">equus_caballus --ASSEMBLY EquCab2</option>
+      <option value="erinaceus_europaeus --ASSEMBLY HEDGEHOG">erinaceus_europaeus --ASSEMBLY HEDGEHOG</option>
+      <option value="felis_catus --ASSEMBLY Felis_catus_6.2">felis_catus --ASSEMBLY Felis_catus_6.2</option>
+      <option value="ficedula_albicollis --ASSEMBLY FicAlb_1.4">ficedula_albicollis --ASSEMBLY FicAlb_1.4</option>
+      <option value="gadus_morhua --ASSEMBLY gadMor1">gadus_morhua --ASSEMBLY gadMor1</option>
+      <option value="gallus_gallus --ASSEMBLY Gallus_gallus_5.0">gallus_gallus --ASSEMBLY Gallus_gallus_5.0</option>
+      <option value="gasterosteus_aculeatus --ASSEMBLY BROADS1">gasterosteus_aculeatus --ASSEMBLY BROADS1</option>
+      <option value="gorilla_gorilla --ASSEMBLY gorGor3.1">gorilla_gorilla --ASSEMBLY gorGor3.1</option>
+      <option value="homo_sapiens --ASSEMBLY GRCh37">homo_sapiens --ASSEMBLY GRCh37</option>
+      <option value="homo_sapiens --ASSEMBLY GRCh38" selected="true">homo_sapiens --ASSEMBLY GRCh38</option>
+      <option value="ictidomys_tridecemlineatus --ASSEMBLY spetri2">ictidomys_tridecemlineatus --ASSEMBLY spetri2</option>
+      <option value="latimeria_chalumnae --ASSEMBLY LatCha1">latimeria_chalumnae --ASSEMBLY LatCha1</option>
+      <option value="lepisosteus_oculatus --ASSEMBLY LepOcu1">lepisosteus_oculatus --ASSEMBLY LepOcu1</option>
+      <option value="loxodonta_africana --ASSEMBLY loxAfr3">loxodonta_africana --ASSEMBLY loxAfr3</option>
+      <option value="macaca_mulatta --ASSEMBLY Mmul_8.0.1">macaca_mulatta --ASSEMBLY Mmul_8.0.1</option>
+      <option value="macropus_eugenii --ASSEMBLY Meug_1.0">macropus_eugenii --ASSEMBLY Meug_1.0</option>
+      <option value="meleagris_gallopavo --ASSEMBLY UMD2">meleagris_gallopavo --ASSEMBLY UMD2</option>
+      <option value="microcebus_murinus --ASSEMBLY Mmur_2.0">microcebus_murinus --ASSEMBLY Mmur_2.0</option>
+      <option value="monodelphis_domestica --ASSEMBLY BROADO5">monodelphis_domestica --ASSEMBLY BROADO5</option>
+      <option value="mus_musculus_129s1svimj --ASSEMBLY 129S1_SvImJ_v1">mus_musculus_129s1svimj --ASSEMBLY 129S1_SvImJ_v1</option>
+      <option value="mus_musculus_aj --ASSEMBLY A_J_v1">mus_musculus_aj --ASSEMBLY A_J_v1</option>
+      <option value="mus_musculus_akrj --ASSEMBLY AKR_J_v1">mus_musculus_akrj --ASSEMBLY AKR_J_v1</option>
+      <option value="mus_musculus_balbcj --ASSEMBLY BALB_cJ_v1">mus_musculus_balbcj --ASSEMBLY BALB_cJ_v1</option>
+      <option value="mus_musculus_c3hhej --ASSEMBLY C3H_HeJ_v1">mus_musculus_c3hhej --ASSEMBLY C3H_HeJ_v1</option>
+      <option value="mus_musculus_c57bl6nj --ASSEMBLY C57BL_6NJ_v1">mus_musculus_c57bl6nj --ASSEMBLY C57BL_6NJ_v1</option>
+      <option value="mus_musculus_casteij --ASSEMBLY CAST_EiJ_v1">mus_musculus_casteij --ASSEMBLY CAST_EiJ_v1</option>
+      <option value="mus_musculus_cbaj --ASSEMBLY CBA_J_v1">mus_musculus_cbaj --ASSEMBLY CBA_J_v1</option>
+      <option value="mus_musculus_dba2j --ASSEMBLY DBA_2J_v1">mus_musculus_dba2j --ASSEMBLY DBA_2J_v1</option>
+      <option value="mus_musculus_fvbnj --ASSEMBLY FVB_NJ_v1">mus_musculus_fvbnj --ASSEMBLY FVB_NJ_v1</option>
+      <option value="mus_musculus_lpj --ASSEMBLY LP_J_v1">mus_musculus_lpj --ASSEMBLY LP_J_v1</option>
+      <option value="mus_musculus_nodshiltj --ASSEMBLY NOD_ShiLtJ_v1">mus_musculus_nodshiltj --ASSEMBLY NOD_ShiLtJ_v1</option>
+      <option value="mus_musculus_nzohlltj --ASSEMBLY NZO_HlLtJ_v1">mus_musculus_nzohlltj --ASSEMBLY NZO_HlLtJ_v1</option>
+      <option value="mus_musculus_pwkphj --ASSEMBLY PWK_PhJ_v1">mus_musculus_pwkphj --ASSEMBLY PWK_PhJ_v1</option>
+      <option value="mus_musculus --ASSEMBLY GRCm38">mus_musculus --ASSEMBLY GRCm38</option>
+      <option value="mus_musculus_wsbeij --ASSEMBLY WSB_EiJ_v1">mus_musculus_wsbeij --ASSEMBLY WSB_EiJ_v1</option>
+      <option value="mus_spretus_spreteij --ASSEMBLY SPRET_EiJ_v1">mus_spretus_spreteij --ASSEMBLY SPRET_EiJ_v1</option>
+      <option value="mustela_putorius_furo --ASSEMBLY MusPutFur1.0">mustela_putorius_furo --ASSEMBLY MusPutFur1.0</option>
+      <option value="myotis_lucifugus --ASSEMBLY Myoluc2.0">myotis_lucifugus --ASSEMBLY Myoluc2.0</option>
+      <option value="nomascus_leucogenys --ASSEMBLY Nleu1.0">nomascus_leucogenys --ASSEMBLY Nleu1.0</option>
+      <option value="ochotona_princeps --ASSEMBLY pika">ochotona_princeps --ASSEMBLY pika</option>
+      <option value="oreochromis_niloticus --ASSEMBLY Orenil1.0">oreochromis_niloticus --ASSEMBLY Orenil1.0</option>
+      <option value="ornithorhynchus_anatinus --ASSEMBLY OANA5">ornithorhynchus_anatinus --ASSEMBLY OANA5</option>
+      <option value="oryctolagus_cuniculus --ASSEMBLY OryCun2.0">oryctolagus_cuniculus --ASSEMBLY OryCun2.0</option>
+      <option value="oryzias_latipes --ASSEMBLY MEDAKA1">oryzias_latipes --ASSEMBLY MEDAKA1</option>
+      <option value="otolemur_garnettii --ASSEMBLY OtoGar3">otolemur_garnettii --ASSEMBLY OtoGar3</option>
+      <option value="ovis_aries --ASSEMBLY Oar_v3.1">ovis_aries --ASSEMBLY Oar_v3.1</option>
+      <option value="pan_troglodytes --ASSEMBLY CHIMP2.1.4">pan_troglodytes --ASSEMBLY CHIMP2.1.4</option>
+      <option value="papio_anubis --ASSEMBLY PapAnu2.0">papio_anubis --ASSEMBLY PapAnu2.0</option>
+      <option value="pelodiscus_sinensis --ASSEMBLY PelSin_1.0">pelodiscus_sinensis --ASSEMBLY PelSin_1.0</option>
+      <option value="petromyzon_marinus --ASSEMBLY Pmarinus_7.0">petromyzon_marinus --ASSEMBLY Pmarinus_7.0</option>
+      <option value="poecilia_formosa --ASSEMBLY PoeFor_5.1.2">poecilia_formosa --ASSEMBLY PoeFor_5.1.2</option>
+      <option value="pongo_abelii --ASSEMBLY PPYG2">pongo_abelii --ASSEMBLY PPYG2</option>
+      <option value="procavia_capensis --ASSEMBLY proCap1">procavia_capensis --ASSEMBLY proCap1</option>
+      <option value="pteropus_vampyrus --ASSEMBLY pteVam1">pteropus_vampyrus --ASSEMBLY pteVam1</option>
+      <option value="rattus_norvegicus --ASSEMBLY Rnor_6.0">rattus_norvegicus --ASSEMBLY Rnor_6.0</option>
+      <option value="saccharomyces_cerevisiae --ASSEMBLY R64-1-1">saccharomyces_cerevisiae --ASSEMBLY R64-1-1</option> <!-- the assembly name is R64-1-1; its hyphens are part of the name and must not be split into separate flags -->
+      <option value="sarcophilus_harrisii --ASSEMBLY DEVIL7.0">sarcophilus_harrisii --ASSEMBLY DEVIL7.0</option>
+      <option value="sorex_araneus --ASSEMBLY COMMON_SHREW1">sorex_araneus --ASSEMBLY COMMON_SHREW1</option>
+      <option value="sus_scrofa --ASSEMBLY Sscrofa10.2">sus_scrofa --ASSEMBLY Sscrofa10.2</option>
+      <option value="taeniopygia_guttata --ASSEMBLY taeGut3.2.4">taeniopygia_guttata --ASSEMBLY taeGut3.2.4</option>
+      <option value="takifugu_rubripes --ASSEMBLY FUGU4">takifugu_rubripes --ASSEMBLY FUGU4</option>
+      <option value="tarsius_syrichta --ASSEMBLY tarSyr1">tarsius_syrichta --ASSEMBLY tarSyr1</option>
+      <option value="tetraodon_nigroviridis --ASSEMBLY TETRAODON8">tetraodon_nigroviridis --ASSEMBLY TETRAODON8</option>
+      <option value="tupaia_belangeri --ASSEMBLY TREESHREW">tupaia_belangeri --ASSEMBLY TREESHREW</option>
+      <option value="tursiops_truncatus --ASSEMBLY turTru1">tursiops_truncatus --ASSEMBLY turTru1</option>
+      <option value="vicugna_pacos --ASSEMBLY vicPac1">vicugna_pacos --ASSEMBLY vicPac1</option>
+      <option value="xenopus_tropicalis --ASSEMBLY JGI_4.2">xenopus_tropicalis --ASSEMBLY JGI_4.2</option>
+      <option value="xiphophorus_maculatus --ASSEMBLY Xipmac4.4.2">xiphophorus_maculatus --ASSEMBLY Xipmac4.4.2</option>
+    </param>
+  </macro>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vep-download-cache-macros.xml Tue Jul 03 04:38:21 2018 -0400
@@ -0,0 +1,125 @@
+<macros>
+  <macro name="list-cache"> <!-- Species/assembly choices for the cache download mode. Each option value is inserted verbatim after the species flag of the installer command line, so it holds the cache name followed by the ASSEMBLY flag and the assembly name. -->
+    <param name="species" type="select" label="choose cache">
+      <option value="ailuropoda_melanoleuca --ASSEMBLY ailMel1">ailuropoda_melanoleuca --ASSEMBLY ailMel1</option>
+      <option value="anas_platyrhynchos --ASSEMBLY BGI_duck_1.0">anas_platyrhynchos --ASSEMBLY BGI_duck_1.0</option>
+      <option value="anolis_carolinensis_merged --ASSEMBLY AnoCar2.0">anolis_carolinensis_merged --ASSEMBLY AnoCar2.0</option>
+      <option value="anolis_carolinensis_refseq --ASSEMBLY AnoCar2.0">anolis_carolinensis_refseq --ASSEMBLY AnoCar2.0</option>
+      <option value="anolis_carolinensis --ASSEMBLY AnoCar2.0">anolis_carolinensis --ASSEMBLY AnoCar2.0</option>
+      <option value="astyanax_mexicanus --ASSEMBLY AstMex102">astyanax_mexicanus --ASSEMBLY AstMex102</option>
+      <option value="bos_taurus_merged --ASSEMBLY UMD3.1">bos_taurus_merged --ASSEMBLY UMD3.1</option>
+      <option value="bos_taurus_refseq --ASSEMBLY UMD3.1">bos_taurus_refseq --ASSEMBLY UMD3.1</option>
+      <option value="bos_taurus --ASSEMBLY UMD3.1">bos_taurus --ASSEMBLY UMD3.1</option>
+      <option value="caenorhabditis_elegans --ASSEMBLY WBcel235">caenorhabditis_elegans --ASSEMBLY WBcel235</option>
+      <option value="callithrix_jacchus --ASSEMBLY C_jacchus3.2.1">callithrix_jacchus --ASSEMBLY C_jacchus3.2.1</option>
+      <option value="canis_familiaris_merged --ASSEMBLY CanFam3.1">canis_familiaris_merged --ASSEMBLY CanFam3.1</option>
+      <option value="canis_familiaris_refseq --ASSEMBLY CanFam3.1">canis_familiaris_refseq --ASSEMBLY CanFam3.1</option>
+      <option value="canis_familiaris --ASSEMBLY CanFam3.1">canis_familiaris --ASSEMBLY CanFam3.1</option>
+      <option value="cavia_porcellus --ASSEMBLY cavPor3">cavia_porcellus --ASSEMBLY cavPor3</option>
+      <option value="chlorocebus_sabaeus --ASSEMBLY ChlSab1.1">chlorocebus_sabaeus --ASSEMBLY ChlSab1.1</option>
+      <option value="choloepus_hoffmanni --ASSEMBLY choHof1">choloepus_hoffmanni --ASSEMBLY choHof1</option>
+      <option value="ciona_intestinalis_merged --ASSEMBLY KH">ciona_intestinalis_merged --ASSEMBLY KH</option>
+      <option value="ciona_intestinalis_refseq --ASSEMBLY KH">ciona_intestinalis_refseq --ASSEMBLY KH</option>
+      <option value="ciona_intestinalis --ASSEMBLY KH">ciona_intestinalis --ASSEMBLY KH</option>
+      <option value="ciona_savignyi --ASSEMBLY CSAV2.0">ciona_savignyi --ASSEMBLY CSAV2.0</option>
+      <option value="danio_rerio_merged --ASSEMBLY GRCz10">danio_rerio_merged --ASSEMBLY GRCz10</option>
+      <option value="danio_rerio --ASSEMBLY GRCz10">danio_rerio --ASSEMBLY GRCz10</option>
+      <option value="dasypus_novemcinctus --ASSEMBLY Dasnov3.0">dasypus_novemcinctus --ASSEMBLY Dasnov3.0</option>
+      <option value="dipodomys_ordii --ASSEMBLY dipOrd1">dipodomys_ordii --ASSEMBLY dipOrd1</option>
+      <option value="drosophila_melanogaster --ASSEMBLY BDGP6">drosophila_melanogaster --ASSEMBLY BDGP6</option>
+      <option value="echinops_telfairi --ASSEMBLY TENREC">echinops_telfairi --ASSEMBLY TENREC</option>
+      <option value="equus_caballus --ASSEMBLY EquCab2">equus_caballus --ASSEMBLY EquCab2</option>
+      <option value="erinaceus_europaeus --ASSEMBLY HEDGEHOG">erinaceus_europaeus --ASSEMBLY HEDGEHOG</option>
+      <option value="felis_catus_merged --ASSEMBLY Felis_catus_6.2">felis_catus_merged --ASSEMBLY Felis_catus_6.2</option>
+      <option value="felis_catus_refseq --ASSEMBLY Felis_catus_6.2">felis_catus_refseq --ASSEMBLY Felis_catus_6.2</option>
+      <option value="felis_catus --ASSEMBLY Felis_catus_6.2">felis_catus --ASSEMBLY Felis_catus_6.2</option>
+      <option value="ficedula_albicollis --ASSEMBLY FicAlb_1.4">ficedula_albicollis --ASSEMBLY FicAlb_1.4</option>
+      <option value="gadus_morhua --ASSEMBLY gadMor1">gadus_morhua --ASSEMBLY gadMor1</option>
+      <option value="gallus_gallus_merged --ASSEMBLY Gallus_gallus_5.0">gallus_gallus_merged --ASSEMBLY Gallus_gallus_5.0</option>
+      <option value="gallus_gallus_refseq --ASSEMBLY Gallus_gallus_5.0">gallus_gallus_refseq --ASSEMBLY Gallus_gallus_5.0</option>
+      <option value="gallus_gallus --ASSEMBLY Gallus_gallus_5.0">gallus_gallus --ASSEMBLY Gallus_gallus_5.0</option>
+      <option value="gasterosteus_aculeatus --ASSEMBLY BROADS1">gasterosteus_aculeatus --ASSEMBLY BROADS1</option>
+      <option value="gorilla_gorilla --ASSEMBLY gorGor3.1">gorilla_gorilla --ASSEMBLY gorGor3.1</option>
+      <option value="homo_sapiens_merged --ASSEMBLY GRCh37">homo_sapiens_merged --ASSEMBLY GRCh37</option>
+      <option value="homo_sapiens_merged --ASSEMBLY GRCh38">homo_sapiens_merged --ASSEMBLY GRCh38</option>
+      <option value="homo_sapiens_refseq --ASSEMBLY GRCh37">homo_sapiens_refseq --ASSEMBLY GRCh37</option>
+      <option value="homo_sapiens_refseq --ASSEMBLY GRCh38">homo_sapiens_refseq --ASSEMBLY GRCh38</option>
+      <option value="homo_sapiens --ASSEMBLY GRCh37">homo_sapiens --ASSEMBLY GRCh37</option>
+      <option value="homo_sapiens --ASSEMBLY GRCh38" selected="true">homo_sapiens --ASSEMBLY GRCh38</option>
+      <option value="ictidomys_tridecemlineatus --ASSEMBLY spetri2">ictidomys_tridecemlineatus --ASSEMBLY spetri2</option>
+      <option value="latimeria_chalumnae --ASSEMBLY LatCha1">latimeria_chalumnae --ASSEMBLY LatCha1</option>
+      <option value="lepisosteus_oculatus --ASSEMBLY LepOcu1">lepisosteus_oculatus --ASSEMBLY LepOcu1</option>
+      <option value="loxodonta_africana --ASSEMBLY loxAfr3">loxodonta_africana --ASSEMBLY loxAfr3</option>
+      <option value="macaca_mulatta_merged --ASSEMBLY Mmul_8.0.1">macaca_mulatta_merged --ASSEMBLY Mmul_8.0.1</option>
+      <option value="macaca_mulatta_refseq --ASSEMBLY Mmul_8.0.1">macaca_mulatta_refseq --ASSEMBLY Mmul_8.0.1</option>
+      <option value="macaca_mulatta --ASSEMBLY Mmul_8.0.1">macaca_mulatta --ASSEMBLY Mmul_8.0.1</option>
+      <option value="macropus_eugenii --ASSEMBLY Meug_1.0">macropus_eugenii --ASSEMBLY Meug_1.0</option>
+      <option value="meleagris_gallopavo --ASSEMBLY UMD2">meleagris_gallopavo --ASSEMBLY UMD2</option>
+      <option value="microcebus_murinus --ASSEMBLY Mmur_2.0">microcebus_murinus --ASSEMBLY Mmur_2.0</option>
+      <option value="monodelphis_domestica --ASSEMBLY BROADO5">monodelphis_domestica --ASSEMBLY BROADO5</option>
+      <option value="mus_musculus_129s1svimj --ASSEMBLY 129S1_SvImJ_v1">mus_musculus_129s1svimj --ASSEMBLY 129S1_SvImJ_v1</option>
+      <option value="mus_musculus_aj --ASSEMBLY A_J_v1">mus_musculus_aj --ASSEMBLY A_J_v1</option>
+      <option value="mus_musculus_akrj --ASSEMBLY AKR_J_v1">mus_musculus_akrj --ASSEMBLY AKR_J_v1</option>
+      <option value="mus_musculus_balbcj --ASSEMBLY BALB_cJ_v1">mus_musculus_balbcj --ASSEMBLY BALB_cJ_v1</option>
+      <option value="mus_musculus_c3hhej --ASSEMBLY C3H_HeJ_v1">mus_musculus_c3hhej --ASSEMBLY C3H_HeJ_v1</option>
+      <option value="mus_musculus_c57bl6nj --ASSEMBLY C57BL_6NJ_v1">mus_musculus_c57bl6nj --ASSEMBLY C57BL_6NJ_v1</option>
+      <option value="mus_musculus_casteij --ASSEMBLY CAST_EiJ_v1">mus_musculus_casteij --ASSEMBLY CAST_EiJ_v1</option>
+      <option value="mus_musculus_cbaj --ASSEMBLY CBA_J_v1">mus_musculus_cbaj --ASSEMBLY CBA_J_v1</option>
+      <option value="mus_musculus_dba2j --ASSEMBLY DBA_2J_v1">mus_musculus_dba2j --ASSEMBLY DBA_2J_v1</option>
+      <option value="mus_musculus_fvbnj --ASSEMBLY FVB_NJ_v1">mus_musculus_fvbnj --ASSEMBLY FVB_NJ_v1</option>
+      <option value="mus_musculus_lpj --ASSEMBLY LP_J_v1">mus_musculus_lpj --ASSEMBLY LP_J_v1</option>
+      <option value="mus_musculus_merged --ASSEMBLY GRCm38">mus_musculus_merged --ASSEMBLY GRCm38</option>
+      <option value="mus_musculus_nodshiltj --ASSEMBLY NOD_ShiLtJ_v1">mus_musculus_nodshiltj --ASSEMBLY NOD_ShiLtJ_v1</option>
+      <option value="mus_musculus_nzohlltj --ASSEMBLY NZO_HlLtJ_v1">mus_musculus_nzohlltj --ASSEMBLY NZO_HlLtJ_v1</option>
+      <option value="mus_musculus_pwkphj --ASSEMBLY PWK_PhJ_v1">mus_musculus_pwkphj --ASSEMBLY PWK_PhJ_v1</option>
+      <option value="mus_musculus_refseq --ASSEMBLY GRCm38">mus_musculus_refseq --ASSEMBLY GRCm38</option>
+      <option value="mus_musculus --ASSEMBLY GRCm38">mus_musculus --ASSEMBLY GRCm38</option>
+      <option value="mus_musculus_wsbeij --ASSEMBLY WSB_EiJ_v1">mus_musculus_wsbeij --ASSEMBLY WSB_EiJ_v1</option>
+      <option value="mus_spretus_spreteij --ASSEMBLY SPRET_EiJ_v1">mus_spretus_spreteij --ASSEMBLY SPRET_EiJ_v1</option>
+      <option value="mustela_putorius_furo --ASSEMBLY MusPutFur1.0">mustela_putorius_furo --ASSEMBLY MusPutFur1.0</option>
+      <option value="myotis_lucifugus --ASSEMBLY Myoluc2.0">myotis_lucifugus --ASSEMBLY Myoluc2.0</option>
+      <option value="nomascus_leucogenys --ASSEMBLY Nleu1.0">nomascus_leucogenys --ASSEMBLY Nleu1.0</option>
+      <option value="ochotona_princeps --ASSEMBLY pika">ochotona_princeps --ASSEMBLY pika</option>
+      <option value="oreochromis_niloticus --ASSEMBLY Orenil1.0">oreochromis_niloticus --ASSEMBLY Orenil1.0</option>
+      <option value="ornithorhynchus_anatinus --ASSEMBLY OANA5">ornithorhynchus_anatinus --ASSEMBLY OANA5</option>
+      <option value="oryctolagus_cuniculus_merged --ASSEMBLY OryCun2.0">oryctolagus_cuniculus_merged --ASSEMBLY OryCun2.0</option>
+      <option value="oryctolagus_cuniculus_refseq --ASSEMBLY OryCun2.0">oryctolagus_cuniculus_refseq --ASSEMBLY OryCun2.0</option>
+      <option value="oryctolagus_cuniculus --ASSEMBLY OryCun2.0">oryctolagus_cuniculus --ASSEMBLY OryCun2.0</option>
+      <option value="oryzias_latipes --ASSEMBLY MEDAKA1">oryzias_latipes --ASSEMBLY MEDAKA1</option>
+      <option value="otolemur_garnettii --ASSEMBLY OtoGar3">otolemur_garnettii --ASSEMBLY OtoGar3</option>
+      <option value="ovis_aries_merged --ASSEMBLY Oar_v3.1">ovis_aries_merged --ASSEMBLY Oar_v3.1</option>
+      <option value="ovis_aries_refseq --ASSEMBLY Oar_v3.1">ovis_aries_refseq --ASSEMBLY Oar_v3.1</option>
+      <option value="ovis_aries --ASSEMBLY Oar_v3.1">ovis_aries --ASSEMBLY Oar_v3.1</option>
+      <option value="pan_troglodytes_merged --ASSEMBLY CHIMP2.1.4">pan_troglodytes_merged --ASSEMBLY CHIMP2.1.4</option>
+      <option value="pan_troglodytes_refseq --ASSEMBLY CHIMP2.1.4">pan_troglodytes_refseq --ASSEMBLY CHIMP2.1.4</option>
+      <option value="pan_troglodytes --ASSEMBLY CHIMP2.1.4">pan_troglodytes --ASSEMBLY CHIMP2.1.4</option>
+      <option value="papio_anubis_merged --ASSEMBLY PapAnu2.0">papio_anubis_merged --ASSEMBLY PapAnu2.0</option>
+      <option value="papio_anubis_refseq --ASSEMBLY PapAnu2.0">papio_anubis_refseq --ASSEMBLY PapAnu2.0</option>
+      <option value="papio_anubis --ASSEMBLY PapAnu2.0">papio_anubis --ASSEMBLY PapAnu2.0</option>
+      <option value="pelodiscus_sinensis --ASSEMBLY PelSin_1.0">pelodiscus_sinensis --ASSEMBLY PelSin_1.0</option>
+      <option value="petromyzon_marinus --ASSEMBLY Pmarinus_7.0">petromyzon_marinus --ASSEMBLY Pmarinus_7.0</option>
+      <option value="poecilia_formosa --ASSEMBLY PoeFor_5.1.2">poecilia_formosa --ASSEMBLY PoeFor_5.1.2</option>
+      <option value="pongo_abelii --ASSEMBLY PPYG2">pongo_abelii --ASSEMBLY PPYG2</option>
+      <option value="procavia_capensis --ASSEMBLY proCap1">procavia_capensis --ASSEMBLY proCap1</option>
+      <option value="pteropus_vampyrus --ASSEMBLY pteVam1">pteropus_vampyrus --ASSEMBLY pteVam1</option>
+      <option value="rattus_norvegicus_merged --ASSEMBLY Rnor_6.0">rattus_norvegicus_merged --ASSEMBLY Rnor_6.0</option>
+      <option value="rattus_norvegicus_refseq --ASSEMBLY Rnor_6.0">rattus_norvegicus_refseq --ASSEMBLY Rnor_6.0</option>
+      <option value="rattus_norvegicus --ASSEMBLY Rnor_6.0">rattus_norvegicus --ASSEMBLY Rnor_6.0</option>
+      <option value="saccharomyces_cerevisiae --ASSEMBLY R64-1-1">saccharomyces_cerevisiae --ASSEMBLY R64-1-1</option> <!-- the assembly name is R64-1-1; its hyphens are part of the name and must not be split into separate flags -->
+      <option value="sarcophilus_harrisii --ASSEMBLY DEVIL7.0">sarcophilus_harrisii --ASSEMBLY DEVIL7.0</option>
+      <option value="sorex_araneus --ASSEMBLY COMMON_SHREW1">sorex_araneus --ASSEMBLY COMMON_SHREW1</option>
+      <option value="sus_scrofa_merged --ASSEMBLY Sscrofa10.2">sus_scrofa_merged --ASSEMBLY Sscrofa10.2</option>
+      <option value="sus_scrofa_refseq --ASSEMBLY Sscrofa10.2">sus_scrofa_refseq --ASSEMBLY Sscrofa10.2</option>
+      <option value="sus_scrofa --ASSEMBLY Sscrofa10.2">sus_scrofa --ASSEMBLY Sscrofa10.2</option>
+      <option value="taeniopygia_guttata --ASSEMBLY taeGut3.2.4">taeniopygia_guttata --ASSEMBLY taeGut3.2.4</option>
+      <option value="takifugu_rubripes --ASSEMBLY FUGU4">takifugu_rubripes --ASSEMBLY FUGU4</option>
+      <option value="tarsius_syrichta --ASSEMBLY tarSyr1">tarsius_syrichta --ASSEMBLY tarSyr1</option>
+      <option value="tetraodon_nigroviridis --ASSEMBLY TETRAODON8">tetraodon_nigroviridis --ASSEMBLY TETRAODON8</option>
+      <option value="tupaia_belangeri --ASSEMBLY TREESHREW">tupaia_belangeri --ASSEMBLY TREESHREW</option>
+      <option value="tursiops_truncatus --ASSEMBLY turTru1">tursiops_truncatus --ASSEMBLY turTru1</option>
+      <option value="vicugna_pacos --ASSEMBLY vicPac1">vicugna_pacos --ASSEMBLY vicPac1</option>
+      <option value="xenopus_tropicalis --ASSEMBLY JGI_4.2">xenopus_tropicalis --ASSEMBLY JGI_4.2</option>
+      <option value="xiphophorus_maculatus --ASSEMBLY Xipmac4.4.2">xiphophorus_maculatus --ASSEMBLY Xipmac4.4.2</option>
+    </param>
+  </macro>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vep-unico-macros.xml Tue Jul 03 04:38:21 2018 -0400
@@ -0,0 +1,16 @@
+<macros>
+  <macro name="vep-annotate-inputs-macro"> <!-- inputs of the "annotate" mode: the VCF to annotate, an optional switch value and the VEP buffer size -->
+    <param name="input" type="data" format="vcf" label="Input VCF File" />
+    <param name="everything" type="select" optional="true" label="everything option" help="this option need RAM >16">
+      <option value="--everything">everything</option>
+    </param>
+    <param name="buffer" type="integer" min="1" value="5000" label="buffer size decrease if VEP can not allocate memory" /> <!-- integer so that only a positive number can reach the command line -->
+  </macro>
+  <macro name="vcf2maf-inputs-macro"> <!-- inputs of the "vcf2maf" mode: sample IDs, the VCF to convert, the reference FASTA and the buffer size -->
+    <param name="tumour_id" type="text" value="TUMOR" label="Tumour ID (Name)"/>
+    <param name="normal_id" type="text" value="NORMAL" label="Normal ID (Name)"/>
+    <param name="input_vcf" type="data" format="vcf" label="Input VCF File" />
+    <param name="reference" type="data" format="fasta" label="Input fasta file" />
+    <param name="buffer" type="integer" min="1" value="5000" label="buffer size decrease if vcf2maf can not allocate memory" /> <!-- integer so that only a positive number can reach the command line -->
+  </macro>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vep_unico.xml Tue Jul 03 04:38:21 2018 -0400
@@ -0,0 +1,120 @@
+<tool id="vep-unico" name="vep-unico" version="1.0.0">
+  <description>
+    wrapper for variant-effect-predictor86 and vcf2maf
+  </description>
+  <macros>
+    <import>vep-unico-macros.xml</import>
+    <import>vcf2maf-macros.xml</import>
+    <import>vep-download-cache-macros.xml</import>
+    <import>vep-annotate-macros.xml</import>
+  </macros>
+  <requirements>
+    <requirement type="package" version="86">variant-effect-predictor</requirement>
+    <requirement type="package" version="1.3.1">samtools</requirement>
+  </requirements> <!-- In the command below exactly one of the three "if" sections is rendered, chosen by veptools.veptoolsselect. The species value is left unquoted on purpose: the list-cache and list-cache-annotate option values hold several space-separated arguments. Dataset paths and free-text IDs are single-quoted. -->
+  <command><![CDATA[
+    #if str($veptools.veptoolsselect) =="downloadcache"
+
+      mkdir -p "\$CONDA_DEFAULT_ENV/vep_cache" &&
+      cd "\$CONDA_DEFAULT_ENV/bin" && perl vep_install.pl
+      -a ac -s $veptools.species
+      --NO_HTSLIB
+      --CACHEDIR "\$CONDA_DEFAULT_ENV/vep_cache" &&
+      ## list the downloaded caches in the output dataset; the steps are chained with && so that a failed install fails the job instead of being masked by echo/ls
+      echo "CACHE DOWNLOADED YET" > '$output1' &&
+      ls "\$CONDA_DEFAULT_ENV"/vep_cache/*/ >> '$output1'
+    #end if
+
+
+    #if str($veptools.veptoolsselect) =="annotate"
+      perl "\$CONDA_DEFAULT_ENV/bin/variant_effect_predictor.pl"
+      --fork \${GALAXY_SLOTS:-4}
+      --offline
+      --dir "\$CONDA_DEFAULT_ENV/vep_cache"
+      --force_overwrite
+      #if $veptools.everything.value
+        --everything
+      #end if
+      --species $veptools.species
+      --buffer_size $veptools.buffer
+      --vcf
+      --input_file '$veptools.input'
+      --output_file '$output2'
+      --stats_file stat.htm
+    #end if
+
+
+    #if str($veptools.veptoolsselect) =="vcf2maf"
+      ln -s '$veptools.input_vcf' ./input.vcf &&
+      perl '$__tool_directory__/vcf2maf.pl'
+      --input-vcf ./input.vcf
+      --output-maf '$output_maf'
+      --vep-forks 1
+      --tumor-id '$veptools.tumour_id'
+      --normal-id '$veptools.normal_id'
+      --buffer-size $veptools.buffer
+      --vep-path "\$CONDA_DEFAULT_ENV/bin/"
+      --vep-data "\$CONDA_DEFAULT_ENV/vep_cache"
+      --ref-fasta '$veptools.reference'
+      --species $veptools.species
+
+    #end if
+  ]]>
+  </command>
+  <inputs>
+    <conditional name="veptools"> <!-- the selected mode decides which macro-provided inputs are shown and which outputs are created -->
+      <param name="veptoolsselect" type="select" label="vep">
+        <option value="downloadcache" selected="true">vep-download-cache</option>
+        <option value="annotate">vep-annotate</option>
+        <option value="vcf2maf">vcf2maf</option>
+      </param>
+      <when value="downloadcache">
+        <expand macro="list-cache"/>
+      </when>
+      <when value="annotate">
+        <expand macro="vep-annotate-inputs-macro"/>
+        <expand macro="list-cache-annotate"/>
+      </when>
+      <when value="vcf2maf">
+        <expand macro="vcf2maf-inputs-macro"/>
+        <expand macro="list-vcf2maf"/>
+      </when>
+    </conditional>
+  </inputs>
+  <outputs>
+    <!--vep-download-cache-outputs-->
+    <data format="txt" name="output1" label="cache downloaded">
+      <filter>veptools['veptoolsselect'] == 'downloadcache'</filter>
+    </data>
+    <!--vep-annotate-outputs-->
+    <data format="vcf" name="output2" label="vep-annotated on ${on_string} ">
+      <filter>veptools['veptoolsselect'] == 'annotate'</filter>
+    </data>
+    <data format="html" name="stat" from_work_dir="stat.htm" label="stat on ${on_string}">
+      <filter>veptools['veptoolsselect'] == 'annotate'</filter>
+    </data>
+    <!--vcf2maf-outputs-->
+    <data name="output_maf" format="maf" label="vcf2maf on ${on_string} ">
+      <filter>veptools['veptoolsselect'] == 'vcf2maf'</filter>
+    </data>
+  </outputs>
+  <stdio>
+    <exit_code range="1:" level="fatal" description="Failure" />
+  </stdio>
+  <citations>
+    <citation type="doi">10.1186/s13059-016-0974-4</citation>
+  </citations>
+  <help>
+**IMPORTANT**
+ Before running the vep_annotate or vcf2maf tool for the first time, you have to download the cache file using vep_download_cache.
+
+ **What it does**
+
+ - **vep_annotate** determines the effect of your variants (SNPs, insertions, deletions, CNVs or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions.
+
+ - **vep_download_cache** downloads the cache file used by VEP v.86. It is a file containing all transcript models, regulatory features and variant data for a species.
+
+ - **vcf2maf** converts a VCF file to MAF format, calling VEP to annotate the variants.
+
+  </help>
+</tool>