Mercurial > repos > davidvanzessen > report_clonality_igg
comparison RScript.r @ 10:06777331fbd8 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 15 May 2014 09:27:22 -0400 |
parents | e2972f0935e9 |
children | 866d22e60e60 |
comparison
equal
deleted
inserted
replaced
9:712f3e9924d5 | 10:06777331fbd8 |
---|---|
4 | 4 |
5 inFile = args[1] | 5 inFile = args[1] |
6 outFile = args[2] | 6 outFile = args[2] |
7 outDir = args[3] | 7 outDir = args[3] |
8 clonalType = args[4] | 8 clonalType = args[4] |
9 species = args[5] | |
10 locus = args[6] | |
11 selection = args[7] | |
12 | |
13 | |
9 | 14 |
10 if (!("gridExtra" %in% rownames(installed.packages()))) { | 15 if (!("gridExtra" %in% rownames(installed.packages()))) { |
11 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") | 16 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") |
12 } | 17 } |
13 library(gridExtra) | 18 library(gridExtra) |
52 #PRODF = PROD[ -1] | 57 #PRODF = PROD[ -1] |
53 | 58 |
54 PRODF = PROD | 59 PRODF = PROD |
55 | 60 |
56 #PRODF = unique(PRODF) | 61 #PRODF = unique(PRODF) |
57 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] | 62 |
63 | |
64 | |
65 if(selection == "unique"){ | |
66 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] | |
67 } | |
58 | 68 |
59 PRODFV = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.V.Gene")]) | 69 PRODFV = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.V.Gene")]) |
60 PRODFV$Length = as.numeric(PRODFV$Length) | 70 PRODFV$Length = as.numeric(PRODFV$Length) |
61 Total = 0 | 71 Total = 0 |
62 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 72 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
75 Total = 0 | 85 Total = 0 |
76 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 86 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
77 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) | 87 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) |
78 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) | 88 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) |
79 | 89 |
80 V = c("v.name\tchr.orderV\nIGHV7-81\t1\nIGHV3-74\t2\nIGHV3-73\t3\nIGHV3-72\t4\nIGHV2-70\t6\nIGHV1-69\t7\nIGHV3-66\t8\nIGHV3-64\t9\nIGHV4-61\t10\nIGHV4-59\t11\nIGHV1-58\t12\nIGHV3-53\t13\nIGHV5-a\t15\nIGHV5-51\t16\nIGHV3-49\t17\nIGHV3-48\t18\nIGHV1-46\t20\nIGHV1-45\t21\nIGHV3-43\t22\nIGHV4-39\t23\nIGHV3-35\t24\nIGHV4-34\t25\nIGHV3-33\t26\nIGHV4-31\t27\nIGHV4-30-4\t28\nIGHV4-30-2\t29\nIGHV3-30-3\t30\nIGHV3-30\t31\nIGHV4-28\t32\nIGHV2-26\t33\nIGHV1-24\t34\nIGHV3-23\t35\nIGHV3-21\t37\nIGHV3-20\t38\nIGHV1-18\t40\nIGHV3-15\t41\nIGHV3-13\t42\nIGHV3-11\t43\nIGHV3-9\t44\nIGHV1-8\t45\nIGHV3-7\t46\nIGHV2-5\t47\nIGHV7-4-1\t48\nIGHV4-4\t49\nIGHV4-b\t50\nIGHV1-3\t51\nIGHV1-2\t52\nIGHV6-1\t53") | 90 V = c("v.name\tchr.orderV") |
91 D = c("v.name\tchr.orderD") | |
92 J = c("v.name\tchr.orderJ") | |
93 | |
94 if(species == "human"){ | |
95 if(locus == "igh"){ | |
96 V = c("v.name\tchr.orderV\nIGHV3-74\t1\nIGHV3-73\t2\nIGHV3-72\t3\nIGHV2-70\t4\nIGHV1-69D\t5\nIGHV1-69-2\t6\nIGHV2-70D\t7\nIGHV1-69\t8\nIGHV3-66\t9\nIGHV3-64\t10\nIGHV4-61\t11\nIGHV4-59\t12\nIGHV1-58\t13\nIGHV3-53\t14\nIGHV5-51\t15\nIGHV3-49\t16\nIGHV3-48\t17\nIGHV1-46\t18\nIGHV1-45\t19\nIGHV3-43\t20\nIGHV4-39\t21\nIGHV3-43D\t22\nIGHV4-38-2\t23\nIGHV4-34\t24\nIGHV3-33\t25\nIGHV4-31\t26\nIGHV3-30-5\t27\nIGHV4-30-4\t28\nIGHV3-30-3\t29\nIGHV4-30-2\t30\nIGHV4-30-1\t31\nIGHV3-30\t32\nIGHV4-28\t33\nIGHV2-26\t34\nIGHV1-24\t35\nIGHV3-23D\t36\nIGHV3-23\t37\nIGHV3-21\t38\nIGHV3-20\t39\nIGHV1-18\t40\nIGHV3-15\t41\nIGHV3-13\t42\nIGHV3-11\t43\nIGHV5-10-1\t44\nIGHV3-9\t45\nIGHV1-8\t46\nIGHV3-64D\t47\nIGHV3-7\t48\nIGHV2-5\t49\nIGHV7-4-1\t50\nIGHV4-4\t51\nIGHV1-3\t52\nIGHV1-2\t53\nIGHV6-1\t54") | |
97 D = c("v.name\tchr.orderD\nIGHD1-7\t1\nIGHD2-8\t2\nIGHD3-9\t3\nIGHD3-10\t4\nIGHD5-12\t5\nIGHD6-13\t6\nIGHD2-15\t7\nIGHD3-16\t8\nIGHD4-17\t9\nIGHD5-18\t10\nIGHD6-19\t11\nIGHD1-20\t12\nIGHD2-21\t13\nIGHD3-22\t14\nIGHD5-24\t15\nIGHD6-25\t16\nIGHD1-26\t17\nIGHD7-27\t18") | |
98 J = c("v.name\tchr.orderJ\nIGHJ1\t1\nIGHJ2\t2\nIGHJ3\t3\nIGHJ4\t4\nIGHJ5\t5\nIGHJ6\t6") | |
99 } else if (locus == "igk"){ | |
100 V = c("v.name\tchr.orderV\nIGKV3D-7\t1\nIGKV1D-8\t2\nIGKV1D-43\t3\nIGKV3D-11\t4\nIGKV1D-12\t5\nIGKV1D-13\t6\nIGKV3D-15\t7\nIGKV1D-16\t8\nIGKV1D-17\t9\nIGKV3D-20\t10\nIGKV2D-26\t11\nIGKV2D-28\t12\nIGKV2D-29\t13\nIGKV2D-30\t14\nIGKV1D-33\t15\nIGKV1D-39\t16\nIGKV2D-40\t17\nIGKV2-40\t18\nIGKV1-39\t19\nIGKV1-33\t20\nIGKV2-30\t21\nIGKV2-29\t22\nIGKV2-28\t23\nIGKV1-27\t24\nIGKV2-24\t25\nIGKV3-20\t26\nIGKV1-17\t27\nIGKV1-16\t28\nIGKV3-15\t29\nIGKV1-13\t30\nIGKV1-12\t31\nIGKV3-11\t32\nIGKV1-9\t33\nIGKV1-8\t34\nIGKV1-6\t35\nIGKV1-5\t36\nIGKV5-2\t37\nIGKV4-1\t38") | |
101 D = c("v.name\tchr.orderD\n") | |
102 J = c("v.name\tchr.orderJ\nIGKJ1\t1\nIGKJ2\t2\nIGKJ3\t3\nIGKJ4\t4\nIGKJ5\t5") | |
103 } else if (locus == "igl"){ | |
104 V = c("v.name\tchr.orderV\nIGLV4-69\t1\nIGLV8-61\t2\nIGLV4-60\t3\nIGLV6-57\t4\nIGLV5-52\t5\nIGLV1-51\t6\nIGLV9-49\t7\nIGLV1-47\t8\nIGLV7-46\t9\nIGLV5-45\t10\nIGLV1-44\t11\nIGLV7-43\t12\nIGLV1-41\t13\nIGLV1-40\t14\nIGLV5-39\t15\nIGLV5-37\t16\nIGLV1-36\t17\nIGLV3-27\t18\nIGLV3-25\t19\nIGLV2-23\t20\nIGLV3-22\t21\nIGLV3-21\t22\nIGLV3-19\t23\nIGLV2-18\t24\nIGLV3-16\t25\nIGLV2-14\t26\nIGLV3-12\t27\nIGLV2-11\t28\nIGLV3-10\t29\nIGLV3-9\t30\nIGLV2-8\t31\nIGLV4-3\t32\nIGLV3-1\t33") | |
105 D = c("v.name\tchr.orderD\n") | |
106 J = c("v.name\tchr.orderJ\nIGLJ1\t1\nIGLJ2\t2\nIGLJ3\t3\nIGLJ6\t4\nIGLJ7\t5") | |
107 } | |
108 } else if (species == "mouse"){ | |
109 if(locus == "igh"){ | |
110 cat("mouse igh not yet implemented") | |
111 } else if (locus == "igk"){ | |
112 cat("mouse igk not yet implemented") | |
113 } else if (locus == "igl"){ | |
114 cat("mouse igl not yet implemented") | |
115 } | |
116 } | |
117 | |
118 useD = TRUE | |
119 if(species == "human" && (locus == "igk" || locus == "igl")){ | |
120 useD = FALSE | |
121 } | |
122 | |
81 tcV = textConnection(V) | 123 tcV = textConnection(V) |
82 Vchain = read.table(tcV, sep="\t", header=TRUE) | 124 Vchain = read.table(tcV, sep="\t", header=TRUE) |
83 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) | 125 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) |
84 close(tcV) | 126 close(tcV) |
85 | 127 |
86 D = c("v.name\tchr.orderD\nIGHD1-1\t1\nIGHD2-2\t2\nIGHD3-3\t3\nIGHD6-6\t4\nIGHD1-7\t5\nIGHD2-8\t6\nIGHD3-9\t7\nIGHD3-10\t8\nIGHD4-11\t9\nIGHD5-12\t10\nIGHD6-13\t11\nIGHD1-14\t12\nIGHD2-15\t13\nIGHD3-16\t14\nIGHD4-17\t15\nIGHD5-18\t16\nIGHD6-19\t17\nIGHD1-20\t18\nIGHD2-21\t19\nIGHD3-22\t20\nIGHD4-23\t21\nIGHD5-24\t22\nIGHD6-25\t23\nIGHD1-26\t24\nIGHD7-27\t25") | |
87 tcD = textConnection(D) | 128 tcD = textConnection(D) |
88 Dchain = read.table(tcD, sep="\t", header=TRUE) | 129 Dchain = read.table(tcD, sep="\t", header=TRUE) |
89 PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE) | 130 PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE) |
90 close(tcD) | 131 close(tcD) |
91 | 132 |
92 | |
93 J = c("v.name\tchr.orderJ\nIGHJ1\t1\nIGHJ2\t2\nIGHJ3\t3\nIGHJ4\t4\nIGHJ5\t5\nIGHJ6\t6") | |
94 tcJ = textConnection(J) | 133 tcJ = textConnection(J) |
95 Jchain = read.table(tcJ, sep="\t", header=TRUE) | 134 Jchain = read.table(tcJ, sep="\t", header=TRUE) |
96 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) | 135 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) |
97 close(tcJ) | 136 close(tcJ) |
98 | 137 |
224 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | 263 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
225 VDList = split(completeVD, f=completeVD[,"Sample"]) | 264 VDList = split(completeVD, f=completeVD[,"Sample"]) |
226 | 265 |
227 lapply(VDList, FUN=plotVD) | 266 lapply(VDList, FUN=plotVD) |
228 | 267 |
229 | |
230 | |
231 plotVJ <- function(dat){ | 268 plotVJ <- function(dat){ |
232 if(length(dat[,1]) == 0){ | 269 if(length(dat[,1]) == 0){ |
233 return() | 270 return() |
234 } | 271 } |
235 img = ggplot() + | 272 img = ggplot() + |
277 print(img) | 314 print(img) |
278 dev.off() | 315 dev.off() |
279 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | 316 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) |
280 } | 317 } |
281 | 318 |
319 | |
282 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) | 320 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) |
283 | 321 |
284 DandJCount$l = log(DandJCount$Length) | 322 DandJCount$l = log(DandJCount$Length) |
285 maxDJ = data.frame(data.table(DandJCount)[, list(max=max(l)), by=c("Sample")]) | 323 maxDJ = data.frame(data.table(DandJCount)[, list(max=max(l)), by=c("Sample")]) |
286 DandJCount = merge(DandJCount, maxDJ, by.x="Sample", by.y="Sample", all.x=T) | 324 DandJCount = merge(DandJCount, maxDJ, by.x="Sample", by.y="Sample", all.x=T) |
291 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) | 329 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) |
292 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | 330 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
293 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | 331 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) |
294 DJList = split(completeDJ, f=completeDJ[,"Sample"]) | 332 DJList = split(completeDJ, f=completeDJ[,"Sample"]) |
295 lapply(DJList, FUN=plotDJ) | 333 lapply(DJList, FUN=plotDJ) |
296 | |
297 | 334 |
298 sampleFile <- file("samples.txt") | 335 sampleFile <- file("samples.txt") |
299 un = unique(test$Sample) | 336 un = unique(test$Sample) |
300 un = paste(un, sep="\n") | 337 un = paste(un, sep="\n") |
301 writeLines(un, sampleFile) | 338 writeLines(un, sampleFile) |