comparison RScript.r @ 10:06777331fbd8 draft

Uploaded
author davidvanzessen
date Thu, 15 May 2014 09:27:22 -0400
parents e2972f0935e9
children 866d22e60e60
comparison
equal deleted inserted replaced
9:712f3e9924d5 10:06777331fbd8
4 4
5 inFile = args[1] 5 inFile = args[1]
6 outFile = args[2] 6 outFile = args[2]
7 outDir = args[3] 7 outDir = args[3]
8 clonalType = args[4] 8 clonalType = args[4]
9 species = args[5]
10 locus = args[6]
11 selection = args[7]
12
13
9 14
10 if (!("gridExtra" %in% rownames(installed.packages()))) { 15 if (!("gridExtra" %in% rownames(installed.packages()))) {
11 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") 16 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/")
12 } 17 }
13 library(gridExtra) 18 library(gridExtra)
52 #PRODF = PROD[ -1] 57 #PRODF = PROD[ -1]
53 58
54 PRODF = PROD 59 PRODF = PROD
55 60
56 #PRODF = unique(PRODF) 61 #PRODF = unique(PRODF)
57 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] 62
63
64
65 if(selection == "unique"){
66 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ]
67 }
58 68
59 PRODFV = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.V.Gene")]) 69 PRODFV = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.V.Gene")])
60 PRODFV$Length = as.numeric(PRODFV$Length) 70 PRODFV$Length = as.numeric(PRODFV$Length)
61 Total = 0 71 Total = 0
62 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) 72 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length)))
75 Total = 0 85 Total = 0
76 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) 86 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length)))
77 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) 87 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE)
78 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) 88 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total))
79 89
80 V = c("v.name\tchr.orderV\nIGHV7-81\t1\nIGHV3-74\t2\nIGHV3-73\t3\nIGHV3-72\t4\nIGHV2-70\t6\nIGHV1-69\t7\nIGHV3-66\t8\nIGHV3-64\t9\nIGHV4-61\t10\nIGHV4-59\t11\nIGHV1-58\t12\nIGHV3-53\t13\nIGHV5-a\t15\nIGHV5-51\t16\nIGHV3-49\t17\nIGHV3-48\t18\nIGHV1-46\t20\nIGHV1-45\t21\nIGHV3-43\t22\nIGHV4-39\t23\nIGHV3-35\t24\nIGHV4-34\t25\nIGHV3-33\t26\nIGHV4-31\t27\nIGHV4-30-4\t28\nIGHV4-30-2\t29\nIGHV3-30-3\t30\nIGHV3-30\t31\nIGHV4-28\t32\nIGHV2-26\t33\nIGHV1-24\t34\nIGHV3-23\t35\nIGHV3-21\t37\nIGHV3-20\t38\nIGHV1-18\t40\nIGHV3-15\t41\nIGHV3-13\t42\nIGHV3-11\t43\nIGHV3-9\t44\nIGHV1-8\t45\nIGHV3-7\t46\nIGHV2-5\t47\nIGHV7-4-1\t48\nIGHV4-4\t49\nIGHV4-b\t50\nIGHV1-3\t51\nIGHV1-2\t52\nIGHV6-1\t53") 90 V = c("v.name\tchr.orderV")
91 D = c("v.name\tchr.orderD")
92 J = c("v.name\tchr.orderJ")
93
94 if(species == "human"){
95 if(locus == "igh"){
96 V = c("v.name\tchr.orderV\nIGHV3-74\t1\nIGHV3-73\t2\nIGHV3-72\t3\nIGHV2-70\t4\nIGHV1-69D\t5\nIGHV1-69-2\t6\nIGHV2-70D\t7\nIGHV1-69\t8\nIGHV3-66\t9\nIGHV3-64\t10\nIGHV4-61\t11\nIGHV4-59\t12\nIGHV1-58\t13\nIGHV3-53\t14\nIGHV5-51\t15\nIGHV3-49\t16\nIGHV3-48\t17\nIGHV1-46\t18\nIGHV1-45\t19\nIGHV3-43\t20\nIGHV4-39\t21\nIGHV3-43D\t22\nIGHV4-38-2\t23\nIGHV4-34\t24\nIGHV3-33\t25\nIGHV4-31\t26\nIGHV3-30-5\t27\nIGHV4-30-4\t28\nIGHV3-30-3\t29\nIGHV4-30-2\t30\nIGHV4-30-1\t31\nIGHV3-30\t32\nIGHV4-28\t33\nIGHV2-26\t34\nIGHV1-24\t35\nIGHV3-23D\t36\nIGHV3-23\t37\nIGHV3-21\t38\nIGHV3-20\t39\nIGHV1-18\t40\nIGHV3-15\t41\nIGHV3-13\t42\nIGHV3-11\t43\nIGHV5-10-1\t44\nIGHV3-9\t45\nIGHV1-8\t46\nIGHV3-64D\t47\nIGHV3-7\t48\nIGHV2-5\t49\nIGHV7-4-1\t50\nIGHV4-4\t51\nIGHV1-3\t52\nIGHV1-2\t53\nIGHV6-1\t54")
97 D = c("v.name\tchr.orderD\nIGHD1-7\t1\nIGHD2-8\t2\nIGHD3-9\t3\nIGHD3-10\t4\nIGHD5-12\t5\nIGHD6-13\t6\nIGHD2-15\t7\nIGHD3-16\t8\nIGHD4-17\t9\nIGHD5-18\t10\nIGHD6-19\t11\nIGHD1-20\t12\nIGHD2-21\t13\nIGHD3-22\t14\nIGHD5-24\t15\nIGHD6-25\t16\nIGHD1-26\t17\nIGHD7-27\t18")
98 J = c("v.name\tchr.orderJ\nIGHJ1\t1\nIGHJ2\t2\nIGHJ3\t3\nIGHJ4\t4\nIGHJ5\t5\nIGHJ6\t6")
99 } else if (locus == "igk"){
100 V = c("v.name\tchr.orderV\nIGKV3D-7\t1\nIGKV1D-8\t2\nIGKV1D-43\t3\nIGKV3D-11\t4\nIGKV1D-12\t5\nIGKV1D-13\t6\nIGKV3D-15\t7\nIGKV1D-16\t8\nIGKV1D-17\t9\nIGKV3D-20\t10\nIGKV2D-26\t11\nIGKV2D-28\t12\nIGKV2D-29\t13\nIGKV2D-30\t14\nIGKV1D-33\t15\nIGKV1D-39\t16\nIGKV2D-40\t17\nIGKV2-40\t18\nIGKV1-39\t19\nIGKV1-33\t20\nIGKV2-30\t21\nIGKV2-29\t22\nIGKV2-28\t23\nIGKV1-27\t24\nIGKV2-24\t25\nIGKV3-20\t26\nIGKV1-17\t27\nIGKV1-16\t28\nIGKV3-15\t29\nIGKV1-13\t30\nIGKV1-12\t31\nIGKV3-11\t32\nIGKV1-9\t33\nIGKV1-8\t34\nIGKV1-6\t35\nIGKV1-5\t36\nIGKV5-2\t37\nIGKV4-1\t38")
101 D = c("v.name\tchr.orderD\n")
102 J = c("v.name\tchr.orderJ\nIGKJ1\t1\nIGKJ2\t2\nIGKJ3\t3\nIGKJ4\t4\nIGKJ5\t5")
103 } else if (locus == "igl"){
104 V = c("v.name\tchr.orderV\nIGLV4-69\t1\nIGLV8-61\t2\nIGLV4-60\t3\nIGLV6-57\t4\nIGLV5-52\t5\nIGLV1-51\t6\nIGLV9-49\t7\nIGLV1-47\t8\nIGLV7-46\t9\nIGLV5-45\t10\nIGLV1-44\t11\nIGLV7-43\t12\nIGLV1-41\t13\nIGLV1-40\t14\nIGLV5-39\t15\nIGLV5-37\t16\nIGLV1-36\t17\nIGLV3-27\t18\nIGLV3-25\t19\nIGLV2-23\t20\nIGLV3-22\t21\nIGLV3-21\t22\nIGLV3-19\t23\nIGLV2-18\t24\nIGLV3-16\t25\nIGLV2-14\t26\nIGLV3-12\t27\nIGLV2-11\t28\nIGLV3-10\t29\nIGLV3-9\t30\nIGLV2-8\t31\nIGLV4-3\t32\nIGLV3-1\t33")
105 D = c("v.name\tchr.orderD\n")
106 J = c("v.name\tchr.orderJ\nIGLJ1\t1\nIGLJ2\t2\nIGLJ3\t3\nIGLJ6\t4\nIGLJ7\t5")
107 }
108 } else if (species == "mouse"){
109 if(locus == "igh"){
110 cat("mouse igh not yet implemented")
111 } else if (locus == "igk"){
112 cat("mouse igk not yet implemented")
113 } else if (locus == "igl"){
114 cat("mouse igl not yet implemented")
115 }
116 }
117
118 useD = TRUE
119 if(species == "human" && (locus == "igk" || locus == "igl")){
120 useD = FALSE
121 }
122
81 tcV = textConnection(V) 123 tcV = textConnection(V)
82 Vchain = read.table(tcV, sep="\t", header=TRUE) 124 Vchain = read.table(tcV, sep="\t", header=TRUE)
83 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) 125 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE)
84 close(tcV) 126 close(tcV)
85 127
86 D = c("v.name\tchr.orderD\nIGHD1-1\t1\nIGHD2-2\t2\nIGHD3-3\t3\nIGHD6-6\t4\nIGHD1-7\t5\nIGHD2-8\t6\nIGHD3-9\t7\nIGHD3-10\t8\nIGHD4-11\t9\nIGHD5-12\t10\nIGHD6-13\t11\nIGHD1-14\t12\nIGHD2-15\t13\nIGHD3-16\t14\nIGHD4-17\t15\nIGHD5-18\t16\nIGHD6-19\t17\nIGHD1-20\t18\nIGHD2-21\t19\nIGHD3-22\t20\nIGHD4-23\t21\nIGHD5-24\t22\nIGHD6-25\t23\nIGHD1-26\t24\nIGHD7-27\t25")
87 tcD = textConnection(D) 128 tcD = textConnection(D)
88 Dchain = read.table(tcD, sep="\t", header=TRUE) 129 Dchain = read.table(tcD, sep="\t", header=TRUE)
89 PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE) 130 PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE)
90 close(tcD) 131 close(tcD)
91 132
92
93 J = c("v.name\tchr.orderJ\nIGHJ1\t1\nIGHJ2\t2\nIGHJ3\t3\nIGHJ4\t4\nIGHJ5\t5\nIGHJ6\t6")
94 tcJ = textConnection(J) 133 tcJ = textConnection(J)
95 Jchain = read.table(tcJ, sep="\t", header=TRUE) 134 Jchain = read.table(tcJ, sep="\t", header=TRUE)
96 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) 135 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE)
97 close(tcJ) 136 close(tcJ)
98 137
224 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) 263 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE)
225 VDList = split(completeVD, f=completeVD[,"Sample"]) 264 VDList = split(completeVD, f=completeVD[,"Sample"])
226 265
227 lapply(VDList, FUN=plotVD) 266 lapply(VDList, FUN=plotVD)
228 267
229
230
231 plotVJ <- function(dat){ 268 plotVJ <- function(dat){
232 if(length(dat[,1]) == 0){ 269 if(length(dat[,1]) == 0){
233 return() 270 return()
234 } 271 }
235 img = ggplot() + 272 img = ggplot() +
277 print(img) 314 print(img)
278 dev.off() 315 dev.off()
279 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) 316 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
280 } 317 }
281 318
319
282 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) 320 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")])
283 321
284 DandJCount$l = log(DandJCount$Length) 322 DandJCount$l = log(DandJCount$Length)
285 maxDJ = data.frame(data.table(DandJCount)[, list(max=max(l)), by=c("Sample")]) 323 maxDJ = data.frame(data.table(DandJCount)[, list(max=max(l)), by=c("Sample")])
286 DandJCount = merge(DandJCount, maxDJ, by.x="Sample", by.y="Sample", all.x=T) 324 DandJCount = merge(DandJCount, maxDJ, by.x="Sample", by.y="Sample", all.x=T)
291 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) 329 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE)
292 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) 330 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE)
293 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) 331 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE)
294 DJList = split(completeDJ, f=completeDJ[,"Sample"]) 332 DJList = split(completeDJ, f=completeDJ[,"Sample"])
295 lapply(DJList, FUN=plotDJ) 333 lapply(DJList, FUN=plotDJ)
296
297 334
298 sampleFile <- file("samples.txt") 335 sampleFile <- file("samples.txt")
299 un = unique(test$Sample) 336 un = unique(test$Sample)
300 un = paste(un, sep="\n") 337 un = paste(un, sep="\n")
301 writeLines(un, sampleFile) 338 writeLines(un, sampleFile)