diff RScript.r @ 10:06777331fbd8 draft

Uploaded
author davidvanzessen
date Thu, 15 May 2014 09:27:22 -0400
parents e2972f0935e9
children 866d22e60e60
line wrap: on
line diff
--- a/RScript.r	Wed Mar 26 11:55:13 2014 -0400
+++ b/RScript.r	Thu May 15 09:27:22 2014 -0400
@@ -6,6 +6,11 @@
 outFile = args[2]
 outDir = args[3]
 clonalType = args[4]
+species = args[5]
+locus = args[6]
+selection = args[7]
+
+
 
 if (!("gridExtra" %in% rownames(installed.packages()))) {
 	install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") 
@@ -54,7 +59,12 @@
 PRODF = PROD
 
 #PRODF = unique(PRODF)
-PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ]
+
+
+
+if(selection == "unique"){
+	PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ]
+}
 
 PRODFV = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.V.Gene")])
 PRODFV$Length = as.numeric(PRODFV$Length)
@@ -77,20 +87,49 @@
 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE)
 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total))
 
-V = c("v.name\tchr.orderV\nIGHV7-81\t1\nIGHV3-74\t2\nIGHV3-73\t3\nIGHV3-72\t4\nIGHV2-70\t6\nIGHV1-69\t7\nIGHV3-66\t8\nIGHV3-64\t9\nIGHV4-61\t10\nIGHV4-59\t11\nIGHV1-58\t12\nIGHV3-53\t13\nIGHV5-a\t15\nIGHV5-51\t16\nIGHV3-49\t17\nIGHV3-48\t18\nIGHV1-46\t20\nIGHV1-45\t21\nIGHV3-43\t22\nIGHV4-39\t23\nIGHV3-35\t24\nIGHV4-34\t25\nIGHV3-33\t26\nIGHV4-31\t27\nIGHV4-30-4\t28\nIGHV4-30-2\t29\nIGHV3-30-3\t30\nIGHV3-30\t31\nIGHV4-28\t32\nIGHV2-26\t33\nIGHV1-24\t34\nIGHV3-23\t35\nIGHV3-21\t37\nIGHV3-20\t38\nIGHV1-18\t40\nIGHV3-15\t41\nIGHV3-13\t42\nIGHV3-11\t43\nIGHV3-9\t44\nIGHV1-8\t45\nIGHV3-7\t46\nIGHV2-5\t47\nIGHV7-4-1\t48\nIGHV4-4\t49\nIGHV4-b\t50\nIGHV1-3\t51\nIGHV1-2\t52\nIGHV6-1\t53")
+V = c("v.name\tchr.orderV")
+D = c("v.name\tchr.orderD")
+J = c("v.name\tchr.orderJ")
+
+if(species == "human"){
+	if(locus == "igh"){		
+		V = c("v.name\tchr.orderV\nIGHV3-74\t1\nIGHV3-73\t2\nIGHV3-72\t3\nIGHV2-70\t4\nIGHV1-69D\t5\nIGHV1-69-2\t6\nIGHV2-70D\t7\nIGHV1-69\t8\nIGHV3-66\t9\nIGHV3-64\t10\nIGHV4-61\t11\nIGHV4-59\t12\nIGHV1-58\t13\nIGHV3-53\t14\nIGHV5-51\t15\nIGHV3-49\t16\nIGHV3-48\t17\nIGHV1-46\t18\nIGHV1-45\t19\nIGHV3-43\t20\nIGHV4-39\t21\nIGHV3-43D\t22\nIGHV4-38-2\t23\nIGHV4-34\t24\nIGHV3-33\t25\nIGHV4-31\t26\nIGHV3-30-5\t27\nIGHV4-30-4\t28\nIGHV3-30-3\t29\nIGHV4-30-2\t30\nIGHV4-30-1\t31\nIGHV3-30\t32\nIGHV4-28\t33\nIGHV2-26\t34\nIGHV1-24\t35\nIGHV3-23D\t36\nIGHV3-23\t37\nIGHV3-21\t38\nIGHV3-20\t39\nIGHV1-18\t40\nIGHV3-15\t41\nIGHV3-13\t42\nIGHV3-11\t43\nIGHV5-10-1\t44\nIGHV3-9\t45\nIGHV1-8\t46\nIGHV3-64D\t47\nIGHV3-7\t48\nIGHV2-5\t49\nIGHV7-4-1\t50\nIGHV4-4\t51\nIGHV1-3\t52\nIGHV1-2\t53\nIGHV6-1\t54")
+		D = c("v.name\tchr.orderD\nIGHD1-7\t1\nIGHD2-8\t2\nIGHD3-9\t3\nIGHD3-10\t4\nIGHD5-12\t5\nIGHD6-13\t6\nIGHD2-15\t7\nIGHD3-16\t8\nIGHD4-17\t9\nIGHD5-18\t10\nIGHD6-19\t11\nIGHD1-20\t12\nIGHD2-21\t13\nIGHD3-22\t14\nIGHD5-24\t15\nIGHD6-25\t16\nIGHD1-26\t17\nIGHD7-27\t18")
+		J = c("v.name\tchr.orderJ\nIGHJ1\t1\nIGHJ2\t2\nIGHJ3\t3\nIGHJ4\t4\nIGHJ5\t5\nIGHJ6\t6")
+	} else if (locus == "igk"){
+		V = c("v.name\tchr.orderV\nIGKV3D-7\t1\nIGKV1D-8\t2\nIGKV1D-43\t3\nIGKV3D-11\t4\nIGKV1D-12\t5\nIGKV1D-13\t6\nIGKV3D-15\t7\nIGKV1D-16\t8\nIGKV1D-17\t9\nIGKV3D-20\t10\nIGKV2D-26\t11\nIGKV2D-28\t12\nIGKV2D-29\t13\nIGKV2D-30\t14\nIGKV1D-33\t15\nIGKV1D-39\t16\nIGKV2D-40\t17\nIGKV2-40\t18\nIGKV1-39\t19\nIGKV1-33\t20\nIGKV2-30\t21\nIGKV2-29\t22\nIGKV2-28\t23\nIGKV1-27\t24\nIGKV2-24\t25\nIGKV3-20\t26\nIGKV1-17\t27\nIGKV1-16\t28\nIGKV3-15\t29\nIGKV1-13\t30\nIGKV1-12\t31\nIGKV3-11\t32\nIGKV1-9\t33\nIGKV1-8\t34\nIGKV1-6\t35\nIGKV1-5\t36\nIGKV5-2\t37\nIGKV4-1\t38")
+		D = c("v.name\tchr.orderD\n")
+		J = c("v.name\tchr.orderJ\nIGKJ1\t1\nIGKJ2\t2\nIGKJ3\t3\nIGKJ4\t4\nIGKJ5\t5")
+	} else if (locus == "igl"){
+		V = c("v.name\tchr.orderV\nIGLV4-69\t1\nIGLV8-61\t2\nIGLV4-60\t3\nIGLV6-57\t4\nIGLV5-52\t5\nIGLV1-51\t6\nIGLV9-49\t7\nIGLV1-47\t8\nIGLV7-46\t9\nIGLV5-45\t10\nIGLV1-44\t11\nIGLV7-43\t12\nIGLV1-41\t13\nIGLV1-40\t14\nIGLV5-39\t15\nIGLV5-37\t16\nIGLV1-36\t17\nIGLV3-27\t18\nIGLV3-25\t19\nIGLV2-23\t20\nIGLV3-22\t21\nIGLV3-21\t22\nIGLV3-19\t23\nIGLV2-18\t24\nIGLV3-16\t25\nIGLV2-14\t26\nIGLV3-12\t27\nIGLV2-11\t28\nIGLV3-10\t29\nIGLV3-9\t30\nIGLV2-8\t31\nIGLV4-3\t32\nIGLV3-1\t33")
+		D = c("v.name\tchr.orderD\n")
+		J = c("v.name\tchr.orderJ\nIGLJ1\t1\nIGLJ2\t2\nIGLJ3\t3\nIGLJ6\t4\nIGLJ7\t5")
+	}
+} else if (species == "mouse"){
+	if(locus == "igh"){
+		cat("mouse igh not yet implemented")
+	} else if (locus == "igk"){
+		cat("mouse igk not yet implemented")
+	} else if (locus == "igl"){
+		cat("mouse igl not yet implemented")
+	}
+}
+
+useD = TRUE
+if(species == "human" && (locus == "igk" || locus == "igl")){
+	useD = FALSE
+}
+
 tcV = textConnection(V)
 Vchain = read.table(tcV, sep="\t", header=TRUE)
 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE)
 close(tcV)
 
-D = c("v.name\tchr.orderD\nIGHD1-1\t1\nIGHD2-2\t2\nIGHD3-3\t3\nIGHD6-6\t4\nIGHD1-7\t5\nIGHD2-8\t6\nIGHD3-9\t7\nIGHD3-10\t8\nIGHD4-11\t9\nIGHD5-12\t10\nIGHD6-13\t11\nIGHD1-14\t12\nIGHD2-15\t13\nIGHD3-16\t14\nIGHD4-17\t15\nIGHD5-18\t16\nIGHD6-19\t17\nIGHD1-20\t18\nIGHD2-21\t19\nIGHD3-22\t20\nIGHD4-23\t21\nIGHD5-24\t22\nIGHD6-25\t23\nIGHD1-26\t24\nIGHD7-27\t25")
 tcD = textConnection(D)
 Dchain = read.table(tcD, sep="\t", header=TRUE)
 PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE)
 close(tcD)
 
-
-J = c("v.name\tchr.orderJ\nIGHJ1\t1\nIGHJ2\t2\nIGHJ3\t3\nIGHJ4\t4\nIGHJ5\t5\nIGHJ6\t6")
 tcJ = textConnection(J)
 Jchain = read.table(tcJ, sep="\t", header=TRUE)
 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE)
@@ -226,8 +265,6 @@
 
 lapply(VDList, FUN=plotVD)
 
-
-
 plotVJ <- function(dat){
 	if(length(dat[,1]) == 0){
 		return()
@@ -279,6 +316,7 @@
 	write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
 }
 
+
 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")])
 
 DandJCount$l = log(DandJCount$Length)
@@ -294,7 +332,6 @@
 DJList = split(completeDJ, f=completeDJ[,"Sample"])
 lapply(DJList, FUN=plotDJ)
 
-
 sampleFile <- file("samples.txt")
 un = unique(test$Sample)
 un = paste(un, sep="\n")