changeset 63:bd6fb6c03948 draft

Uploaded
author davidvanzessen
date Thu, 28 Apr 2016 08:11:04 -0400
parents 14ea4c464435
children 55f18bf19d72
files report_clonality/RScript.r
diffstat 1 files changed, 41 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/report_clonality/RScript.r	Thu Apr 28 05:24:14 2016 -0400
+++ b/report_clonality/RScript.r	Thu Apr 28 08:11:04 2016 -0400
@@ -656,28 +656,44 @@
 	  PRODF = PRODF[!fltr,]
   }
   
-  num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) }
   
-  if(locus %in% c("IGH", "TRB", "TRD")){
-	  PRODF$new.n = PRODF$N1.REGION.nt.nb + PRODF$N2.REGION.nt.nb
-  } else {
-	  PRODF$new.n = PRODF$N.REGION.nt.nb
+  print(names(PRODF))
+  #ensure certain columns are in the data (files generated with older versions of IMGT Loader)
+  col.checks = c("N3.REGION.nt.nb", "N4.REGION.nt.nb")
+  for(col.check in col.checks){
+	  #PRODF and UNPROD are checked independently, so a column missing from
+	  #only one of them is still added and an existing column is never
+	  #duplicated or overwritten; rep(0, nrow(...)) gives a zero-length
+	  #vector for an empty data frame, where assigning a scalar 0 would fail
+	  if(!(col.check %in% names(PRODF))){
+		  print(paste(col.check, "not found adding new column"))
+		  PRODF[[col.check]] = rep(0, nrow(PRODF))
+	  }
+	  if(!(col.check %in% names(UNPROD))){
+		  #no second log line: the message above is printed once per column
+		  UNPROD[[col.check]] = rep(0, nrow(UNPROD))
+	  }
+	  #from here on both frames carry col.check for the summaries below
   }
   
+  print(names(PRODF))
+  
+  num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } #median of x coerced with as.numeric; NA values are dropped unless na.rm=F is passed
+  
   newData = data.frame(data.table(PRODF)[,list(unique=.N, 
                                                VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
                                                P1=mean(.SD$P3V.nt.nb, na.rm=T),
-                                               N1=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T),
+                                               N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
                                                P2=mean(.SD$P5D.nt.nb, na.rm=T),
                                                DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
                                                DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
                                                P3=mean(.SD$P3D.nt.nb, na.rm=T),
-                                               N2=mean(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
+                                               N2=mean(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
                                                P4=mean(.SD$P5J.nt.nb, na.rm=T),
                                                DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
-                                               Total.Del=mean(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
-                                               Total.N=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
-                                               Total.P=mean(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)),
+                                               Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
+                                               Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+                                               Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))),
                                          by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
   write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
@@ -685,17 +701,17 @@
   newData = data.frame(data.table(PRODF)[,list(unique=.N, 
                                                VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
                                                P1=num_median(.SD$P3V.nt.nb, na.rm=T),
-                                               N1=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T),
+                                               N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
                                                P2=num_median(.SD$P5D.nt.nb, na.rm=T),
                                                DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
                                                DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
                                                P3=num_median(.SD$P3D.nt.nb, na.rm=T),
-                                               N2=num_median(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
+                                               N2=num_median(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
                                                P4=num_median(.SD$P5J.nt.nb, na.rm=T),
                                                DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
-											   Total.Del=num_median(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
-											   Total.N=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
-											   Total.P=num_median(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)),
+											   Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
+											   Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+											   Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))),
                                          by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
   write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
@@ -703,17 +719,17 @@
   newData = data.frame(data.table(UNPROD)[,list(unique=.N, 
                                                 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
                                                 P1=mean(.SD$P3V.nt.nb, na.rm=T),
-                                                N1=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T),
+                                                N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
                                                 P2=mean(.SD$P5D.nt.nb, na.rm=T),
                                                 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
                                                 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
                                                 P3=mean(.SD$P3D.nt.nb, na.rm=T),
-                                                N2=mean(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
+                                                N2=mean(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
                                                 P4=mean(.SD$P5J.nt.nb, na.rm=T),
                                                 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
-                                                Total.Del=mean(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
-                                                Total.N=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
-                                                Total.P=mean(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)),
+                                                Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
+                                                Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+                                                Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))),
                                           by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
   write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
@@ -721,17 +737,17 @@
     newData = data.frame(data.table(UNPROD)[,list(unique=.N, 
                                                 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
                                                 P1=num_median(.SD$P3V.nt.nb, na.rm=T),
-                                                N1=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T),
+                                                N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
                                                 P2=num_median(.SD$P5D.nt.nb, na.rm=T),
                                                 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
                                                 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
                                                 P3=num_median(.SD$P3D.nt.nb, na.rm=T),
-                                                N2=num_median(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
+                                                N2=num_median(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
                                                 P4=num_median(.SD$P5J.nt.nb, na.rm=T),
                                                 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
-                                                Total.Del=num_median(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
-                                                Total.N=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
-                                                Total.P=num_median(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)),
+                                                Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
+                                                Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+                                                Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))),
 															by=c("Sample")])
 															
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)