diff report_clonality/RScript.r @ 55:67627d77d63b draft

Uploaded
author davidvanzessen
date Wed, 16 Mar 2016 11:17:49 -0400
parents 5ba0377b7737
children
line wrap: on
line diff
--- a/report_clonality/RScript.r	Fri Jan 29 08:10:21 2016 -0500
+++ b/report_clonality/RScript.r	Wed Mar 16 11:17:49 2016 -0400
@@ -646,6 +646,8 @@
 	  PRODF$N1.REGION.nt.nb = PRODF$N.REGION.nt.nb
   }
   
+  num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) }
+  
   newData = data.frame(data.table(PRODF)[,list(unique=.N, 
                                                VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
                                                P1=mean(.SD$P3V.nt.nb, na.rm=T),
@@ -671,7 +673,32 @@
                                                            mean(.SD$P5J.nt.nb, na.rm=T))),
                                          by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
-  write.table(newData, "junctionAnalysisProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  
+  newData = data.frame(data.table(PRODF)[,list(unique=.N, 
+                                               VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+                                               P1=num_median(.SD$P3V.nt.nb, na.rm=T),
+                                               N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T),
+                                               P2=num_median(.SD$P5D.nt.nb, na.rm=T),
+                                               DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+                                               DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+                                               P3=num_median(.SD$P3D.nt.nb, na.rm=T),
+                                               N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T),
+                                               P4=num_median(.SD$P5J.nt.nb, na.rm=T),
+                                               DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+											   Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, 
+																	 .SD$X5D.REGION.trimmed.nt.nb,
+																	 .SD$X3D.REGION.trimmed.nt.nb,
+																	 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
+											   Total.N=num_median(  c(.SD$N1.REGION.nt.nb,
+																	.SD$N2.REGION.nt.nb), na.rm=T),
+											   Total.P=num_median(c(.SD$P3V.nt.nb,
+																 .SD$P5D.nt.nb,
+																 .SD$P3D.nt.nb,
+																 .SD$P5J.nt.nb), na.rm=T)),
+                                         by=c("Sample")])
+  newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
+  write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
   
   newData = data.frame(data.table(UNPROD)[,list(unique=.N, 
                                                 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
@@ -696,7 +723,33 @@
 						           mean(.SD$P5J.nt.nb, na.rm=T))),
                                           by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
-  write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  
+    newData = data.frame(data.table(UNPROD)[,list(unique=.N, 
+                                                VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+                                                P1=num_median(.SD$P3V.nt.nb, na.rm=T),
+                                                N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T),
+                                                P2=num_median(.SD$P5D.nt.nb, na.rm=T),
+                                                DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+                                                DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+                                                P3=num_median(.SD$P3D.nt.nb, na.rm=T),
+                                                N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T),
+                                                P4=num_median(.SD$P5J.nt.nb, na.rm=T),
+                                                DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+                                                Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, 
+																	 .SD$X5D.REGION.trimmed.nt.nb,
+																	 .SD$X3D.REGION.trimmed.nt.nb,
+																	 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
+                                                Total.N=num_median(  c(.SD$N1.REGION.nt.nb,
+																	.SD$N2.REGION.nt.nb), na.rm=T),
+                                                Total.P=num_median(c(.SD$P3V.nt.nb,
+																 .SD$P5D.nt.nb,
+																 .SD$P3D.nt.nb,
+																 .SD$P5J.nt.nb), na.rm=T)),
+															by=c("Sample")])
+															
+  newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
+  write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
 }
 
 # ---------------------- AA composition in CDR3 ----------------------