# HG changeset patch
# User davidvanzessen
# Date 1458141469 14400
# Node ID 67627d77d63b18646fab2451a2cb9f4ed1bbe90a
# Parent  5ba0377b77374dd889ad847a9083cd42988aa45d
Uploaded
diff -r 5ba0377b7737 -r 67627d77d63b report_clonality/RScript.r
--- a/report_clonality/RScript.r	Fri Jan 29 08:10:21 2016 -0500
+++ b/report_clonality/RScript.r	Wed Mar 16 11:17:49 2016 -0400
@@ -646,6 +646,8 @@
 	  PRODF$N1.REGION.nt.nb = PRODF$N.REGION.nt.nb
   }
   
+  num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) }
+  
   newData = data.frame(data.table(PRODF)[,list(unique=.N, 
                                                VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
                                                P1=mean(.SD$P3V.nt.nb, na.rm=T),
@@ -671,7 +673,32 @@
                                                            mean(.SD$P5J.nt.nb, na.rm=T))),
                                          by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
-  write.table(newData, "junctionAnalysisProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  
+  newData = data.frame(data.table(PRODF)[,list(unique=.N, 
+                                               VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+                                               P1=num_median(.SD$P3V.nt.nb, na.rm=T),
+                                               N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T),
+                                               P2=num_median(.SD$P5D.nt.nb, na.rm=T),
+                                               DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+                                               DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+                                               P3=num_median(.SD$P3D.nt.nb, na.rm=T),
+                                               N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T),
+                                               P4=num_median(.SD$P5J.nt.nb, na.rm=T),
+                                               DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+											   Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, 
+																	 .SD$X5D.REGION.trimmed.nt.nb,
+																	 .SD$X3D.REGION.trimmed.nt.nb,
+																	 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
+											   Total.N=num_median(  c(.SD$N1.REGION.nt.nb,
+																	.SD$N2.REGION.nt.nb), na.rm=T),
+											   Total.P=num_median(c(.SD$P3V.nt.nb,
+																 .SD$P5D.nt.nb,
+																 .SD$P3D.nt.nb,
+																 .SD$P5J.nt.nb), na.rm=T)),
+                                         by=c("Sample")])
+  newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
+  write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
   
   newData = data.frame(data.table(UNPROD)[,list(unique=.N, 
                                                 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
@@ -696,7 +723,33 @@
 						           mean(.SD$P5J.nt.nb, na.rm=T))),
                                           by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
-  write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  
+    newData = data.frame(data.table(UNPROD)[,list(unique=.N, 
+                                                VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+                                                P1=num_median(.SD$P3V.nt.nb, na.rm=T),
+                                                N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T),
+                                                P2=num_median(.SD$P5D.nt.nb, na.rm=T),
+                                                DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+                                                DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+                                                P3=num_median(.SD$P3D.nt.nb, na.rm=T),
+                                                N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T),
+                                                P4=num_median(.SD$P5J.nt.nb, na.rm=T),
+                                                DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+                                                Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, 
+																	 .SD$X5D.REGION.trimmed.nt.nb,
+																	 .SD$X3D.REGION.trimmed.nt.nb,
+																	 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
+                                                Total.N=num_median(  c(.SD$N1.REGION.nt.nb,
+																	.SD$N2.REGION.nt.nb), na.rm=T),
+                                                Total.P=num_median(c(.SD$P3V.nt.nb,
+																 .SD$P5D.nt.nb,
+																 .SD$P3D.nt.nb,
+																 .SD$P5J.nt.nb), na.rm=T)),
+															by=c("Sample")])
+															
+  newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
+  write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
 }
 
 # ---------------------- AA composition in CDR3 ----------------------
diff -r 5ba0377b7737 -r 67627d77d63b report_clonality/r_wrapper.sh
--- a/report_clonality/r_wrapper.sh	Fri Jan 29 08:10:21 2016 -0500
+++ b/report_clonality/r_wrapper.sh	Wed Mar 16 11:17:49 2016 -0400
@@ -200,20 +200,34 @@
 #hasJunctionData="$(if head -n 1 $inputFile | grep -qE '3V.REGION.trimmed.nt.nb'; then echo 'Yes'; else echo 'No'; fi)"
 
 #if [[ "$hasJunctionData" == "Yes" ]] ; then
-if [ -a "$outputDir/junctionAnalysisProd.csv" ] ; then
+if [ -a "$outputDir/junctionAnalysisProd_mean.csv" ] ; then
 	echo "
" >> $outputFile
-	echo "
 Productive| Sample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P | 
|---|
" >> $outputFile
+	echo " Productive mean| Sample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P | 
|---|
" >> $outputFile
+	while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
+	do
+		echo "| $Sample | $unique | $VHDEL | $P1 | $N1 | $P2 | $DELDH | $DHDEL | $P3 | $N2 | $P4 | $DELJH | $TotalDel | $TotalN | $TotalP | 
" >> $outputFile
+	done < $outputDir/junctionAnalysisProd_mean.csv
+	echo "
" >> $outputFile
+	
+	echo " Unproductive mean| Sample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P | 
|---|
" >> $outputFile
 	while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
 	do
 		echo "| $Sample | $unique | $VHDEL | $P1 | $N1 | $P2 | $DELDH | $DHDEL | $P3 | $N2 | $P4 | $DELJH | $TotalDel | $TotalN | $TotalP | 
" >> $outputFile
-	done < $outputDir/junctionAnalysisProd.csv
+	done < $outputDir/junctionAnalysisUnProd_mean.csv
 	echo "
" >> $outputFile
 	
-	echo " Unproductive| Sample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P | 
|---|
" >> $outputFile
+	echo " Productive median| Sample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P | 
|---|
" >> $outputFile
 	while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
 	do
 		echo "| $Sample | $unique | $VHDEL | $P1 | $N1 | $P2 | $DELDH | $DHDEL | $P3 | $N2 | $P4 | $DELJH | $TotalDel | $TotalN | $TotalP | 
" >> $outputFile
-	done < $outputDir/junctionAnalysisUnProd.csv
+	done < $outputDir/junctionAnalysisProd_median.csv
+	echo "
" >> $outputFile
+	
+	echo " Unproductive median| Sample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P | 
|---|
" >> $outputFile
+	while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
+	do
+		echo "| $Sample | $unique | $VHDEL | $P1 | $N1 | $P2 | $DELDH | $DHDEL | $P3 | $N2 | $P4 | $DELJH | $TotalDel | $TotalN | $TotalP | 
" >> $outputFile
+	done < $outputDir/junctionAnalysisUnProd_median.csv
 	echo "
" >> $outputFile
 	
 	echo "" >> $outputFile