changeset 55:67627d77d63b draft

Uploaded
author davidvanzessen
date Wed, 16 Mar 2016 11:17:49 -0400
parents 5ba0377b7737
children 2eb94c08e550
files report_clonality/RScript.r report_clonality/r_wrapper.sh
diffstat 2 files changed, 74 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/report_clonality/RScript.r	Fri Jan 29 08:10:21 2016 -0500
+++ b/report_clonality/RScript.r	Wed Mar 16 11:17:49 2016 -0400
@@ -646,6 +646,8 @@
 	  PRODF$N1.REGION.nt.nb = PRODF$N.REGION.nt.nb
   }
   
+  num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) } # median() wrapper that always returns a double via as.numeric(); presumably keeps the per-group result type stable for data.table - TODO confirm
+  
   newData = data.frame(data.table(PRODF)[,list(unique=.N, 
                                                VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
                                                P1=mean(.SD$P3V.nt.nb, na.rm=T),
@@ -671,7 +673,32 @@
                                                            mean(.SD$P5J.nt.nb, na.rm=T))),
                                          by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
-  write.table(newData, "junctionAnalysisProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  
+  newData = data.frame(data.table(PRODF)[,list(unique=.N, # one row per Sample: group size (.N) plus the median of each junction column
+                                               VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+                                               P1=num_median(.SD$P3V.nt.nb, na.rm=T),
+                                               N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T),
+                                               P2=num_median(.SD$P5D.nt.nb, na.rm=T),
+                                               DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+                                               DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+                                               P3=num_median(.SD$P3D.nt.nb, na.rm=T),
+                                               N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T),
+                                               P4=num_median(.SD$P5J.nt.nb, na.rm=T),
+                                               DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+											   Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, # NOTE(review): c() pools the four .trimmed.nt.nb columns, so this is the median of all pooled values, not of per-row totals - confirm intended
+																	 .SD$X5D.REGION.trimmed.nt.nb,
+																	 .SD$X3D.REGION.trimmed.nt.nb,
+																	 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
+											   Total.N=num_median(  c(.SD$N1.REGION.nt.nb, # same pooling, over the two N-region columns
+																	.SD$N2.REGION.nt.nb), na.rm=T),
+											   Total.P=num_median(c(.SD$P3V.nt.nb, # same pooling, over the four P columns
+																 .SD$P5D.nt.nb,
+																 .SD$P3D.nt.nb,
+																 .SD$P5J.nt.nb), na.rm=T)),
+                                         by=c("Sample")])
+  newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) # round every numeric column to one decimal
+  write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) # headerless, unquoted CSV; NA is written as "-"
   
   newData = data.frame(data.table(UNPROD)[,list(unique=.N, 
                                                 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
@@ -696,7 +723,33 @@
 						           mean(.SD$P5J.nt.nb, na.rm=T))),
                                           by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
-  write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  
+    newData = data.frame(data.table(UNPROD)[,list(unique=.N, # one row per Sample: group size (.N) plus the median of each junction column
+                                                VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+                                                P1=num_median(.SD$P3V.nt.nb, na.rm=T),
+                                                N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T),
+                                                P2=num_median(.SD$P5D.nt.nb, na.rm=T),
+                                                DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+                                                DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+                                                P3=num_median(.SD$P3D.nt.nb, na.rm=T),
+                                                N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T),
+                                                P4=num_median(.SD$P5J.nt.nb, na.rm=T),
+                                                DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+                                                Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, # NOTE(review): c() pools the four .trimmed.nt.nb columns, so this is the median of all pooled values, not of per-row totals - confirm intended
+																	 .SD$X5D.REGION.trimmed.nt.nb,
+																	 .SD$X3D.REGION.trimmed.nt.nb,
+																	 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
+                                                Total.N=num_median(  c(.SD$N1.REGION.nt.nb, # same pooling, over the two N-region columns
+																	.SD$N2.REGION.nt.nb), na.rm=T),
+                                                Total.P=num_median(c(.SD$P3V.nt.nb, # same pooling, over the four P columns
+																 .SD$P5D.nt.nb,
+																 .SD$P3D.nt.nb,
+																 .SD$P5J.nt.nb), na.rm=T)),
+															by=c("Sample")])
+															
+  newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) # round every numeric column to one decimal
+  write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) # headerless, unquoted CSV; NA is written as "-"
 }
 
 # ---------------------- AA composition in CDR3 ----------------------
--- a/report_clonality/r_wrapper.sh	Fri Jan 29 08:10:21 2016 -0500
+++ b/report_clonality/r_wrapper.sh	Wed Mar 16 11:17:49 2016 -0400
@@ -200,20 +200,34 @@
 #hasJunctionData="$(if head -n 1 $inputFile | grep -qE '3V.REGION.trimmed.nt.nb'; then echo 'Yes'; else echo 'No'; fi)"
 
 #if [[ "$hasJunctionData" == "Yes" ]] ; then
-if [ -a "$outputDir/junctionAnalysisProd.csv" ] ; then
+if [ -a "$outputDir/junctionAnalysisProd_mean.csv" ] ; then
 	echo "<div class='tabbertab' title='Junction Analysis'>" >> $outputFile
-	echo "<table border='1' id='junction_table'> <caption>Productive</caption><thead><tr><th>Sample</th><th>count</th><th>VH.DEL</th><th>P1</th><th>N1</th><th>P2</th><th>DEL.DH</th><th>DH.DEL</th><th>P3</th><th>N2</th><th>P4</th><th>DEL.JH</th><th>Total.Del</th><th>Total.N</th><th>Total.P</th><thead></tr><tbody>" >> $outputFile
+	echo "<table border='1' id='junction_table'> <caption>Productive mean</caption><thead><tr><th>Sample</th><th>count</th><th>VH.DEL</th><th>P1</th><th>N1</th><th>P2</th><th>DEL.DH</th><th>DH.DEL</th><th>P3</th><th>N2</th><th>P4</th><th>DEL.JH</th><th>Total.Del</th><th>Total.N</th><th>Total.P</th></tr></thead><tbody>" >> $outputFile
+	while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
+	do
+		echo "<tr><td>$Sample</td><td>$unique</td><td>$VHDEL</td><td>$P1</td><td>$N1</td><td>$P2</td><td>$DELDH</td><td>$DHDEL</td><td>$P3</td><td>$N2</td><td>$P4</td><td>$DELJH</td><td>$TotalDel</td><td>$TotalN</td><td>$TotalP</td></tr>" >> $outputFile
+	done < $outputDir/junctionAnalysisProd_mean.csv
+	echo "</tbody></table>" >> $outputFile
+	
+	echo "<table border='1' id='junction_table'> <caption>Unproductive mean</caption><thead><tr><th>Sample</th><th>count</th><th>VH.DEL</th><th>P1</th><th>N1</th><th>P2</th><th>DEL.DH</th><th>DH.DEL</th><th>P3</th><th>N2</th><th>P4</th><th>DEL.JH</th><th>Total.Del</th><th>Total.N</th><th>Total.P</th></tr></thead><tbody>" >> $outputFile
 	while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
 	do
 		echo "<tr><td>$Sample</td><td>$unique</td><td>$VHDEL</td><td>$P1</td><td>$N1</td><td>$P2</td><td>$DELDH</td><td>$DHDEL</td><td>$P3</td><td>$N2</td><td>$P4</td><td>$DELJH</td><td>$TotalDel</td><td>$TotalN</td><td>$TotalP</td></tr>" >> $outputFile
-	done < $outputDir/junctionAnalysisProd.csv
+	done < $outputDir/junctionAnalysisUnProd_mean.csv
 	echo "</tbody></table>" >> $outputFile
 	
-	echo "<table border='1' id='junction_table'> <caption>Unproductive</caption><thead><tr><th>Sample</th><th>count</th><th>VH.DEL</th><th>P1</th><th>N1</th><th>P2</th><th>DEL.DH</th><th>DH.DEL</th><th>P3</th><th>N2</th><th>P4</th><th>DEL.JH</th><th>Total.Del</th><th>Total.N</th><th>Total.P</th><thead></tr><tbody>" >> $outputFile
+	echo "<table border='1' id='junction_table'> <caption>Productive median</caption><thead><tr><th>Sample</th><th>count</th><th>VH.DEL</th><th>P1</th><th>N1</th><th>P2</th><th>DEL.DH</th><th>DH.DEL</th><th>P3</th><th>N2</th><th>P4</th><th>DEL.JH</th><th>Total.Del</th><th>Total.N</th><th>Total.P</th></tr></thead><tbody>" >> $outputFile
 	while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
 	do
 		echo "<tr><td>$Sample</td><td>$unique</td><td>$VHDEL</td><td>$P1</td><td>$N1</td><td>$P2</td><td>$DELDH</td><td>$DHDEL</td><td>$P3</td><td>$N2</td><td>$P4</td><td>$DELJH</td><td>$TotalDel</td><td>$TotalN</td><td>$TotalP</td></tr>" >> $outputFile
-	done < $outputDir/junctionAnalysisUnProd.csv
+	done < $outputDir/junctionAnalysisProd_median.csv
+	echo "</tbody></table>" >> $outputFile
+	
+	echo "<table border='1' id='junction_table'> <caption>Unproductive median</caption><thead><tr><th>Sample</th><th>count</th><th>VH.DEL</th><th>P1</th><th>N1</th><th>P2</th><th>DEL.DH</th><th>DH.DEL</th><th>P3</th><th>N2</th><th>P4</th><th>DEL.JH</th><th>Total.Del</th><th>Total.N</th><th>Total.P</th></tr></thead><tbody>" >> $outputFile
+	while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
+	do
+		echo "<tr><td>$Sample</td><td>$unique</td><td>$VHDEL</td><td>$P1</td><td>$N1</td><td>$P2</td><td>$DELDH</td><td>$DHDEL</td><td>$P3</td><td>$N2</td><td>$P4</td><td>$DELJH</td><td>$TotalDel</td><td>$TotalN</td><td>$TotalP</td></tr>" >> $outputFile
+	done < $outputDir/junctionAnalysisUnProd_median.csv
 	echo "</tbody></table>" >> $outputFile
 	
 	echo "</div>" >> $outputFile