# HG changeset patch # User davidvanzessen # Date 1458141469 14400 # Node ID 67627d77d63b18646fab2451a2cb9f4ed1bbe90a # Parent 5ba0377b77374dd889ad847a9083cd42988aa45d Uploaded diff -r 5ba0377b7737 -r 67627d77d63b report_clonality/RScript.r --- a/report_clonality/RScript.r Fri Jan 29 08:10:21 2016 -0500 +++ b/report_clonality/RScript.r Wed Mar 16 11:17:49 2016 -0400 @@ -646,6 +646,8 @@ PRODF$N1.REGION.nt.nb = PRODF$N.REGION.nt.nb } + num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) } + newData = data.frame(data.table(PRODF)[,list(unique=.N, VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), P1=mean(.SD$P3V.nt.nb, na.rm=T), @@ -671,7 +673,32 @@ mean(.SD$P5J.nt.nb, na.rm=T))), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) - write.table(newData, "junctionAnalysisProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) + write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) + + newData = data.frame(data.table(PRODF)[,list(unique=.N, + VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), + P1=num_median(.SD$P3V.nt.nb, na.rm=T), + N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T), + P2=num_median(.SD$P5D.nt.nb, na.rm=T), + DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), + DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), + P3=num_median(.SD$P3D.nt.nb, na.rm=T), + N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T), + P4=num_median(.SD$P5J.nt.nb, na.rm=T), + DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), + Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, + .SD$X5D.REGION.trimmed.nt.nb, + .SD$X3D.REGION.trimmed.nt.nb, + .SD$X5J.REGION.trimmed.nt.nb), na.rm=T), + Total.N=num_median( c(.SD$N1.REGION.nt.nb, + .SD$N2.REGION.nt.nb), na.rm=T), + Total.P=num_median(c(.SD$P3V.nt.nb, + .SD$P5D.nt.nb, + .SD$P3D.nt.nb, + .SD$P5J.nt.nb), na.rm=T)), + by=c("Sample")]) + newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) + write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) newData = data.frame(data.table(UNPROD)[,list(unique=.N, VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), @@ -696,7 +723,33 @@ mean(.SD$P5J.nt.nb, na.rm=T))), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) - write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) + write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) + + newData = data.frame(data.table(UNPROD)[,list(unique=.N, + VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), + P1=num_median(.SD$P3V.nt.nb, na.rm=T), + N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T), + P2=num_median(.SD$P5D.nt.nb, na.rm=T), + DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), + DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), + P3=num_median(.SD$P3D.nt.nb, na.rm=T), + N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T), + P4=num_median(.SD$P5J.nt.nb, na.rm=T), + DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), + Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, + .SD$X5D.REGION.trimmed.nt.nb, + .SD$X3D.REGION.trimmed.nt.nb, + .SD$X5J.REGION.trimmed.nt.nb), na.rm=T), + Total.N=num_median( c(.SD$N1.REGION.nt.nb, + .SD$N2.REGION.nt.nb), na.rm=T), + Total.P=num_median(c(.SD$P3V.nt.nb, + .SD$P5D.nt.nb, + .SD$P3D.nt.nb, + .SD$P5J.nt.nb), na.rm=T)), + by=c("Sample")]) + + newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) + write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) } # ---------------------- AA composition in CDR3 ---------------------- diff -r 5ba0377b7737 -r 67627d77d63b report_clonality/r_wrapper.sh --- a/report_clonality/r_wrapper.sh Fri Jan 29 08:10:21 2016 -0500 +++ b/report_clonality/r_wrapper.sh Wed Mar 16 11:17:49 2016 -0400 @@ -200,20 +200,34 @@ #hasJunctionData="$(if head -n 1 $inputFile | grep -qE '3V.REGION.trimmed.nt.nb'; then echo 'Yes'; else echo 'No'; fi)" #if [[ "$hasJunctionData" == "Yes" ]] ; then -if [ -a "$outputDir/junctionAnalysisProd.csv" ] ; then +if [ -a "$outputDir/junctionAnalysisProd_mean.csv" ] ; then echo "
" >> $outputFile - echo "" >> $outputFile + echo "
Productive
SamplecountVH.DELP1N1P2DEL.DHDH.DELP3N2P4DEL.JHTotal.DelTotal.NTotal.P
" >> $outputFile + while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP + do + echo "" >> $outputFile + done < $outputDir/junctionAnalysisProd_mean.csv + echo "
Productive mean
SamplecountVH.DELP1N1P2DEL.DHDH.DELP3N2P4DEL.JHTotal.DelTotal.NTotal.P
$Sample$unique$VHDEL$P1$N1$P2$DELDH$DHDEL$P3$N2$P4$DELJH$TotalDel$TotalN$TotalP
" >> $outputFile + + echo "" >> $outputFile while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP do echo "" >> $outputFile - done < $outputDir/junctionAnalysisProd.csv + done < $outputDir/junctionAnalysisUnProd_mean.csv echo "
Unproductive mean
SamplecountVH.DELP1N1P2DEL.DHDH.DELP3N2P4DEL.JHTotal.DelTotal.NTotal.P
$Sample$unique$VHDEL$P1$N1$P2$DELDH$DHDEL$P3$N2$P4$DELJH$TotalDel$TotalN$TotalP
" >> $outputFile - echo "" >> $outputFile + echo "
Unproductive
SamplecountVH.DELP1N1P2DEL.DHDH.DELP3N2P4DEL.JHTotal.DelTotal.NTotal.P
" >> $outputFile while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP do echo "" >> $outputFile - done < $outputDir/junctionAnalysisUnProd.csv + done < $outputDir/junctionAnalysisProd_median.csv + echo "
Productive median
SamplecountVH.DELP1N1P2DEL.DHDH.DELP3N2P4DEL.JHTotal.DelTotal.NTotal.P
$Sample$unique$VHDEL$P1$N1$P2$DELDH$DHDEL$P3$N2$P4$DELJH$TotalDel$TotalN$TotalP
" >> $outputFile + + echo "" >> $outputFile + while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP + do + echo "" >> $outputFile + done < $outputDir/junctionAnalysisUnProd_median.csv echo "
Unproductive median
SamplecountVH.DELP1N1P2DEL.DHDH.DELP3N2P4DEL.JHTotal.DelTotal.NTotal.P
$Sample$unique$VHDEL$P1$N1$P2$DELDH$DHDEL$P3$N2$P4$DELJH$TotalDel$TotalN$TotalP
" >> $outputFile echo "
" >> $outputFile