# HG changeset patch
# User davidvanzessen
# Date 1458141469 14400
# Node ID 67627d77d63b18646fab2451a2cb9f4ed1bbe90a
# Parent 5ba0377b77374dd889ad847a9083cd42988aa45d
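Uploaded: write per-sample median junction analysis tables (Prod/UnProd) alongside the mean tables and show both in the report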
diff -r 5ba0377b7737 -r 67627d77d63b report_clonality/RScript.r
--- a/report_clonality/RScript.r Fri Jan 29 08:10:21 2016 -0500
+++ b/report_clonality/RScript.r Wed Mar 16 11:17:49 2016 -0400
@@ -646,6 +646,8 @@
PRODF$N1.REGION.nt.nb = PRODF$N.REGION.nt.nb
}
+ # median() wrapper that coerces the result with as.numeric; presumably so every by-group result has the same (double) type -- TODO confirm
+ num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) }
newData = data.frame(data.table(PRODF)[,list(unique=.N,
VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
P1=mean(.SD$P3V.nt.nb, na.rm=T),
@@ -671,7 +673,32 @@
mean(.SD$P5J.nt.nb, na.rm=T))),
by=c("Sample")])
newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
- write.table(newData, "junctionAnalysisProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+ write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+ # Median table for PRODF: one row per Sample, each column computed with num_median, written to junctionAnalysisProd_median.csv
+ newData = data.frame(data.table(PRODF)[,list(unique=.N,
+ VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+ P1=num_median(.SD$P3V.nt.nb, na.rm=T),
+ N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T),
+ P2=num_median(.SD$P5D.nt.nb, na.rm=T),
+ DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+ DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+ P3=num_median(.SD$P3D.nt.nb, na.rm=T),
+ N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T),
+ P4=num_median(.SD$P5J.nt.nb, na.rm=T),
+ DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+ Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb,
+ .SD$X5D.REGION.trimmed.nt.nb,
+ .SD$X3D.REGION.trimmed.nt.nb,
+ .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
+ Total.N=num_median( c(.SD$N1.REGION.nt.nb,
+ .SD$N2.REGION.nt.nb), na.rm=T),
+ Total.P=num_median(c(.SD$P3V.nt.nb,
+ .SD$P5D.nt.nb,
+ .SD$P3D.nt.nb,
+ .SD$P5J.nt.nb), na.rm=T)),
+ by=c("Sample")])
+ newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
+ write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
newData = data.frame(data.table(UNPROD)[,list(unique=.N,
VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
@@ -696,7 +723,33 @@
mean(.SD$P5J.nt.nb, na.rm=T))),
by=c("Sample")])
newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
- write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+ write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+ # Median table for UNPROD: one row per Sample, each column computed with num_median, written to junctionAnalysisUnProd_median.csv
+ newData = data.frame(data.table(UNPROD)[,list(unique=.N,
+ VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+ P1=num_median(.SD$P3V.nt.nb, na.rm=T),
+ N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T),
+ P2=num_median(.SD$P5D.nt.nb, na.rm=T),
+ DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+ DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+ P3=num_median(.SD$P3D.nt.nb, na.rm=T),
+ N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T),
+ P4=num_median(.SD$P5J.nt.nb, na.rm=T),
+ DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+ Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb,
+ .SD$X5D.REGION.trimmed.nt.nb,
+ .SD$X3D.REGION.trimmed.nt.nb,
+ .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
+ Total.N=num_median( c(.SD$N1.REGION.nt.nb,
+ .SD$N2.REGION.nt.nb), na.rm=T),
+ Total.P=num_median(c(.SD$P3V.nt.nb,
+ .SD$P5D.nt.nb,
+ .SD$P3D.nt.nb,
+ .SD$P5J.nt.nb), na.rm=T)),
+ by=c("Sample")])
+ # Round every numeric column to one decimal place before writing the CSV
+ newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
+ write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
}
# ---------------------- AA composition in CDR3 ----------------------
diff -r 5ba0377b7737 -r 67627d77d63b report_clonality/r_wrapper.sh
--- a/report_clonality/r_wrapper.sh Fri Jan 29 08:10:21 2016 -0500
+++ b/report_clonality/r_wrapper.sh Wed Mar 16 11:17:49 2016 -0400
@@ -200,20 +200,34 @@
#hasJunctionData="$(if head -n 1 $inputFile | grep -qE '3V.REGION.trimmed.nt.nb'; then echo 'Yes'; else echo 'No'; fi)"
#if [[ "$hasJunctionData" == "Yes" ]] ; then
-if [ -a "$outputDir/junctionAnalysisProd.csv" ] ; then
+if [ -a "$outputDir/junctionAnalysisProd_mean.csv" ] ; then
echo "
" >> $outputFile
- echo "
ProductiveSample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P |
" >> $outputFile
+ echo " Productive meanSample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P |
" >> $outputFile
+ while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
+ do
+ echo "$Sample | $unique | $VHDEL | $P1 | $N1 | $P2 | $DELDH | $DHDEL | $P3 | $N2 | $P4 | $DELJH | $TotalDel | $TotalN | $TotalP |
" >> $outputFile
+ done < $outputDir/junctionAnalysisProd_mean.csv
+ echo "
" >> $outputFile
+ # Unproductive mean section: header row, then one output row per line of junctionAnalysisUnProd_mean.csv
+ echo " Unproductive meanSample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P |
" >> $outputFile
while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
do
echo "$Sample | $unique | $VHDEL | $P1 | $N1 | $P2 | $DELDH | $DHDEL | $P3 | $N2 | $P4 | $DELJH | $TotalDel | $TotalN | $TotalP |
" >> $outputFile
- done < $outputDir/junctionAnalysisProd.csv
+ done < $outputDir/junctionAnalysisUnProd_mean.csv
echo "
" >> $outputFile
- echo " UnproductiveSample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P |
" >> $outputFile
+ echo " Productive medianSample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P |
" >> $outputFile
while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
do
echo "$Sample | $unique | $VHDEL | $P1 | $N1 | $P2 | $DELDH | $DHDEL | $P3 | $N2 | $P4 | $DELJH | $TotalDel | $TotalN | $TotalP |
" >> $outputFile
- done < $outputDir/junctionAnalysisUnProd.csv
+ done < $outputDir/junctionAnalysisProd_median.csv
+ echo "
" >> $outputFile
+ # Unproductive median section: header row, then one output row per line of junctionAnalysisUnProd_median.csv
+ echo " Unproductive medianSample | count | VH.DEL | P1 | N1 | P2 | DEL.DH | DH.DEL | P3 | N2 | P4 | DEL.JH | Total.Del | Total.N | Total.P |
" >> $outputFile
+ while IFS=, read Sample unique VHDEL P1 N1 P2 DELDH DHDEL P3 N2 P4 DELJH TotalDel TotalN TotalP
+ do
+ echo "$Sample | $unique | $VHDEL | $P1 | $N1 | $P2 | $DELDH | $DHDEL | $P3 | $N2 | $P4 | $DELJH | $TotalDel | $TotalN | $TotalP |
" >> $outputFile
+ done < $outputDir/junctionAnalysisUnProd_median.csv
echo "
" >> $outputFile
echo "" >> $outputFile