comparison report_clonality/RScript.r @ 55:67627d77d63b draft

Uploaded
author davidvanzessen
date Wed, 16 Mar 2016 11:17:49 -0400
parents 5ba0377b7737
children
comparison
equal deleted inserted replaced
54:5ba0377b7737 55:67627d77d63b
644 print(names(PRODF)) 644 print(names(PRODF))
645 print(head(PRODF$N.REGION.nt.nb, 30)) 645 print(head(PRODF$N.REGION.nt.nb, 30))
646 PRODF$N1.REGION.nt.nb = PRODF$N.REGION.nt.nb 646 PRODF$N1.REGION.nt.nb = PRODF$N.REGION.nt.nb
647 } 647 }
648 648
649 num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) }
650
649 newData = data.frame(data.table(PRODF)[,list(unique=.N, 651 newData = data.frame(data.table(PRODF)[,list(unique=.N,
650 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 652 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
651 P1=mean(.SD$P3V.nt.nb, na.rm=T), 653 P1=mean(.SD$P3V.nt.nb, na.rm=T),
652 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T), 654 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T),
653 P2=mean(.SD$P5D.nt.nb, na.rm=T), 655 P2=mean(.SD$P5D.nt.nb, na.rm=T),
669 mean(.SD$P5D.nt.nb, na.rm=T) + 671 mean(.SD$P5D.nt.nb, na.rm=T) +
670 mean(.SD$P3D.nt.nb, na.rm=T) + 672 mean(.SD$P3D.nt.nb, na.rm=T) +
671 mean(.SD$P5J.nt.nb, na.rm=T))), 673 mean(.SD$P5J.nt.nb, na.rm=T))),
672 by=c("Sample")]) 674 by=c("Sample")])
673 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 675 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
674 write.table(newData, "junctionAnalysisProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 676 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
677
678 newData = data.frame(data.table(PRODF)[,list(unique=.N,
679 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
680 P1=num_median(.SD$P3V.nt.nb, na.rm=T),
681 N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T),
682 P2=num_median(.SD$P5D.nt.nb, na.rm=T),
683 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
684 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
685 P3=num_median(.SD$P3D.nt.nb, na.rm=T),
686 N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T),
687 P4=num_median(.SD$P5J.nt.nb, na.rm=T),
688 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
689 Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb,
690 .SD$X5D.REGION.trimmed.nt.nb,
691 .SD$X3D.REGION.trimmed.nt.nb,
692 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
693 Total.N=num_median( c(.SD$N1.REGION.nt.nb,
694 .SD$N2.REGION.nt.nb), na.rm=T),
695 Total.P=num_median(c(.SD$P3V.nt.nb,
696 .SD$P5D.nt.nb,
697 .SD$P3D.nt.nb,
698 .SD$P5J.nt.nb), na.rm=T)),
699 by=c("Sample")])
700 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
701 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
675 702
676 newData = data.frame(data.table(UNPROD)[,list(unique=.N, 703 newData = data.frame(data.table(UNPROD)[,list(unique=.N,
677 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 704 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
678 P1=mean(.SD$P3V.nt.nb, na.rm=T), 705 P1=mean(.SD$P3V.nt.nb, na.rm=T),
679 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T), 706 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T),
694 mean(.SD$P5D.nt.nb, na.rm=T) + 721 mean(.SD$P5D.nt.nb, na.rm=T) +
695 mean(.SD$P3D.nt.nb, na.rm=T) + 722 mean(.SD$P3D.nt.nb, na.rm=T) +
696 mean(.SD$P5J.nt.nb, na.rm=T))), 723 mean(.SD$P5J.nt.nb, na.rm=T))),
697 by=c("Sample")]) 724 by=c("Sample")])
698 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 725 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
699 write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 726 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
727
728 newData = data.frame(data.table(UNPROD)[,list(unique=.N,
729 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
730 P1=num_median(.SD$P3V.nt.nb, na.rm=T),
731 N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T),
732 P2=num_median(.SD$P5D.nt.nb, na.rm=T),
733 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
734 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
735 P3=num_median(.SD$P3D.nt.nb, na.rm=T),
736 N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T),
737 P4=num_median(.SD$P5J.nt.nb, na.rm=T),
738 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
739 Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb,
740 .SD$X5D.REGION.trimmed.nt.nb,
741 .SD$X3D.REGION.trimmed.nt.nb,
742 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
743 Total.N=num_median( c(.SD$N1.REGION.nt.nb,
744 .SD$N2.REGION.nt.nb), na.rm=T),
745 Total.P=num_median(c(.SD$P3V.nt.nb,
746 .SD$P5D.nt.nb,
747 .SD$P3D.nt.nb,
748 .SD$P5J.nt.nb), na.rm=T)),
749 by=c("Sample")])
750
751 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
752 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
700 } 753 }
701 754
702 # ---------------------- AA composition in CDR3 ---------------------- 755 # ---------------------- AA composition in CDR3 ----------------------
703 756
704 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] 757 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")]