Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 63:bd6fb6c03948 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 28 Apr 2016 08:11:04 -0400 |
parents | 14ea4c464435 |
children | 55f18bf19d72 |
comparison
equal
deleted
inserted
replaced
62:14ea4c464435 | 63:bd6fb6c03948 |
---|---|
654 fltr = nchar(PRODF$Top.D.Gene) < 4 | 654 fltr = nchar(PRODF$Top.D.Gene) < 4 |
655 print(paste("Removing", sum(fltr), "sequences without a identified D")) | 655 print(paste("Removing", sum(fltr), "sequences without a identified D")) |
656 PRODF = PRODF[!fltr,] | 656 PRODF = PRODF[!fltr,] |
657 } | 657 } |
658 | 658 |
659 num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) } | 659 |
660 | 660 print(names(PRODF)) |
661 if(locus %in% c("IGH", "TRB", "TRD")){ | 661 #ensure certain columns are in the data (files generated with older versions of IMGT Loader) |
662 PRODF$new.n = PRODF$N1.REGION.nt.nb + PRODF$N2.REGION.nt.nb | 662 col.checks = c("N3.REGION.nt.nb", "N4.REGION.nt.nb") |
663 } else { | 663 for(col.check in col.checks){ |
664 PRODF$new.n = PRODF$N.REGION.nt.nb | 664 if(!(col.check %in% names(PRODF))){ |
665 } | 665 print(paste(col.check, "not found adding new column")) |
666 if(nrow(PRODF) > 0){ #because R is anoying... | |
667 PRODF[,col.check] = 0 | |
668 } else { | |
669 PRODF = cbind(PRODF, data.frame(N3.REGION.nt.nb=numeric(0), N4.REGION.nt.nb=numeric(0))) | |
670 } | |
671 if(nrow(UNPROD) > 0){ | |
672 UNPROD[,col.check] = 0 | |
673 } else { | |
674 UNPROD = cbind(UNPROD, data.frame(N3.REGION.nt.nb=numeric(0), N4.REGION.nt.nb=numeric(0))) | |
675 } | |
676 } | |
677 } | |
678 | |
679 print(names(PRODF)) | |
680 | |
681 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } | |
666 | 682 |
667 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 683 newData = data.frame(data.table(PRODF)[,list(unique=.N, |
668 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 684 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
669 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 685 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
670 N1=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), | 686 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), |
671 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 687 P2=mean(.SD$P5D.nt.nb, na.rm=T), |
672 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 688 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
673 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 689 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
674 P3=mean(.SD$P3D.nt.nb, na.rm=T), | 690 P3=mean(.SD$P3D.nt.nb, na.rm=T), |
675 N2=mean(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), | 691 N2=mean(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
676 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 692 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
677 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 693 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
678 Total.Del=mean(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 694 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
679 Total.N=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), | 695 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
680 Total.P=mean(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), | 696 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))), |
681 by=c("Sample")]) | 697 by=c("Sample")]) |
682 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 698 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
683 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 699 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
684 | 700 |
685 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 701 newData = data.frame(data.table(PRODF)[,list(unique=.N, |
686 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 702 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
687 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 703 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
688 N1=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), | 704 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), |
689 P2=num_median(.SD$P5D.nt.nb, na.rm=T), | 705 P2=num_median(.SD$P5D.nt.nb, na.rm=T), |
690 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 706 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
691 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 707 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
692 P3=num_median(.SD$P3D.nt.nb, na.rm=T), | 708 P3=num_median(.SD$P3D.nt.nb, na.rm=T), |
693 N2=num_median(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), | 709 N2=num_median(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
694 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 710 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
695 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 711 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
696 Total.Del=num_median(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 712 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
697 Total.N=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), | 713 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
698 Total.P=num_median(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), | 714 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))), |
699 by=c("Sample")]) | 715 by=c("Sample")]) |
700 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 716 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
701 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 717 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
702 | 718 |
703 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 719 newData = data.frame(data.table(UNPROD)[,list(unique=.N, |
704 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 720 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
705 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 721 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
706 N1=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), | 722 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), |
707 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 723 P2=mean(.SD$P5D.nt.nb, na.rm=T), |
708 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 724 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
709 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 725 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
710 P3=mean(.SD$P3D.nt.nb, na.rm=T), | 726 P3=mean(.SD$P3D.nt.nb, na.rm=T), |
711 N2=mean(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), | 727 N2=mean(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
712 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 728 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
713 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 729 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
714 Total.Del=mean(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 730 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
715 Total.N=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), | 731 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
716 Total.P=mean(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), | 732 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))), |
717 by=c("Sample")]) | 733 by=c("Sample")]) |
718 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 734 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
719 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 735 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
720 | 736 |
721 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 737 newData = data.frame(data.table(UNPROD)[,list(unique=.N, |
722 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 738 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
723 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 739 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
724 N1=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), | 740 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), |
725 P2=num_median(.SD$P5D.nt.nb, na.rm=T), | 741 P2=num_median(.SD$P5D.nt.nb, na.rm=T), |
726 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 742 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
727 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 743 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
728 P3=num_median(.SD$P3D.nt.nb, na.rm=T), | 744 P3=num_median(.SD$P3D.nt.nb, na.rm=T), |
729 N2=num_median(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), | 745 N2=num_median(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
730 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 746 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
731 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 747 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
732 Total.Del=num_median(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 748 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
733 Total.N=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), | 749 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
734 Total.P=num_median(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), | 750 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))), |
735 by=c("Sample")]) | 751 by=c("Sample")]) |
736 | 752 |
737 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 753 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
738 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 754 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
739 } | 755 } |