comparison report_clonality/RScript.r @ 10:edbf4fba5fc7 draft

Uploaded
author davidvanzessen
date Fri, 31 Jul 2015 08:08:05 -0400
parents f90fbc15b35a
children
comparison
equal deleted inserted replaced
9:079eed22fdb6 10:edbf4fba5fc7
99 } 99 }
100 100
101 101
102 102
103 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive 103 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive
104 write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) 104 write.table(PRODF, "allUnique.txt", sep=",",quote=F,row.names=F,col.names=T)
105 write.table(PRODF, "allUnique.csv", sep="\t",quote=F,row.names=F,col.names=T)
105 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T) 106 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T)
106 107
107 #write the samples to a file 108 #write the samples to a file
108 sampleFile <- file("samples.txt") 109 sampleFile <- file("samples.txt")
109 un = unique(inputdata$Sample) 110 un = unique(inputdata$Sample)
552 } 553 }
553 554
554 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") 555 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb")
555 if(all(imgtcolumns %in% colnames(inputdata))) 556 if(all(imgtcolumns %in% colnames(inputdata)))
556 { 557 {
558 print("MEAN P3V.nt.nb:")
559 print(PRODF$P3V.nt.nb)
560 print(mean(PRODF$P3V.nt.nb, na.rm=T))
561 print(head(PRODF))
557 newData = data.frame(data.table(PRODF)[,list(unique=.N, 562 newData = data.frame(data.table(PRODF)[,list(unique=.N,
558 VH.DEL=mean(X3V.REGION.trimmed.nt.nb, na.rm=T), 563 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
559 P1=mean(P3V.nt.nb, na.rm=T), 564 P1=mean(.SD$P3V.nt.nb, na.rm=T),
560 N1=mean(N1.REGION.nt.nb, na.rm=T), 565 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T),
561 P2=mean(P5D.nt.nb, na.rm=T), 566 P2=mean(.SD$P5D.nt.nb, na.rm=T),
562 DEL.DH=mean(X5D.REGION.trimmed.nt.nb, na.rm=T), 567 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
563 DH.DEL=mean(X3D.REGION.trimmed.nt.nb, na.rm=T), 568 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
564 P3=mean(P3D.nt.nb, na.rm=T), 569 P3=mean(.SD$P3D.nt.nb, na.rm=T),
565 N2=mean(N2.REGION.nt.nb, na.rm=T), 570 N2=mean(.SD$N2.REGION.nt.nb, na.rm=T),
566 P4=mean(P5J.nt.nb, na.rm=T), 571 P4=mean(.SD$P5J.nt.nb, na.rm=T),
567 DEL.JH=mean(X5J.REGION.trimmed.nt.nb, na.rm=T), 572 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
568 Total.Del=( mean(X3V.REGION.trimmed.nt.nb, na.rm=T) + 573 Total.Del=( mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T) +
569 mean(X5D.REGION.trimmed.nt.nb, na.rm=T) + 574 mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T) +
570 mean(X3D.REGION.trimmed.nt.nb, na.rm=T) + 575 mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T) +
571 mean(X5J.REGION.trimmed.nt.nb, na.rm=T)), 576 mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T)),
572 577
573 Total.N=( mean(N1.REGION.nt.nb, na.rm=T) + 578 Total.N=( mean(.SD$N1.REGION.nt.nb, na.rm=T) +
574 mean(N2.REGION.nt.nb, na.rm=T)), 579 mean(.SD$N2.REGION.nt.nb, na.rm=T)),
575 580
576 Total.P=( mean(P3V.nt.nb, na.rm=T) + 581 Total.P=( mean(.SD$P3V.nt.nb, na.rm=T) +
577 mean(P5D.nt.nb, na.rm=T) + 582 mean(.SD$P5D.nt.nb, na.rm=T) +
578 mean(P3D.nt.nb, na.rm=T) + 583 mean(.SD$P3D.nt.nb, na.rm=T) +
579 mean(P5J.nt.nb, na.rm=T))), 584 mean(.SD$P5J.nt.nb, na.rm=T))),
580 by=c("Sample")]) 585 by=c("Sample")])
581 write.table(newData, "junctionAnalysisProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 586 write.table(newData, "junctionAnalysisProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
582 587
583 newData = data.frame(data.table(UNPROD)[,list(unique=.N, 588 newData = data.frame(data.table(UNPROD)[,list(unique=.N,
584 VH.DEL=mean(X3V.REGION.trimmed.nt.nb, na.rm=T), 589 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
585 P1=mean(P3V.nt.nb, na.rm=T), 590 P1=mean(.SD$P3V.nt.nb, na.rm=T),
586 N1=mean(N1.REGION.nt.nb, na.rm=T), 591 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T),
587 P2=mean(P5D.nt.nb, na.rm=T), 592 P2=mean(.SD$P5D.nt.nb, na.rm=T),
588 DEL.DH=mean(X5D.REGION.trimmed.nt.nb, na.rm=T), 593 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
589 DH.DEL=mean(X3D.REGION.trimmed.nt.nb, na.rm=T), 594 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
590 P3=mean(P3D.nt.nb, na.rm=T), 595 P3=mean(.SD$P3D.nt.nb, na.rm=T),
591 N2=mean(N2.REGION.nt.nb, na.rm=T), 596 N2=mean(.SD$N2.REGION.nt.nb, na.rm=T),
592 P4=mean(P5J.nt.nb, na.rm=T), 597 P4=mean(.SD$P5J.nt.nb, na.rm=T),
593 DEL.JH=mean(X5J.REGION.trimmed.nt.nb, na.rm=T), 598 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
594 Total.Del=( mean(X3V.REGION.trimmed.nt.nb, na.rm=T) + 599 Total.Del=( mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T) +
595 mean(X5D.REGION.trimmed.nt.nb, na.rm=T) + 600 mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T) +
596 mean(X3D.REGION.trimmed.nt.nb, na.rm=T) + 601 mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T) +
597 mean(X5J.REGION.trimmed.nt.nb, na.rm=T)), 602 mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T)),
598 603
599 Total.N=( mean(N1.REGION.nt.nb, na.rm=T) + 604 Total.N=( mean(.SD$N1.REGION.nt.nb, na.rm=T) +
600 mean(N2.REGION.nt.nb, na.rm=T)), 605 mean(.SD$N2.REGION.nt.nb, na.rm=T)),
601 606
602 Total.P=( mean(P3V.nt.nb, na.rm=T) + 607 Total.P=( mean(.SD$P3V.nt.nb, na.rm=T) +
603 mean(P5D.nt.nb, na.rm=T) + 608 mean(.SD$P5D.nt.nb, na.rm=T) +
604 mean(P3D.nt.nb, na.rm=T) + 609 mean(.SD$P3D.nt.nb, na.rm=T) +
605 mean(P5J.nt.nb, na.rm=T))), 610 mean(.SD$P5J.nt.nb, na.rm=T))),
606 by=c("Sample")]) 611 by=c("Sample")])
607 write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 612 write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
608 } 613 }
609 614
610 # ---------------------- AA composition in CDR3 ---------------------- 615 # ---------------------- AA composition in CDR3 ----------------------