comparison report_clonality/RScript.r @ 62:14ea4c464435 draft

Uploaded
author davidvanzessen
date Thu, 28 Apr 2016 05:24:14 -0400
parents 11ec9edfefee
children bd6fb6c03948
comparison
equal deleted inserted replaced
61:1349412c89c6 62:14ea4c464435
47 47
48 print("Report Clonality - Data preperation") 48 print("Report Clonality - Data preperation")
49 49
50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") 50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="")
51 51
52 print(paste("nrows: ", nrow(inputdata)))
53
52 setwd(outdir) 54 setwd(outdir)
53 55
54 # remove weird rows 56 # remove weird rows
55 inputdata = inputdata[inputdata$Sample != "",] 57 inputdata = inputdata[inputdata$Sample != "",]
58
59 print(paste("nrows: ", nrow(inputdata)))
56 60
57 #remove the allele from the V,D and J genes 61 #remove the allele from the V,D and J genes
58 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) 62 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene)
59 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) 63 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene)
60 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) 64 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene)
61 65
66 print(paste("nrows: ", nrow(inputdata)))
67
62 #filter uniques 68 #filter uniques
63 inputdata.removed = inputdata[NULL,] 69 inputdata.removed = inputdata[NULL,]
70
71 print(paste("nrows: ", nrow(inputdata)))
64 72
65 inputdata$clonaltype = 1:nrow(inputdata) 73 inputdata$clonaltype = 1:nrow(inputdata)
66 74
67 #keep track of the count of sequences in samples or samples/replicates for the front page overview 75 #keep track of the count of sequences in samples or samples/replicates for the front page overview
68 input.sample.count = data.frame(data.table(inputdata)[, list(All=.N), by=c("Sample")]) 76 input.sample.count = data.frame(data.table(inputdata)[, list(All=.N), by=c("Sample")])
142 150
143 print("Report Clonality - counting productive/unproductive/unique") 151 print("Report Clonality - counting productive/unproductive/unique")
144 152
145 #create the table on the overview page with the productive/unique counts per sample/replicate 153 #create the table on the overview page with the productive/unique counts per sample/replicate
146 #first for sample 154 #first for sample
147 sample.count = merge(input.sample.count, prod.sample.count, by="Sample") 155 sample.count = merge(input.sample.count, prod.sample.count, by="Sample", all.x=T)
148 sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100) 156 sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100)
149 sample.count = merge(sample.count, prod.unique.sample.count, by="Sample") 157 sample.count = merge(sample.count, prod.unique.sample.count, by="Sample", all.x=T)
150 sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100) 158 sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100)
151 159
152 sample.count = merge(sample.count , unprod.sample.count, by="Sample") 160 sample.count = merge(sample.count , unprod.sample.count, by="Sample", all.x=T)
161 print(sample.count)
153 sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100) 162 sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100)
154 sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample") 163 sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample", all.x=T)
155 sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100) 164 sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100)
156 165
166 print(sample.count)
167
157 #then sample/replicate 168 #then sample/replicate
158 rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate")) 169 rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate"), all.x=T)
159 rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100) 170 rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100)
160 rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate")) 171 rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T)
161 rep.count$perc_prod_un = round(rep.count$Productive_unique / rep.count$All * 100) 172 rep.count$perc_prod_un = round(rep.count$Productive_unique / rep.count$All * 100)
162 173
163 rep.count = merge(rep.count, unprod.rep.count, by=c("Sample", "Replicate")) 174 rep.count = merge(rep.count, unprod.rep.count, by=c("Sample", "Replicate"), all.x=T)
164 rep.count$perc_unprod = round(rep.count$Unproductive / rep.count$All * 100) 175 rep.count$perc_unprod = round(rep.count$Unproductive / rep.count$All * 100)
165 rep.count = merge(rep.count, unprod.unique.rep.count, by=c("Sample", "Replicate")) 176 rep.count = merge(rep.count, unprod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T)
166 rep.count$perc_unprod_un = round(rep.count$Unproductive_unique / rep.count$All * 100) 177 rep.count$perc_unprod_un = round(rep.count$Unproductive_unique / rep.count$All * 100)
167 178
168 rep.count$Sample = paste(rep.count$Sample, rep.count$Replicate, sep="_") 179 rep.count$Sample = paste(rep.count$Sample, rep.count$Replicate, sep="_")
169 rep.count = rep.count[,names(rep.count) != "Replicate"] 180 rep.count = rep.count[,names(rep.count) != "Replicate"]
170 181
171 count = rbind(sample.count, rep.count) 182 count = rbind(sample.count, rep.count)
183
184
172 185
173 write.table(x=count, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F) 186 write.table(x=count, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F)
174 187
175 # ---------------------- Frequency calculation for V, D and J ---------------------- 188 # ---------------------- Frequency calculation for V, D and J ----------------------
176 189
625 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) 638 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)
626 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) 639 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)
627 } 640 }
628 } 641 }
629 642
643 bak = PRODF
644
630 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") 645 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb")
631 if(all(imgtcolumns %in% colnames(inputdata))) 646 if(all(imgtcolumns %in% colnames(inputdata)))
632 { 647 {
633 print("found IMGT columns, running junction analysis") 648 print("found IMGT columns, running junction analysis")
634 649
635 if(locus %in% c("IGK","IGL", "TRA", "TRG")){ 650 if(locus %in% c("IGK","IGL", "TRA", "TRG")){
636 print("VJ recombination, using N column for junction analysis") 651 print("VJ recombination, no filtering on absent D")
637 print(names(PRODF)) 652 } else {
638 print(head(PRODF$N.REGION.nt.nb, 30)) 653 print("VDJ recombination, using N column for junction analysis")
639 PRODF$N1.REGION.nt.nb = PRODF$N.REGION.nt.nb 654 fltr = nchar(PRODF$Top.D.Gene) < 4
655 print(paste("Removing", sum(fltr), "sequences without a identified D"))
656 PRODF = PRODF[!fltr,]
640 } 657 }
641 658
642 num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) } 659 num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) }
660
661 if(locus %in% c("IGH", "TRB", "TRD")){
662 PRODF$new.n = PRODF$N1.REGION.nt.nb + PRODF$N2.REGION.nt.nb
663 } else {
664 PRODF$new.n = PRODF$N.REGION.nt.nb
665 }
643 666
644 newData = data.frame(data.table(PRODF)[,list(unique=.N, 667 newData = data.frame(data.table(PRODF)[,list(unique=.N,
645 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 668 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
646 P1=mean(.SD$P3V.nt.nb, na.rm=T), 669 P1=mean(.SD$P3V.nt.nb, na.rm=T),
647 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T), 670 N1=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T),
648 P2=mean(.SD$P5D.nt.nb, na.rm=T), 671 P2=mean(.SD$P5D.nt.nb, na.rm=T),
649 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), 672 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
650 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), 673 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
651 P3=mean(.SD$P3D.nt.nb, na.rm=T), 674 P3=mean(.SD$P3D.nt.nb, na.rm=T),
652 N2=mean(.SD$N2.REGION.nt.nb, na.rm=T), 675 N2=mean(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
653 P4=mean(.SD$P5J.nt.nb, na.rm=T), 676 P4=mean(.SD$P5J.nt.nb, na.rm=T),
654 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 677 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
655 Total.Del=( mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T) + 678 Total.Del=mean(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
656 mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T) + 679 Total.N=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
657 mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T) + 680 Total.P=mean(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)),
658 mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T)),
659
660 Total.N=( mean(.SD$N1.REGION.nt.nb, na.rm=T) +
661 mean(.SD$N2.REGION.nt.nb, na.rm=T)),
662
663 Total.P=( mean(.SD$P3V.nt.nb, na.rm=T) +
664 mean(.SD$P5D.nt.nb, na.rm=T) +
665 mean(.SD$P3D.nt.nb, na.rm=T) +
666 mean(.SD$P5J.nt.nb, na.rm=T))),
667 by=c("Sample")]) 681 by=c("Sample")])
668 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 682 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
669 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 683 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
670 684
671 newData = data.frame(data.table(PRODF)[,list(unique=.N, 685 newData = data.frame(data.table(PRODF)[,list(unique=.N,
672 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 686 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
673 P1=num_median(.SD$P3V.nt.nb, na.rm=T), 687 P1=num_median(.SD$P3V.nt.nb, na.rm=T),
674 N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T), 688 N1=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T),
675 P2=num_median(.SD$P5D.nt.nb, na.rm=T), 689 P2=num_median(.SD$P5D.nt.nb, na.rm=T),
676 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), 690 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
677 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), 691 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
678 P3=num_median(.SD$P3D.nt.nb, na.rm=T), 692 P3=num_median(.SD$P3D.nt.nb, na.rm=T),
679 N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T), 693 N2=num_median(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
680 P4=num_median(.SD$P5J.nt.nb, na.rm=T), 694 P4=num_median(.SD$P5J.nt.nb, na.rm=T),
681 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 695 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
682 Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, 696 Total.Del=num_median(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
683 .SD$X5D.REGION.trimmed.nt.nb, 697 Total.N=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
684 .SD$X3D.REGION.trimmed.nt.nb, 698 Total.P=num_median(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)),
685 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
686 Total.N=num_median( c(.SD$N1.REGION.nt.nb,
687 .SD$N2.REGION.nt.nb), na.rm=T),
688 Total.P=num_median(c(.SD$P3V.nt.nb,
689 .SD$P5D.nt.nb,
690 .SD$P3D.nt.nb,
691 .SD$P5J.nt.nb), na.rm=T)),
692 by=c("Sample")]) 699 by=c("Sample")])
693 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 700 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
694 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 701 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
695 702
696 newData = data.frame(data.table(UNPROD)[,list(unique=.N, 703 newData = data.frame(data.table(UNPROD)[,list(unique=.N,
697 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 704 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
698 P1=mean(.SD$P3V.nt.nb, na.rm=T), 705 P1=mean(.SD$P3V.nt.nb, na.rm=T),
699 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T), 706 N1=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T),
700 P2=mean(.SD$P5D.nt.nb, na.rm=T), 707 P2=mean(.SD$P5D.nt.nb, na.rm=T),
701 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), 708 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
702 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), 709 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
703 P3=mean(.SD$P3D.nt.nb, na.rm=T), 710 P3=mean(.SD$P3D.nt.nb, na.rm=T),
704 N2=mean(.SD$N2.REGION.nt.nb, na.rm=T), 711 N2=mean(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
705 P4=mean(.SD$P5J.nt.nb, na.rm=T), 712 P4=mean(.SD$P5J.nt.nb, na.rm=T),
706 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 713 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
707 Total.Del=(mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T) + 714 Total.Del=mean(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
708 mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T) + 715 Total.N=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
709 mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T) + 716 Total.P=mean(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)),
710 mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T)),
711 Total.N=( mean(.SD$N1.REGION.nt.nb, na.rm=T) +
712 mean(.SD$N2.REGION.nt.nb, na.rm=T)),
713 Total.P=( mean(.SD$P3V.nt.nb, na.rm=T) +
714 mean(.SD$P5D.nt.nb, na.rm=T) +
715 mean(.SD$P3D.nt.nb, na.rm=T) +
716 mean(.SD$P5J.nt.nb, na.rm=T))),
717 by=c("Sample")]) 717 by=c("Sample")])
718 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 718 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
719 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 719 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
720 720
721 newData = data.frame(data.table(UNPROD)[,list(unique=.N, 721 newData = data.frame(data.table(UNPROD)[,list(unique=.N,
722 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 722 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
723 P1=num_median(.SD$P3V.nt.nb, na.rm=T), 723 P1=num_median(.SD$P3V.nt.nb, na.rm=T),
724 N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T), 724 N1=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T),
725 P2=num_median(.SD$P5D.nt.nb, na.rm=T), 725 P2=num_median(.SD$P5D.nt.nb, na.rm=T),
726 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), 726 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
727 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), 727 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
728 P3=num_median(.SD$P3D.nt.nb, na.rm=T), 728 P3=num_median(.SD$P3D.nt.nb, na.rm=T),
729 N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T), 729 N2=num_median(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
730 P4=num_median(.SD$P5J.nt.nb, na.rm=T), 730 P4=num_median(.SD$P5J.nt.nb, na.rm=T),
731 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 731 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
732 Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, 732 Total.Del=num_median(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
733 .SD$X5D.REGION.trimmed.nt.nb, 733 Total.N=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T),
734 .SD$X3D.REGION.trimmed.nt.nb, 734 Total.P=num_median(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)),
735 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T),
736 Total.N=num_median( c(.SD$N1.REGION.nt.nb,
737 .SD$N2.REGION.nt.nb), na.rm=T),
738 Total.P=num_median(c(.SD$P3V.nt.nb,
739 .SD$P5D.nt.nb,
740 .SD$P3D.nt.nb,
741 .SD$P5J.nt.nb), na.rm=T)),
742 by=c("Sample")]) 735 by=c("Sample")])
743 736
744 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 737 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
745 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 738 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
746 } 739 }
740
741 PRODF = bak
747 742
748 # ---------------------- AA composition in CDR3 ---------------------- 743 # ---------------------- AA composition in CDR3 ----------------------
749 744
750 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] 745 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")]
751 746