Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 62:14ea4c464435 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 28 Apr 2016 05:24:14 -0400 |
parents | 11ec9edfefee |
children | bd6fb6c03948 |
comparison
equal
deleted
inserted
replaced
61:1349412c89c6 | 62:14ea4c464435 |
---|---|
47 | 47 |
48 print("Report Clonality - Data preperation") | 48 print("Report Clonality - Data preperation") |
49 | 49 |
50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") | 50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") |
51 | 51 |
52 print(paste("nrows: ", nrow(inputdata))) | |
53 | |
52 setwd(outdir) | 54 setwd(outdir) |
53 | 55 |
54 # remove weird rows | 56 # remove weird rows |
55 inputdata = inputdata[inputdata$Sample != "",] | 57 inputdata = inputdata[inputdata$Sample != "",] |
58 | |
59 print(paste("nrows: ", nrow(inputdata))) | |
56 | 60 |
57 #remove the allele from the V,D and J genes | 61 #remove the allele from the V,D and J genes |
58 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) | 62 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) |
59 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) | 63 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) |
60 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) | 64 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) |
61 | 65 |
66 print(paste("nrows: ", nrow(inputdata))) | |
67 | |
62 #filter uniques | 68 #filter uniques |
63 inputdata.removed = inputdata[NULL,] | 69 inputdata.removed = inputdata[NULL,] |
70 | |
71 print(paste("nrows: ", nrow(inputdata))) | |
64 | 72 |
65 inputdata$clonaltype = 1:nrow(inputdata) | 73 inputdata$clonaltype = 1:nrow(inputdata) |
66 | 74 |
67 #keep track of the count of sequences in samples or samples/replicates for the front page overview | 75 #keep track of the count of sequences in samples or samples/replicates for the front page overview |
68 input.sample.count = data.frame(data.table(inputdata)[, list(All=.N), by=c("Sample")]) | 76 input.sample.count = data.frame(data.table(inputdata)[, list(All=.N), by=c("Sample")]) |
142 | 150 |
143 print("Report Clonality - counting productive/unproductive/unique") | 151 print("Report Clonality - counting productive/unproductive/unique") |
144 | 152 |
145 #create the table on the overview page with the productive/unique counts per sample/replicate | 153 #create the table on the overview page with the productive/unique counts per sample/replicate |
146 #first for sample | 154 #first for sample |
147 sample.count = merge(input.sample.count, prod.sample.count, by="Sample") | 155 sample.count = merge(input.sample.count, prod.sample.count, by="Sample", all.x=T) |
148 sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100) | 156 sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100) |
149 sample.count = merge(sample.count, prod.unique.sample.count, by="Sample") | 157 sample.count = merge(sample.count, prod.unique.sample.count, by="Sample", all.x=T) |
150 sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100) | 158 sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100) |
151 | 159 |
152 sample.count = merge(sample.count , unprod.sample.count, by="Sample") | 160 sample.count = merge(sample.count , unprod.sample.count, by="Sample", all.x=T) |
161 print(sample.count) | |
153 sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100) | 162 sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100) |
154 sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample") | 163 sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample", all.x=T) |
155 sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100) | 164 sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100) |
156 | 165 |
166 print(sample.count) | |
167 | |
157 #then sample/replicate | 168 #then sample/replicate |
158 rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate")) | 169 rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate"), all.x=T) |
159 rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100) | 170 rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100) |
160 rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate")) | 171 rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T) |
161 rep.count$perc_prod_un = round(rep.count$Productive_unique / rep.count$All * 100) | 172 rep.count$perc_prod_un = round(rep.count$Productive_unique / rep.count$All * 100) |
162 | 173 |
163 rep.count = merge(rep.count, unprod.rep.count, by=c("Sample", "Replicate")) | 174 rep.count = merge(rep.count, unprod.rep.count, by=c("Sample", "Replicate"), all.x=T) |
164 rep.count$perc_unprod = round(rep.count$Unproductive / rep.count$All * 100) | 175 rep.count$perc_unprod = round(rep.count$Unproductive / rep.count$All * 100) |
165 rep.count = merge(rep.count, unprod.unique.rep.count, by=c("Sample", "Replicate")) | 176 rep.count = merge(rep.count, unprod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T) |
166 rep.count$perc_unprod_un = round(rep.count$Unproductive_unique / rep.count$All * 100) | 177 rep.count$perc_unprod_un = round(rep.count$Unproductive_unique / rep.count$All * 100) |
167 | 178 |
168 rep.count$Sample = paste(rep.count$Sample, rep.count$Replicate, sep="_") | 179 rep.count$Sample = paste(rep.count$Sample, rep.count$Replicate, sep="_") |
169 rep.count = rep.count[,names(rep.count) != "Replicate"] | 180 rep.count = rep.count[,names(rep.count) != "Replicate"] |
170 | 181 |
171 count = rbind(sample.count, rep.count) | 182 count = rbind(sample.count, rep.count) |
183 | |
184 | |
172 | 185 |
173 write.table(x=count, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F) | 186 write.table(x=count, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F) |
174 | 187 |
175 # ---------------------- Frequency calculation for V, D and J ---------------------- | 188 # ---------------------- Frequency calculation for V, D and J ---------------------- |
176 | 189 |
625 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) | 638 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) |
626 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) | 639 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) |
627 } | 640 } |
628 } | 641 } |
629 | 642 |
643 bak = PRODF | |
644 | |
630 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") | 645 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") |
631 if(all(imgtcolumns %in% colnames(inputdata))) | 646 if(all(imgtcolumns %in% colnames(inputdata))) |
632 { | 647 { |
633 print("found IMGT columns, running junction analysis") | 648 print("found IMGT columns, running junction analysis") |
634 | 649 |
635 if(locus %in% c("IGK","IGL", "TRA", "TRG")){ | 650 if(locus %in% c("IGK","IGL", "TRA", "TRG")){ |
636 print("VJ recombination, using N column for junction analysis") | 651 print("VJ recombination, no filtering on absent D") |
637 print(names(PRODF)) | 652 } else { |
638 print(head(PRODF$N.REGION.nt.nb, 30)) | 653 print("VDJ recombination, using N column for junction analysis") |
639 PRODF$N1.REGION.nt.nb = PRODF$N.REGION.nt.nb | 654 fltr = nchar(PRODF$Top.D.Gene) < 4 |
655 print(paste("Removing", sum(fltr), "sequences without a identified D")) | |
656 PRODF = PRODF[!fltr,] | |
640 } | 657 } |
641 | 658 |
642 num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) } | 659 num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) } |
660 | |
661 if(locus %in% c("IGH", "TRB", "TRD")){ | |
662 PRODF$new.n = PRODF$N1.REGION.nt.nb + PRODF$N2.REGION.nt.nb | |
663 } else { | |
664 PRODF$new.n = PRODF$N.REGION.nt.nb | |
665 } | |
643 | 666 |
644 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 667 newData = data.frame(data.table(PRODF)[,list(unique=.N, |
645 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 668 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
646 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 669 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
647 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T), | 670 N1=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), |
648 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 671 P2=mean(.SD$P5D.nt.nb, na.rm=T), |
649 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 672 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
650 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 673 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
651 P3=mean(.SD$P3D.nt.nb, na.rm=T), | 674 P3=mean(.SD$P3D.nt.nb, na.rm=T), |
652 N2=mean(.SD$N2.REGION.nt.nb, na.rm=T), | 675 N2=mean(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
653 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 676 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
654 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 677 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
655 Total.Del=( mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T) + | 678 Total.Del=mean(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
656 mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T) + | 679 Total.N=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
657 mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T) + | 680 Total.P=mean(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), |
658 mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T)), | |
659 | |
660 Total.N=( mean(.SD$N1.REGION.nt.nb, na.rm=T) + | |
661 mean(.SD$N2.REGION.nt.nb, na.rm=T)), | |
662 | |
663 Total.P=( mean(.SD$P3V.nt.nb, na.rm=T) + | |
664 mean(.SD$P5D.nt.nb, na.rm=T) + | |
665 mean(.SD$P3D.nt.nb, na.rm=T) + | |
666 mean(.SD$P5J.nt.nb, na.rm=T))), | |
667 by=c("Sample")]) | 681 by=c("Sample")]) |
668 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 682 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
669 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 683 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
670 | 684 |
671 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 685 newData = data.frame(data.table(PRODF)[,list(unique=.N, |
672 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 686 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
673 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 687 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
674 N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T), | 688 N1=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), |
675 P2=num_median(.SD$P5D.nt.nb, na.rm=T), | 689 P2=num_median(.SD$P5D.nt.nb, na.rm=T), |
676 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 690 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
677 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 691 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
678 P3=num_median(.SD$P3D.nt.nb, na.rm=T), | 692 P3=num_median(.SD$P3D.nt.nb, na.rm=T), |
679 N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T), | 693 N2=num_median(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
680 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 694 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
681 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 695 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
682 Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, | 696 Total.Del=num_median(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
683 .SD$X5D.REGION.trimmed.nt.nb, | 697 Total.N=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
684 .SD$X3D.REGION.trimmed.nt.nb, | 698 Total.P=num_median(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), |
685 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T), | |
686 Total.N=num_median( c(.SD$N1.REGION.nt.nb, | |
687 .SD$N2.REGION.nt.nb), na.rm=T), | |
688 Total.P=num_median(c(.SD$P3V.nt.nb, | |
689 .SD$P5D.nt.nb, | |
690 .SD$P3D.nt.nb, | |
691 .SD$P5J.nt.nb), na.rm=T)), | |
692 by=c("Sample")]) | 699 by=c("Sample")]) |
693 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 700 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
694 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 701 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
695 | 702 |
696 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 703 newData = data.frame(data.table(UNPROD)[,list(unique=.N, |
697 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 704 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
698 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 705 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
699 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T), | 706 N1=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), |
700 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 707 P2=mean(.SD$P5D.nt.nb, na.rm=T), |
701 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 708 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
702 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 709 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
703 P3=mean(.SD$P3D.nt.nb, na.rm=T), | 710 P3=mean(.SD$P3D.nt.nb, na.rm=T), |
704 N2=mean(.SD$N2.REGION.nt.nb, na.rm=T), | 711 N2=mean(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
705 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 712 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
706 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 713 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
707 Total.Del=(mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T) + | 714 Total.Del=mean(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
708 mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T) + | 715 Total.N=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
709 mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T) + | 716 Total.P=mean(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), |
710 mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T)), | |
711 Total.N=( mean(.SD$N1.REGION.nt.nb, na.rm=T) + | |
712 mean(.SD$N2.REGION.nt.nb, na.rm=T)), | |
713 Total.P=( mean(.SD$P3V.nt.nb, na.rm=T) + | |
714 mean(.SD$P5D.nt.nb, na.rm=T) + | |
715 mean(.SD$P3D.nt.nb, na.rm=T) + | |
716 mean(.SD$P5J.nt.nb, na.rm=T))), | |
717 by=c("Sample")]) | 717 by=c("Sample")]) |
718 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 718 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
719 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 719 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
720 | 720 |
721 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 721 newData = data.frame(data.table(UNPROD)[,list(unique=.N, |
722 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 722 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
723 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 723 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
724 N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T), | 724 N1=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), |
725 P2=num_median(.SD$P5D.nt.nb, na.rm=T), | 725 P2=num_median(.SD$P5D.nt.nb, na.rm=T), |
726 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 726 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
727 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 727 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
728 P3=num_median(.SD$P3D.nt.nb, na.rm=T), | 728 P3=num_median(.SD$P3D.nt.nb, na.rm=T), |
729 N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T), | 729 N2=num_median(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
730 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 730 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
731 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 731 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
732 Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, | 732 Total.Del=num_median(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
733 .SD$X5D.REGION.trimmed.nt.nb, | 733 Total.N=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
734 .SD$X3D.REGION.trimmed.nt.nb, | 734 Total.P=num_median(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), |
735 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T), | |
736 Total.N=num_median( c(.SD$N1.REGION.nt.nb, | |
737 .SD$N2.REGION.nt.nb), na.rm=T), | |
738 Total.P=num_median(c(.SD$P3V.nt.nb, | |
739 .SD$P5D.nt.nb, | |
740 .SD$P3D.nt.nb, | |
741 .SD$P5J.nt.nb), na.rm=T)), | |
742 by=c("Sample")]) | 735 by=c("Sample")]) |
743 | 736 |
744 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 737 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
745 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 738 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
746 } | 739 } |
740 | |
741 PRODF = bak | |
747 | 742 |
748 # ---------------------- AA composition in CDR3 ---------------------- | 743 # ---------------------- AA composition in CDR3 ---------------------- |
749 | 744 |
750 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] | 745 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] |
751 | 746 |