comparison report_clonality/RScript.r @ 64:55f18bf19d72 draft

Uploaded
author davidvanzessen
date Fri, 29 Apr 2016 09:08:45 -0400
parents bd6fb6c03948
children 7696dd687f39
comparison
equal deleted inserted replaced
63:bd6fb6c03948 64:55f18bf19d72
156 sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100) 156 sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100)
157 sample.count = merge(sample.count, prod.unique.sample.count, by="Sample", all.x=T) 157 sample.count = merge(sample.count, prod.unique.sample.count, by="Sample", all.x=T)
158 sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100) 158 sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100)
159 159
160 sample.count = merge(sample.count , unprod.sample.count, by="Sample", all.x=T) 160 sample.count = merge(sample.count , unprod.sample.count, by="Sample", all.x=T)
161 print(sample.count)
162 sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100) 161 sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100)
163 sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample", all.x=T) 162 sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample", all.x=T)
164 sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100) 163 sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100)
165
166 print(sample.count)
167 164
168 #then sample/replicate 165 #then sample/replicate
169 rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate"), all.x=T) 166 rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate"), all.x=T)
170 rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100) 167 rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100)
171 rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T) 168 rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T)
545 # ---------------------- calculating the clonality score ---------------------- 542 # ---------------------- calculating the clonality score ----------------------
546 543
547 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available 544 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available
548 { 545 {
549 print("Report Clonality - Clonality") 546 print("Report Clonality - Clonality")
547 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T)
550 if(clonality_method == "boyd"){ 548 if(clonality_method == "boyd"){
551 samples = split(clonalityFrame, clonalityFrame$Sample, drop=T) 549 samples = split(clonalityFrame, clonalityFrame$Sample, drop=T)
552 550
553 for (sample in samples){ 551 for (sample in samples){
554 res = data.frame(paste=character(0)) 552 res = data.frame(paste=character(0))
571 coincidence.table = data.frame(table(res$type)) 569 coincidence.table = data.frame(table(res$type))
572 colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq") 570 colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq")
573 write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) 571 write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
574 } 572 }
575 } else { 573 } else {
576 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T)
577
578 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")]) 574 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])
575
576 #write files for every coincidence group of >1
577 samples = unique(clonalFreq$Sample)
578 for(sample in samples){
579 clonalFreqSample = clonalFreq[clonalFreq$Sample == sample,]
580 if(max(clonalFreqSample$Type) > 1){
581 for(i in 2:max(clonalFreqSample$Type)){
582 clonalFreqSample = clonalFreqSample[clonalFreqSample$Type == i,]
583 print(clonalFreqSample[clonalFreqSample$Type == i,])
584 PRODF.sub = PRODF[PRODF$clonaltype %in% clonalFreqSample$clonaltype,]
585 PRODF.sub = PRODF.sub[order(PRODF.sub$clonaltype),]
586 write.table(PRODF.sub, file=paste("coincidences_", sample, "_", i, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
587 }
588 }
589 }
590
579 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) 591 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])
580 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count 592 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count
581 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) 593 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])
582 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample") 594 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample")
583 595
655 print(paste("Removing", sum(fltr), "sequences without a identified D")) 667 print(paste("Removing", sum(fltr), "sequences without a identified D"))
656 PRODF = PRODF[!fltr,] 668 PRODF = PRODF[!fltr,]
657 } 669 }
658 670
659 671
660 print(names(PRODF))
661 #ensure certain columns are in the data (files generated with older versions of IMGT Loader) 672 #ensure certain columns are in the data (files generated with older versions of IMGT Loader)
662 col.checks = c("N3.REGION.nt.nb", "N4.REGION.nt.nb") 673 col.checks = c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb")
663 for(col.check in col.checks){ 674 for(col.check in col.checks){
664 if(!(col.check %in% names(PRODF))){ 675 if(!(col.check %in% names(PRODF))){
665 print(paste(col.check, "not found adding new column")) 676 print(paste(col.check, "not found adding new column"))
666 if(nrow(PRODF) > 0){ #because R is anoying... 677 if(nrow(PRODF) > 0){ #because R is anoying...
667 PRODF[,col.check] = 0 678 PRODF[,col.check] = 0
673 } else { 684 } else {
674 UNPROD = cbind(UNPROD, data.frame(N3.REGION.nt.nb=numeric(0), N4.REGION.nt.nb=numeric(0))) 685 UNPROD = cbind(UNPROD, data.frame(N3.REGION.nt.nb=numeric(0), N4.REGION.nt.nb=numeric(0)))
675 } 686 }
676 } 687 }
677 } 688 }
678
679 print(names(PRODF))
680 689
681 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } 690 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) }
682 691
683 newData = data.frame(data.table(PRODF)[,list(unique=.N, 692 newData = data.frame(data.table(PRODF)[,list(unique=.N,
684 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 693 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),