Mercurial repository: davidvanzessen/argalaxy_tools
comparison: report_clonality/RScript.r @ 64:55f18bf19d72 (draft)
description: Uploaded
author:      davidvanzessen
date:        Fri, 29 Apr 2016 09:08:45 -0400
parents:     bd6fb6c03948
children:    7696dd687f39
--- report_clonality/RScript.r @ 63:bd6fb6c03948
+++ report_clonality/RScript.r @ 64:55f18bf19d72
@@ -156,16 +156,13 @@
 sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100)
 sample.count = merge(sample.count, prod.unique.sample.count, by="Sample", all.x=T)
 sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100)
 
 sample.count = merge(sample.count , unprod.sample.count, by="Sample", all.x=T)
-print(sample.count)
 sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100)
 sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample", all.x=T)
 sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100)
-
-print(sample.count)
 
 #then sample/replicate
 rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate"), all.x=T)
 rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100)
 rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T)
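The change in this hunk only removes two debugging print(sample.count) calls; the surrounding code left-joins per-sample count tables and derives percentage columns from them. A minimal sketch of that merge-and-percentage pattern, using hypothetical toy counts (the object and column names mirror the script; the numbers are made up):

all.count  = data.frame(Sample=c("S1", "S2"), All=c(200, 150))
prod.count = data.frame(Sample=c("S1", "S2"), Productive=c(120, 100))

# left join on Sample, then express productive reads as a percentage of all reads
sample.count = merge(all.count, prod.count, by="Sample", all.x=T)
sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100)
print(sample.count)  # perc_prod: 60 for S1, 67 for S2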
@@ -545,10 +542,11 @@
 # ---------------------- calculating the clonality score ----------------------
 
 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available
 {
     print("Report Clonality - Clonality")
+    write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T)
     if(clonality_method == "boyd"){
         samples = split(clonalityFrame, clonalityFrame$Sample, drop=T)
 
         for (sample in samples){
             res = data.frame(paste=character(0))
@@ -571,13 +569,27 @@
             coincidence.table = data.frame(table(res$type))
             colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq")
             write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
         }
     } else {
-        write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T)
-
         clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])
+
+        #write files for every coincidence group of >1
+        samples = unique(clonalFreq$Sample)
+        for(sample in samples){
+            clonalFreqSample = clonalFreq[clonalFreq$Sample == sample,]
+            if(max(clonalFreqSample$Type) > 1){
+                for(i in 2:max(clonalFreqSample$Type)){
+                    clonalFreqSample = clonalFreqSample[clonalFreqSample$Type == i,]
+                    print(clonalFreqSample[clonalFreqSample$Type == i,])
+                    PRODF.sub = PRODF[PRODF$clonaltype %in% clonalFreqSample$clonaltype,]
+                    PRODF.sub = PRODF.sub[order(PRODF.sub$clonaltype),]
+                    write.table(PRODF.sub, file=paste("coincidences_", sample, "_", i, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+                }
+            }
+        }
+
         clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])
         clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count
         clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])
         clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample")
 
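The block added in this hunk writes a coincidences_<sample>_<i>.txt file for every coincidence level i > 1, i.e. clonal types observed more than once within a sample, and moves the clonalityComplete.csv export up so it runs for both clonality methods. A minimal, self-contained sketch of the data.table counting steps it builds on, with a hypothetical toy clonalityFrame (the PRODF lookup and file output are omitted):

library(data.table)

clonalityFrame = data.frame(Sample     = c("S1", "S1", "S1", "S1"),
                            clonaltype = c("A", "A", "B", "C"))

# occurrences of each clonal type within a sample ("Type" is the coincidence level)
clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])

# how many clonal types sit at each coincidence level, and the reads they account for
clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])
clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count
print(clonalFreqCount)  # level 2: one clonal type ("A"); level 1: two clonal types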
@@ -655,13 +667,12 @@
     print(paste("Removing", sum(fltr), "sequences without a identified D"))
     PRODF = PRODF[!fltr,]
 }
 
 
-print(names(PRODF))
 #ensure certain columns are in the data (files generated with older versions of IMGT Loader)
-col.checks = c("N3.REGION.nt.nb", "N4.REGION.nt.nb")
+col.checks = c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb")
 for(col.check in col.checks){
     if(!(col.check %in% names(PRODF))){
         print(paste(col.check, "not found adding new column"))
         if(nrow(PRODF) > 0){ #because R is anoying...
             PRODF[,col.check] = 0
@@ -673,12 +684,10 @@
         } else {
             UNPROD = cbind(UNPROD, data.frame(N3.REGION.nt.nb=numeric(0), N4.REGION.nt.nb=numeric(0)))
         }
     }
 }
-
-print(names(PRODF))
 
 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) }
 
 newData = data.frame(data.table(PRODF)[,list(unique=.N,
     VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
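The widened col.checks vector in this hunk makes the script tolerate files from older IMGT Loader versions that lack some of the N-region nucleotide-count columns: any missing column is added and, when the frame has rows, filled with 0. A minimal sketch of that guard on a hypothetical toy frame (same column names, made-up values):

PRODF = data.frame(Sample="S1", N3.REGION.nt.nb=2)  # toy frame missing most N-region columns

col.checks = c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb")
for(col.check in col.checks){
    if(!(col.check %in% names(PRODF))){  # column absent in this (older) file
        PRODF[,col.check] = 0            # create it with a default of 0 so later aggregations work
    }
}
print(names(PRODF))  # Sample plus all five N-region columns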