comparison RScript.r @ 25:ea5c2a2cc1f3 draft

Uploaded
author davidvanzessen
date Mon, 09 Feb 2015 05:02:33 -0500
parents 5454af6fece1
children 01f05258f672
comparison
equal deleted inserted replaced
24:5454af6fece1 25:ea5c2a2cc1f3
62 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ] 62 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ]
63 UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" ), ] 63 UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" ), ]
64 } 64 }
65 } 65 }
66 66
67 clonalityFrame = PRODF
68
67 #remove duplicates based on the clonaltype 69 #remove duplicates based on the clonaltype
68 if(clonaltype != "none"){ 70 if(clonaltype != "none"){
69 clonaltype = paste(clonaltype, ",Sample", sep="") #add sample column to clonaltype, unique within samples 71 clonaltype = paste(clonaltype, ",Sample", sep="") #add sample column to clonaltype, unique within samples
70 PRODF$clonaltype = do.call(paste, c(PRODF[unlist(strsplit(clonaltype, ","))], sep = ":")) 72 PRODF$clonaltype = do.call(paste, c(PRODF[unlist(strsplit(clonaltype, ","))], sep = ":"))
71 PRODF = PRODF[!duplicated(PRODF$clonaltype), ] 73 PRODF = PRODF[!duplicated(PRODF$clonaltype), ]
74
75 #again for clonalityFrame but with sample+replicate
76 clonalityFrame$clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(clonaltype, ","))], sep = ":"))
77 clonalityFrame$clonality_clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(paste(clonaltype, ",Replicate", sep=""), ","))], sep = ":"))
78 clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$clonality_clonaltype), ]
72 } 79 }
73 80
74 PRODF$freq = 1 81 PRODF$freq = 1
75 82
76 if(any(grepl(pattern="_", x=PRODF$ID))){ #the frequency can be stored in the ID with the pattern ".*_freq_.*" 83 if(any(grepl(pattern="_", x=PRODF$ID))){ #the frequency can be stored in the ID with the pattern ".*_freq_.*"
441 448
442 # ---------------------- calculating the clonality score ---------------------- 449 # ---------------------- calculating the clonality score ----------------------
443 450
444 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available 451 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available
445 { 452 {
446 clonalityFrame = inputdata
447 if(clonaltype != "none"){
448 clonalityFrame$clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(clonaltype, ","))], sep = ":"))
449 clonalityFrame$ReplicateConcat = paste(clonalityFrame$clonaltype, clonalityFrame$Sample, clonalityFrame$Replicate, sep = ":")
450 clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$ReplicateConcat), ]
451 }
452 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) 453 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T)
453 454
454 ClonalitySampleReplicatePrint <- function(dat){ 455 ClonalitySampleReplicatePrint <- function(dat){
455 write.table(dat, paste("clonality_", unique(inputdata$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) 456 write.table(dat, paste("clonality_", unique(inputdata$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
456 } 457 }
476 CT = read.table(tcct, sep="\t", header=TRUE) 477 CT = read.table(tcct, sep="\t", header=TRUE)
477 close(tcct) 478 close(tcct)
478 clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T) 479 clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T)
479 clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight 480 clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight
480 481
481 ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")]) 482 ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonality_clonaltype")])
482 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")]) 483 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")])
483 clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads) 484 clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads)
484 ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads 485 ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads
485 486
486 ReplicatePrint <- function(dat){ 487 ReplicatePrint <- function(dat){