# HG changeset patch # User davidvanzessen # Date 1424357548 18000 # Node ID f23d3be6fbc832780006d98ee9195fdff9013fac # Parent 7a9debac9a975af405c8981efcafbc3f1c36231b Uploaded diff -r 7a9debac9a97 -r f23d3be6fbc8 RScript.r --- a/RScript.r Thu Feb 05 08:22:19 2015 -0500 +++ b/RScript.r Thu Feb 19 09:52:28 2015 -0500 @@ -66,6 +66,8 @@ if(on == "Frequency"){ onShort = "freq" } + onx = paste(on, ".x", sep="") + ony = paste(on, ".y", sep="") splt = split(x, x$Sample, drop=T) type="pair" if(length(splt) == 1){ @@ -108,6 +110,7 @@ #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") + patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) res1 = vector() res2 = vector() resBoth = vector() @@ -122,9 +125,10 @@ threshhold = product[iter,threshholdIndex] V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") - both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold) - one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x)) - two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x)) + #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold + both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both higher than threshold + one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence)) + two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence)) read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) res1 = append(res1, sum(one)) @@ -226,6 +230,9 @@ if(on == "Frequency"){ onShort = "freq" } + onx = paste(on, ".x", sep="") + ony = paste(on, ".y", sep="") + onz = paste(on, ".z", sep="") type="triplet" threshholdIndex = which(colnames(product) == "interval") @@ -249,6 +256,7 @@ patientMerge = merge(patient1, patient2, by="merge") patientMerge = merge(patientMerge, patient3, by="merge") colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="") + patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) res1 = vector() res2 = vector() res3 = vector() @@ -264,14 +272,15 @@ threshhold = product[iter,threshholdIndex] V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") - all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold & patientMerge[,paste(on, ".z", sep="")] > threshhold) - one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) - two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) - three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) + #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold) + all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) + one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) + two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) + three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) - read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) - read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) - read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count)) + read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) + read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) + read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z)) res1 = append(res1, sum(one)) res2 = append(res2, sum(two)) res3 = append(res3, sum(three)) @@ -279,27 +288,27 @@ #threshhold = 0 if(threshhold != 0){ if(sum(one) > 0){ - dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] - colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") + dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] + colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } if(sum(two) > 0){ - dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] - colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") + dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] + colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } if(sum(three) > 0){ - dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] - colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") + dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] + colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) } } if(sum(all) > 0){ - dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] - colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) + dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] + colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3_Sense_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) }