changeset 18:f23d3be6fbc8 draft

Uploaded
author davidvanzessen
date Thu, 19 Feb 2015 09:52:28 -0500
parents 7a9debac9a97
children 5f7ed60975bd
files RScript.r
diffstat 1 files changed, 27 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/RScript.r	Thu Feb 05 08:22:19 2015 -0500
+++ b/RScript.r	Thu Feb 19 09:52:28 2015 -0500
@@ -66,6 +66,8 @@
   if(on == "Frequency"){
     onShort = "freq"
   }
+  onx = paste(on, ".x", sep="")
+  ony = paste(on, ".y", sep="")
   splt = split(x, x$Sample, drop=T)
   type="pair"
   if(length(splt) == 1){
@@ -108,6 +110,7 @@
   
   #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
   patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
+  patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
   res1 = vector()
   res2 = vector()
   resBoth = vector()
@@ -122,9 +125,10 @@
     threshhold = product[iter,threshholdIndex]
     V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
     J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
-    both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold)
-    one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x))
-    two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x))
+    #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
+    both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both higher than threshold
+    one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence))
+    two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence))
     read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
     read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
     res1 = append(res1, sum(one))
@@ -226,6 +230,9 @@
   if(on == "Frequency"){
     onShort = "freq"
   }
+  onx = paste(on, ".x", sep="")
+  ony = paste(on, ".y", sep="")
+  onz = paste(on, ".z", sep="")
   type="triplet"
   
   threshholdIndex = which(colnames(product) == "interval")
@@ -249,6 +256,7 @@
   patientMerge = merge(patient1, patient2, by="merge")
   patientMerge = merge(patientMerge, patient3, by="merge")
   colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="")
+  patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
   res1 = vector()
   res2 = vector()
   res3 = vector()
@@ -264,14 +272,15 @@
     threshhold = product[iter,threshholdIndex]
     V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
     J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
-    all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold & patientMerge[,paste(on, ".z", sep="")] > threshhold)
-    one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x))
-    two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x))
-    three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x))
+    #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold) 
+    all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
+    one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x))
+    two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x))
+    three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x))
     
-    read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
-    read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
-    read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count))
+    read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
+    read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
+    read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
     res1 = append(res1, sum(one))
     res2 = append(res2, sum(two))
     res3 = append(res3, sum(three))
@@ -279,27 +288,27 @@
     #threshhold = 0
     if(threshhold != 0){
       if(sum(one) > 0){
-        dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
-        colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
+        dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
+        colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
         filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
         write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
       }
       if(sum(two) > 0){
-        dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
-        colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
+        dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
+        colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
         filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
         write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
       }
       if(sum(three) > 0){
-        dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
-        colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
+        dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
+        colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
         filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
         write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
       }
     }
     if(sum(all) > 0){
-      dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
-      colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
+      dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
+      colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3_Sense_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
       filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
       write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
     }