changeset 51:17e677c72e49 draft

Uploaded
author davidvanzessen
date Fri, 09 Oct 2015 06:58:17 -0400
parents 7dd7cefcf72d
children c5c2a790d476
files RScript.r
diffstat 1 files changed, 30 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/RScript.r	Thu Oct 08 10:07:28 2015 -0400
+++ b/RScript.r	Fri Oct 09 06:58:17 2015 -0400
@@ -65,6 +65,9 @@
 
 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
 
+patient.merge.list = list() #cache the 'both' table per patient: ~2x speedup at the cost of more memory
+patient.merge.list.second = list()
+
 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
   if (!is.data.frame(x) & is.list(x)){
     x = x[[1]]
@@ -110,7 +113,7 @@
   if(appendtxt){
     cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
   }
-  cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
+  cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
   
   if(mergeOn == "Clone_Sequence"){
     patient1$merge = paste(patient1$Clone_Sequence)
@@ -131,11 +134,19 @@
 
   cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
 
-  
-  #fuzzy matching here...
-  if(mergeOn == "Clone_Sequence"){
+  start.time = proc.time()
+  merge.list = c()
+
+  if(patient %in% names(patient.merge.list)){
+    patientMerge = patient.merge.list[[patient]]
+    merge.list[["second"]] = patient.merge.list.second[[patient]]
+    cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
+
+    print(names(patient.merge.list))
+  } else {
+    #fuzzy matching here...
     #merge.list = patientMerge$merge
-    
+
     #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
     #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
 
@@ -144,19 +155,19 @@
 
     #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
     #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
-    
+
     #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
     #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
-    
+
     patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
     patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
-    
+
     #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
     #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
-    
+
     #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
     #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
-    
+
     patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
     patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
 
@@ -170,15 +181,15 @@
       first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
       first.sample = patient.fuzzy[1,"Sample"]
       merge.filter = first.merge == patient.fuzzy$merge
-      
+
       #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
-      
+
       first.sample.filter = first.sample == patient.fuzzy$Sample
       second.sample.filter = first.sample != patient.fuzzy$Sample
 
       #first match same sample, sum to a single row, same for other sample
       #then merge rows like 'normal'
-      
+
       sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
 
 
@@ -266,9 +277,13 @@
         patient.fuzzy = patient.fuzzy[-1,]
       }
     }
-    
+    patient.merge.list[[patient]] <<- patientMerge
+    patient.merge.list.second[[patient]] <<- merge.list[["second"]]
+    cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
   }
-  
+
+  print(names(patient.merge.list))
+
   
   patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
   res1 = vector()