changeset 11:bc4612998d50 draft

Uploaded
author davidvanzessen
date Mon, 06 Oct 2014 05:57:55 -0400
parents 974febc99fd4
children eb5b569b44dd
files RScript.r
diffstat 1 files changed, 13 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/RScript.r	Wed Oct 01 08:11:47 2014 -0400
+++ b/RScript.r	Mon Oct 06 05:57:55 2014 -0400
@@ -25,7 +25,19 @@
 
 str(dat)
 cat("<tr><td>Deduplication</td></tr>", file=logfile, append=T)
-dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
+#dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
+
+# Most frequent non-NA value of x, returned with x's own type; ties go to the
+# smallest value in sort() order. Replaces a data.frame(table(x)) lookup of
+# $Var1, which was always NULL because table(x) names that column "x".
+most.common = function(x){
+  ux = unique(x)
+  if(length(ux) < 2) return(ux)
+  vals = sort(ux)  # sort() also drops NA, so NA is never counted
+  vals[which.max(tabulate(match(x, vals), nbins=length(vals)))]
+}
+# Deduplicate: group rows by Sample, Cell_Count and CDR3_Sense_Sequence; per group take most.common() of the V and J gene columns, unique() of Patient, sum() of the count/frequency columns and any() of Related_to_leukemia_clone. NOTE(review): Log10_Frequency values are summed as-is and later used as 10^Log10_Frequency - confirm that summing logs is intended.
+dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), V_Segment_Major_Gene=most.common(.SD$V_Segment_Major_Gene), J_Segment_Major_Gene=most.common(.SD$J_Segment_Major_Gene), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "CDR3_Sense_Sequence")])
 
 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
 dat$Frequency = ((10^dat$Log10_Frequency)*100)