Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples

--- a/RScript.r	Tue Jun 02 05:33:58 2015 -0400
+++ b/RScript.r	Fri Jul 24 05:33:02 2015 -0400
@@ -288,23 +288,36 @@

 cat("</table></html>", file=logfile, append=T)

-scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
-p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
-p = p + geom_point(aes(colour=type), position="jitter")
-p = p + xlab("In one or both samples") + ylab("Reads")
-p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
-png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080)
-print(p)
-dev.off()
+
+if(nrow(single_patients) > 0){
+	scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
+	p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
+	p = p + geom_point(aes(colour=type), position="jitter")
+	p = p + xlab("In one or both samples") + ylab("Reads")
+	p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
+	png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
+	print(p)
+	dev.off()

-p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
-p = p + geom_point(aes(colour=type), position="jitter")
-p = p + xlab("In one or both samples") + ylab("Frequency")
-p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
-png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080)
-print(p)
-dev.off()
-
+	p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
+	p = p + geom_point(aes(colour=type), position="jitter")
+	p = p + xlab("In one or both samples") + ylab("Frequency")
+	p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
+	png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
+	print(p)
+	dev.off()
+} else {
+	empty <- data.frame()
+	p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
+
+	png("singles_reads_scatterplot.png", width=400, height=300)
+	print(p)
+	dev.off()
+
+	png("singles_freq_scatterplot.png", width=400, height=300)
+	print(p)
+	dev.off()
+}
 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
   onShort = "reads"
   if(on == "Frequency"){
@@ -514,85 +527,89 @@
   dev.off()
 }

-triplets$uniqueID = "ID"
-
-triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
-triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
-triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
-
-triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
-triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
-triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
-
-triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
-
-triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
-triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
-min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
-
-triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
-min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
-
-min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
-
-triplets = merge(triplets, min_cell_count, by="min_cell_paste")
-
-triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
-
-triplets = triplets[triplets$normalized_read_count >= min_cells,]
-
-column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
-
-triplets = triplets[,!(colnames(triplets) %in% column_drops)]
-
-#remove duplicate V+J+CDR3, add together numerical values
-triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
-                                                 Cell_Count=unique(.SD$Cell_Count),
-                                                 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
-                                                 Total_Read_Count=sum(.SD$Total_Read_Count),
-                                                 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
-                                                 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
-                                                 Frequency=sum(.SD$Frequency),
-                                                 normalized_read_count=sum(.SD$normalized_read_count),
-                                                 Log10_Frequency=sum(.SD$Log10_Frequency),
-                                                 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
-
-
-interval = intervalReads
-intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
-product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
-
-one = triplets[triplets$Sample == "14696_reg_BM",]
-two = triplets[triplets$Sample == "24536_reg_BM",]
-three = triplets[triplets$Sample == "24062_reg_BM",]
-tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
-
-one = triplets[triplets$Sample == "16278_Left",]
-two = triplets[triplets$Sample == "26402_Left",]
-three = triplets[triplets$Sample == "26759_Left",]
-tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
-
-one = triplets[triplets$Sample == "16278_Right",]
-two = triplets[triplets$Sample == "26402_Right",]
-three = triplets[triplets$Sample == "26759_Right",]
-tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
-
-
-interval = intervalFreq
-intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
-product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
-
-one = triplets[triplets$Sample == "14696_reg_BM",]
-two = triplets[triplets$Sample == "24536_reg_BM",]
-three = triplets[triplets$Sample == "24062_reg_BM",]
-tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
-
-one = triplets[triplets$Sample == "16278_Left",]
-two = triplets[triplets$Sample == "26402_Left",]
-three = triplets[triplets$Sample == "26759_Left",]
-tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
-
-one = triplets[triplets$Sample == "16278_Right",]
-two = triplets[triplets$Sample == "26402_Right",]
-three = triplets[triplets$Sample == "26759_Right",]
-tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
+if(nrow(triplets) != 0){
+  triplets$uniqueID = "ID"
+
+  triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
+  triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
+  triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
+
+  triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
+  triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
+  triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
+
+  triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
+
+  triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
+  triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
+  min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
+
+  triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
+  min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
+
+  min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
+
+  triplets = merge(triplets, min_cell_count, by="min_cell_paste")
+
+  triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
+
+  triplets = triplets[triplets$normalized_read_count >= min_cells,]
+
+  column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
+
+  triplets = triplets[,!(colnames(triplets) %in% column_drops)]
+
+  #remove duplicate V+J+CDR3, add together numerical values
+  triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
+                                                   Cell_Count=unique(.SD$Cell_Count),
+                                                   Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
+                                                   Total_Read_Count=sum(.SD$Total_Read_Count),
+                                                   dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
+                                                   Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
+                                                   Frequency=sum(.SD$Frequency),
+                                                   normalized_read_count=sum(.SD$normalized_read_count),
+                                                   Log10_Frequency=sum(.SD$Log10_Frequency),
+                                                   Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
+
+
+  interval = intervalReads
+  intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
+  product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
+
+  one = triplets[triplets$Sample == "14696_reg_BM",]
+  two = triplets[triplets$Sample == "24536_reg_BM",]
+  three = triplets[triplets$Sample == "24062_reg_BM",]
+  tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
+
+  one = triplets[triplets$Sample == "16278_Left",]
+  two = triplets[triplets$Sample == "26402_Left",]
+  three = triplets[triplets$Sample == "26759_Left",]
+  tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
+
+  one = triplets[triplets$Sample == "16278_Right",]
+  two = triplets[triplets$Sample == "26402_Right",]
+  three = triplets[triplets$Sample == "26759_Right",]
+  tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
+
+
+  interval = intervalFreq
+  intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
+  product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
+
+  one = triplets[triplets$Sample == "14696_reg_BM",]
+  two = triplets[triplets$Sample == "24536_reg_BM",]
+  three = triplets[triplets$Sample == "24062_reg_BM",]
+  tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
+
+  one = triplets[triplets$Sample == "16278_Left",]
+  two = triplets[triplets$Sample == "26402_Left",]
+  three = triplets[triplets$Sample == "26759_Left",]
+  tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
+
+  one = triplets[triplets$Sample == "16278_Right",]
+  two = triplets[triplets$Sample == "26402_Right",]
+  three = triplets[triplets$Sample == "26759_Right",]
+  tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
+} else {
+  cat("", file="triplets.txt")
+}