0
|
1 args <- commandArgs(trailingOnly = TRUE)
|
|
2
|
|
3 inFile = args[1]
|
|
4 outDir = args[2]
|
3
|
5 logfile = args[3]
|
|
6 min_freq = as.numeric(args[4])
|
|
7 min_cells = as.numeric(args[5])
|
29
|
8 mergeOn = args[6]
|
3
|
9
|
|
10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
|
0
|
11
|
4
|
12 library(ggplot2)
|
|
13 library(reshape2)
|
|
14 library(data.table)
|
|
15 library(grid)
|
|
16 library(parallel)
|
0
|
17 #require(xtable)
|
3
|
18 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
|
13
|
19 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
|
9
|
20 dat = dat[!is.na(dat$Patient),]
|
13
|
21 dat$Related_to_leukemia_clone = F
|
9
|
22
|
0
|
23 setwd(outDir)
|
3
|
24 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
|
2
|
25 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
|
|
26 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
|
|
27
|
3
|
28 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
|
12
|
29
|
13
|
30 dat$Frequency = ((10^dat$Log10_Frequency)*100)
|
2
|
31
|
3
|
32 dat = dat[dat$Frequency >= min_freq,]
|
|
33
|
13
|
34 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
|
|
35
|
|
36 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
|
|
37
|
|
38 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
|
|
39 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
|
|
40 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
|
|
41
|
|
42 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
|
|
43 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
44
|
|
45 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
|
46
|
|
47 dat = merge(dat, min_cell_count, by="min_cell_paste")
|
|
48
|
|
49 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
50
|
3
|
51 dat = dat[dat$normalized_read_count >= min_cells,]
|
13
|
52
|
|
53 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
|
9
|
54
|
29
|
55 cat("<tr><td>Adding duplicate V+J+CDR3 sequences</td></tr>", file=logfile, append=T)
|
|
56 #remove duplicate V+J+CDR3, add together numerical values
|
|
57 dat= data.frame(data.table(dat)[, list(Receptor=unique(.SD$Receptor),
|
|
58 Cell_Count=unique(.SD$Cell_Count),
|
|
59 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
|
|
60 Total_Read_Count=sum(.SD$Total_Read_Count),
|
|
61 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
|
|
62 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
|
|
63 Frequency=sum(.SD$Frequency),
|
|
64 locus_V=unique(.SD$locus_V),
|
|
65 locus_J=unique(.SD$locus_J),
|
|
66 min_cell_count=unique(.SD$min_cell_count),
|
|
67 normalized_read_count=sum(.SD$normalized_read_count),
|
|
68 Log10_Frequency=sum(.SD$Log10_Frequency),
|
|
69 Clone_Sequence=.SD$Clone_Sequence[1],
|
|
70 min_cell_paste=.SD$min_cell_paste[1],
|
|
71 paste=unique(.SD$paste)), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
|
|
72
|
|
73
|
22
|
74 patients = split(dat, dat$Patient, drop=T)
|
9
|
75 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
|
6
|
76 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
|
0
|
77 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
|
|
78 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
|
|
79 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
|
|
80 Titles = factor(Titles, levels=Titles)
|
|
81 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
|
|
82
|
29
|
83 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
|
|
84
|
0
|
85 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
|
28
|
86 if (!is.data.frame(x) & is.list(x)){
|
|
87 x = x[[1]]
|
|
88 }
|
0
|
89 x$Sample = factor(x$Sample, levels=unique(x$Sample))
|
|
90 onShort = "reads"
|
|
91 if(on == "Frequency"){
|
|
92 onShort = "freq"
|
|
93 }
|
18
|
94 onx = paste(on, ".x", sep="")
|
|
95 ony = paste(on, ".y", sep="")
|
0
|
96 splt = split(x, x$Sample, drop=T)
|
4
|
97 type="pair"
|
2
|
98 if(length(splt) == 1){
|
3
|
99 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
|
4
|
100 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
|
|
101 type="single"
|
2
|
102 }
|
0
|
103 patient1 = splt[[1]]
|
|
104 patient2 = splt[[2]]
|
|
105
|
|
106 threshholdIndex = which(colnames(product) == "interval")
|
|
107 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
108 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
109 titleIndex = which(colnames(product) == "Titles")
|
|
110 sampleIndex = which(colnames(x) == "Sample")
|
|
111 patientIndex = which(colnames(x) == "Patient")
|
|
112 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
113 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
114 patient = paste(x[1,patientIndex])
|
3
|
115
|
0
|
116 switched = F
|
|
117 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
|
|
118 tmp = twoSample
|
|
119 twoSample = oneSample
|
|
120 oneSample = tmp
|
|
121 tmp = patient1
|
|
122 patient1 = patient2
|
|
123 patient2 = tmp
|
|
124 switched = T
|
|
125 }
|
|
126 if(appendtxt){
|
4
|
127 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
|
0
|
128 }
|
3
|
129 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
|
9
|
130
|
29
|
131 if(mergeOn == "Clone_Sequence"){
|
|
132 patient1$merge = paste(patient1$Clone_Sequence)
|
|
133 patient2$merge = paste(patient2$Clone_Sequence)
|
|
134 } else {
|
|
135 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
136 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
137 }
|
9
|
138
|
30
|
139 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
|
29
|
140 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
|
30
|
141 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
|
|
142 scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
|
29
|
143 scatterplot_data$on = onShort
|
|
144
|
9
|
145 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
|
18
|
146 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
|
0
|
147 res1 = vector()
|
|
148 res2 = vector()
|
|
149 resBoth = vector()
|
|
150 read1Count = vector()
|
|
151 read2Count = vector()
|
2
|
152 locussum1 = vector()
|
|
153 locussum2 = vector()
|
9
|
154
|
0
|
155 #for(iter in 1){
|
|
156 for(iter in 1:length(product[,1])){
|
|
157 threshhold = product[iter,threshholdIndex]
|
|
158 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
159 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
160 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
|
29
|
161 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
|
30
|
162 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
|
|
163 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
|
14
|
164 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
|
|
165 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
|
0
|
166 res1 = append(res1, sum(one))
|
2
|
167 res2 = append(res2, sum(two))
|
0
|
168 resBoth = append(resBoth, sum(both))
|
2
|
169 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
|
|
170 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
|
0
|
171 #threshhold = 0
|
|
172 if(threshhold != 0){
|
|
173 if(sum(one) > 0){
|
15
|
174 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
175 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
176 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
177 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
178 }
|
|
179 if(sum(two) > 0){
|
15
|
180 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
181 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
182 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
183 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
184 }
|
29
|
185 } else {
|
|
186 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
|
187 if(nrow(scatterplot_locus_data) > 0){
|
|
188 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
|
|
189 }
|
30
|
190 in_both = (scatterplot_locus_data$merge %in% patientMerge[both,]$merge)
|
|
191 if(any(in_both)){
|
|
192 scatterplot_locus_data[in_both,]$type = "In Both"
|
|
193 }
|
|
194 in_one = (scatterplot_locus_data$merge %in% patient1$merge)
|
|
195 not_in_one = !in_one
|
|
196 if(any(not_in_one)){
|
|
197 scatterplot_locus_data[not_in_one,]$type = twoSample
|
29
|
198 }
|
|
199 if(type == "single"){
|
|
200 single_patients <<- rbind(single_patients, scatterplot_locus_data)
|
|
201 }
|
|
202 p = NULL
|
|
203 if(nrow(scatterplot_locus_data) != 0){
|
|
204 if(on == "normalized_read_count"){
|
30
|
205 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
|
206 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
|
29
|
207 } else {
|
30
|
208 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
|
29
|
209 }
|
|
210 p = p + geom_point(aes(colour=type), position="jitter")
|
|
211 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
212 } else {
|
|
213 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
214 }
|
|
215 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
216 print(p)
|
|
217 dev.off()
|
0
|
218 }
|
|
219 if(sum(both) > 0){
|
15
|
220 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
221 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
0
|
222 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
223 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
29
|
224 }
|
0
|
225 }
|
2
|
226 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
|
0
|
227 if(sum(is.na(patientResult$percentage)) > 0){
|
|
228 patientResult[is.na(patientResult$percentage),]$percentage = 0
|
|
229 }
|
|
230 colnames(patientResult)[6] = oneSample
|
|
231 colnames(patientResult)[8] = twoSample
|
|
232 colnamesBak = colnames(patientResult)
|
2
|
233 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
|
0
|
234 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
235 colnames(patientResult) = colnamesBak
|
|
236
|
|
237 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
238 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
239
|
|
240 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
|
|
241 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
|
|
242 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
243 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
|
|
244 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
|
|
245 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
246 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
247 print(plt)
|
|
248 dev.off()
|
|
249 #(t,r,b,l)
|
|
250 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
|
|
251 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
|
|
252 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
253 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
|
|
254 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
|
|
255 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
256 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
257 print(plt)
|
|
258 dev.off()
|
|
259
|
|
260 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
|
|
261 patientResult$relativeValue = patientResult$value * 10
|
|
262 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
263 plt = ggplot(patientResult)
|
|
264 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
265 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
266 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
267 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
|
|
268 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
|
|
269 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
|
|
270 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
|
|
271 print(plt)
|
|
272 dev.off()
|
|
273 }
|
|
274
|
3
|
275 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
|
|
276
|
0
|
277 interval = intervalFreq
|
|
278 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
279 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
29
|
280 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
|
0
|
281
|
3
|
282 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
|
|
283
|
0
|
284 interval = intervalReads
|
|
285 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
286 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
29
|
287 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
|
0
|
288
|
3
|
289 cat("</table></html>", file=logfile, append=T)
|
|
290
|
33
|
291
|
|
292 if(nrow(single_patients) > 0){
|
|
293 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
|
294 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
|
|
295 p = p + geom_point(aes(colour=type), position="jitter")
|
|
296 p = p + xlab("In one or both samples") + ylab("Reads")
|
|
297 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
|
|
298 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
|
|
299 print(p)
|
|
300 dev.off()
|
7
|
301
|
33
|
302 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
|
|
303 p = p + geom_point(aes(colour=type), position="jitter")
|
|
304 p = p + xlab("In one or both samples") + ylab("Frequency")
|
|
305 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
|
|
306 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
|
|
307 print(p)
|
|
308 dev.off()
|
|
309 } else {
|
|
310 empty <- data.frame()
|
|
311 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
|
|
312
|
|
313 png("singles_reads_scatterplot.png", width=400, height=300)
|
|
314 print(p)
|
|
315 dev.off()
|
|
316
|
|
317 png("singles_freq_scatterplot.png", width=400, height=300)
|
|
318 print(p)
|
|
319 dev.off()
|
|
320 }
|
7
|
321 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
|
|
322 onShort = "reads"
|
|
323 if(on == "Frequency"){
|
|
324 onShort = "freq"
|
|
325 }
|
18
|
326 onx = paste(on, ".x", sep="")
|
|
327 ony = paste(on, ".y", sep="")
|
|
328 onz = paste(on, ".z", sep="")
|
7
|
329 type="triplet"
|
|
330
|
|
331 threshholdIndex = which(colnames(product) == "interval")
|
|
332 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
333 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
334 titleIndex = which(colnames(product) == "Titles")
|
|
335 sampleIndex = which(colnames(patient1) == "Sample")
|
|
336 patientIndex = which(colnames(patient1) == "Patient")
|
|
337 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
338 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
339 threeSample = paste(patient3[1,sampleIndex], sep="")
|
|
340
|
29
|
341 if(mergeOn == "Clone_Sequence"){
|
|
342 patient1$merge = paste(patient1$Clone_Sequence)
|
|
343 patient2$merge = paste(patient2$Clone_Sequence)
|
|
344 patient3$merge = paste(patient3$Clone_Sequence)
|
|
345
|
|
346 } else {
|
|
347 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
348 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
349 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
|
|
350 }
|
9
|
351
|
|
352 patientMerge = merge(patient1, patient2, by="merge")
|
|
353 patientMerge = merge(patientMerge, patient3, by="merge")
|
28
|
354 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
|
18
|
355 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
|
20
|
356 patientMerge12 = merge(patient1, patient2, by="merge")
|
|
357 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
|
|
358 patientMerge13 = merge(patient1, patient3, by="merge")
|
|
359 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
|
|
360 patientMerge23 = merge(patient2, patient3, by="merge")
|
|
361 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
|
|
362
|
28
|
363
|
30
|
364 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
|
20
|
365 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
|
30
|
366 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
|
27
|
367 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
|
20
|
368
|
7
|
369 res1 = vector()
|
|
370 res2 = vector()
|
|
371 res3 = vector()
|
20
|
372 res12 = vector()
|
|
373 res13 = vector()
|
|
374 res23 = vector()
|
7
|
375 resAll = vector()
|
|
376 read1Count = vector()
|
|
377 read2Count = vector()
|
|
378 read3Count = vector()
|
|
379
|
|
380 if(appendTriplets){
|
9
|
381 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
|
7
|
382 }
|
|
383 for(iter in 1:length(product[,1])){
|
|
384 threshhold = product[iter,threshholdIndex]
|
|
385 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
386 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
387 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
|
|
388 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
|
7
|
389
|
30
|
390 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
|
|
391 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
|
|
392 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
|
24
|
393
|
30
|
394 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
|
|
395 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
|
|
396 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
|
20
|
397
|
18
|
398 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
|
|
399 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
|
|
400 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
|
7
|
401 res1 = append(res1, sum(one))
|
|
402 res2 = append(res2, sum(two))
|
|
403 res3 = append(res3, sum(three))
|
|
404 resAll = append(resAll, sum(all))
|
20
|
405 res12 = append(res12, sum(one_two))
|
|
406 res13 = append(res13, sum(one_three))
|
|
407 res23 = append(res23, sum(two_three))
|
7
|
408 #threshhold = 0
|
|
409 if(threshhold != 0){
|
|
410 if(sum(one) > 0){
|
20
|
411 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
412 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
413 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
414 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
415 }
|
|
416 if(sum(two) > 0){
|
20
|
417 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
418 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
419 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
420 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
421 }
|
|
422 if(sum(three) > 0){
|
20
|
423 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
424 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
425 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
426 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
427 }
|
20
|
428 if(sum(one_two) > 0){
|
|
429 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
430 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
|
431 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
432 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
433 }
|
|
434 if(sum(one_three) > 0){
|
|
435 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
436 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
437 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
438 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
439 }
|
|
440 if(sum(two_three) > 0){
|
|
441 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
442 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
443 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
444 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
445 }
|
|
446 } else { #scatterplot data
|
24
|
447 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
30
|
448 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
|
27
|
449 if(sum(in_two) > 0){
|
|
450 scatterplot_locus_data[in_two,]$type = "In two"
|
|
451 }
|
30
|
452 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
|
27
|
453 if(sum(in_three)> 0){
|
|
454 scatterplot_locus_data[in_three,]$type = "In three"
|
|
455 }
|
|
456 not_in_one = scatterplot_locus_data$type != "In one"
|
|
457 if(sum(not_in_one) > 0){
|
|
458 scatterplot_locus_data[not_in_one,]$type = "In multiple"
|
|
459 }
|
20
|
460 p = NULL
|
|
461 if(nrow(scatterplot_locus_data) != 0){
|
|
462 if(on == "normalized_read_count"){
|
31
|
463 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
32
|
464 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
|
20
|
465 } else {
|
32
|
466 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
|
20
|
467 }
|
|
468 p = p + geom_point(aes(colour=type), position="jitter")
|
25
|
469 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
20
|
470 } else {
|
|
471 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
|
472 }
|
|
473 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
474 print(p)
|
|
475 dev.off()
|
|
476 }
|
7
|
477 if(sum(all) > 0){
|
20
|
478 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
|
|
479 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
7
|
480 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
481 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
482 }
|
|
483 }
|
20
|
484 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
|
|
485 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
|
7
|
486 colnames(patientResult)[6] = oneSample
|
20
|
487 colnames(patientResult)[7] = twoSample
|
|
488 colnames(patientResult)[8] = threeSample
|
|
489 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
|
|
490 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
|
|
491 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
|
7
|
492
|
|
493 colnamesBak = colnames(patientResult)
|
20
|
494 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
|
7
|
495 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
496 colnames(patientResult) = colnamesBak
|
|
497
|
|
498 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
499 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
500
|
|
501 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
|
|
502 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
|
|
503 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
504 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
|
|
505 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
|
|
506 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
507 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
|
|
508 print(plt)
|
|
509 dev.off()
|
|
510
|
|
511 fontSize = 4
|
|
512
|
|
513 bak = patientResult
|
|
514 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
|
|
515 patientResult$relativeValue = patientResult$value * 10
|
|
516 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
517 plt = ggplot(patientResult)
|
|
518 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
519 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
520 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
521 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
|
|
522 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
|
|
523 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
|
|
524 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
|
|
525 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
|
|
526 print(plt)
|
|
527 dev.off()
|
|
528 }
|
|
529
|
33
|
530 if(nrow(triplets) != 0){
|
|
531 triplets$uniqueID = "ID"
|
|
532
|
|
533 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
534 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
535 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
536
|
|
537 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
538 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
539 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
540
|
|
541 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
|
|
542
|
|
543 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
|
|
544 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
|
|
545 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
|
|
546
|
|
547 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
|
|
548 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
549
|
|
550 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
|
551
|
|
552 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
|
|
553
|
|
554 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
555
|
|
556 triplets = triplets[triplets$normalized_read_count >= min_cells,]
|
|
557
|
|
558 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
|
|
559
|
|
560 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
|
|
561
|
|
562 #remove duplicate V+J+CDR3, add together numerical values
|
|
563 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
|
|
564 Cell_Count=unique(.SD$Cell_Count),
|
|
565 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
|
|
566 Total_Read_Count=sum(.SD$Total_Read_Count),
|
|
567 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
|
|
568 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
|
|
569 Frequency=sum(.SD$Frequency),
|
|
570 normalized_read_count=sum(.SD$normalized_read_count),
|
|
571 Log10_Frequency=sum(.SD$Log10_Frequency),
|
|
572 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
|
|
573
|
|
574
|
|
575 interval = intervalReads
|
|
576 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
577 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
578
|
|
579 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
580 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
581 three = triplets[triplets$Sample == "24062_reg_BM",]
|
|
582 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
|
|
583
|
|
584 one = triplets[triplets$Sample == "16278_Left",]
|
|
585 two = triplets[triplets$Sample == "26402_Left",]
|
|
586 three = triplets[triplets$Sample == "26759_Left",]
|
|
587 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
|
|
588
|
|
589 one = triplets[triplets$Sample == "16278_Right",]
|
|
590 two = triplets[triplets$Sample == "26402_Right",]
|
|
591 three = triplets[triplets$Sample == "26759_Right",]
|
|
592 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
|
|
593
|
|
594
|
|
595 interval = intervalFreq
|
|
596 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
597 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
598
|
|
599 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
600 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
601 three = triplets[triplets$Sample == "24062_reg_BM",]
|
|
602 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
|
|
603
|
|
604 one = triplets[triplets$Sample == "16278_Left",]
|
|
605 two = triplets[triplets$Sample == "26402_Left",]
|
|
606 three = triplets[triplets$Sample == "26759_Left",]
|
|
607 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
|
|
608
|
|
609 one = triplets[triplets$Sample == "16278_Right",]
|
|
610 two = triplets[triplets$Sample == "26402_Right",]
|
|
611 three = triplets[triplets$Sample == "26759_Right",]
|
|
612 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
|
|
613 } else {
|
|
614 cat("", file="triplets.txt")
|
|
615 }
|