0
|
1 args <- commandArgs(trailingOnly = TRUE)
|
|
2
|
|
3 inFile = args[1]
|
|
4 outDir = args[2]
|
3
|
5 logfile = args[3]
|
|
6 min_freq = as.numeric(args[4])
|
|
7 min_cells = as.numeric(args[5])
|
29
|
8 mergeOn = args[6]
|
3
|
9
|
|
10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
|
0
|
11
|
4
|
12 library(ggplot2)
|
|
13 library(reshape2)
|
|
14 library(data.table)
|
|
15 library(grid)
|
|
16 library(parallel)
|
0
|
17 #require(xtable)
|
3
|
18 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
|
13
|
19 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
|
34
|
20 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Related_to_leukemia_clone", "Clone_Sequence")]
|
|
21 dat$dsPerM = 0
|
9
|
22 dat = dat[!is.na(dat$Patient),]
|
13
|
23 dat$Related_to_leukemia_clone = F
|
9
|
24
|
0
|
25 setwd(outDir)
|
3
|
26 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
|
2
|
27 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
|
|
28 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
|
|
29
|
3
|
30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
|
12
|
31
|
13
|
32 dat$Frequency = ((10^dat$Log10_Frequency)*100)
|
2
|
33
|
3
|
34 dat = dat[dat$Frequency >= min_freq,]
|
|
35
|
13
|
36 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
|
|
37
|
|
38 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
|
|
39
|
|
40 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
|
|
41 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
|
|
42 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
|
|
43
|
|
44 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
|
|
45 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
46
|
|
47 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
34
|
48 print(paste("rows:", nrow(dat)))
|
13
|
49 dat = merge(dat, min_cell_count, by="min_cell_paste")
|
34
|
50 print(paste("rows:", nrow(dat)))
|
13
|
51 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
52
|
3
|
53 dat = dat[dat$normalized_read_count >= min_cells,]
|
13
|
54
|
|
55 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
|
9
|
56
|
22
|
57 patients = split(dat, dat$Patient, drop=T)
|
9
|
58 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
|
6
|
59 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
|
0
|
60 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
|
|
61 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
|
|
62 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
|
|
63 Titles = factor(Titles, levels=Titles)
|
|
64 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
|
|
65
|
29
|
66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
|
|
67
|
0
|
68 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
|
28
|
69 if (!is.data.frame(x) & is.list(x)){
|
|
70 x = x[[1]]
|
|
71 }
|
34
|
72 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
|
|
73 x = data.frame(x,stringsAsFactors=F)
|
0
|
74 onShort = "reads"
|
|
75 if(on == "Frequency"){
|
|
76 onShort = "freq"
|
|
77 }
|
18
|
78 onx = paste(on, ".x", sep="")
|
|
79 ony = paste(on, ".y", sep="")
|
0
|
80 splt = split(x, x$Sample, drop=T)
|
4
|
81 type="pair"
|
2
|
82 if(length(splt) == 1){
|
3
|
83 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
|
4
|
84 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
|
|
85 type="single"
|
2
|
86 }
|
0
|
87 patient1 = splt[[1]]
|
|
88 patient2 = splt[[2]]
|
|
89
|
|
90 threshholdIndex = which(colnames(product) == "interval")
|
|
91 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
92 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
93 titleIndex = which(colnames(product) == "Titles")
|
|
94 sampleIndex = which(colnames(x) == "Sample")
|
|
95 patientIndex = which(colnames(x) == "Patient")
|
|
96 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
97 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
98 patient = paste(x[1,patientIndex])
|
3
|
99
|
0
|
100 switched = F
|
|
101 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
|
|
102 tmp = twoSample
|
|
103 twoSample = oneSample
|
|
104 oneSample = tmp
|
|
105 tmp = patient1
|
|
106 patient1 = patient2
|
|
107 patient2 = tmp
|
|
108 switched = T
|
|
109 }
|
|
110 if(appendtxt){
|
4
|
111 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
|
0
|
112 }
|
3
|
113 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
|
9
|
114
|
29
|
115 if(mergeOn == "Clone_Sequence"){
|
|
116 patient1$merge = paste(patient1$Clone_Sequence)
|
|
117 patient2$merge = paste(patient2$Clone_Sequence)
|
|
118 } else {
|
|
119 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
120 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
121 }
|
9
|
122
|
30
|
123 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
|
29
|
124 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
|
30
|
125 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
|
|
126 scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
|
29
|
127 scatterplot_data$on = onShort
|
|
128
|
49
|
129 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
|
|
130 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
|
|
131
|
|
132 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
|
|
133
|
34
|
134
|
|
135 #fuzzy matching here...
|
|
136 if(mergeOn == "Clone_Sequence"){
|
49
|
137 #merge.list = patientMerge$merge
|
34
|
138
|
49
|
139 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
|
|
140 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
|
|
141
|
|
142 patient1.fuzzy = patient1
|
|
143 patient2.fuzzy = patient2
|
|
144
|
36
|
145 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
|
|
146 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
|
|
147
|
48
|
148 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
|
|
149 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
|
|
150
|
|
151 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
|
|
152 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
|
34
|
153
|
49
|
154 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
|
|
155 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
|
34
|
156
|
49
|
157 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
|
|
158 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
|
34
|
159
|
37
|
160 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
|
|
161 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
|
49
|
162
|
|
163 merge.list = list()
|
|
164
|
|
165 merge.list[["second"]] = vector()
|
|
166
|
|
167
|
37
|
168 while(nrow(patient.fuzzy) > 1){
|
|
169 first.merge = patient.fuzzy[1,"merge"]
|
|
170 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
|
49
|
171 first.sample = patient.fuzzy[1,"Sample"]
|
37
|
172 merge.filter = first.merge == patient.fuzzy$merge
|
|
173
|
49
|
174 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
|
42
|
175
|
49
|
176 first.sample.filter = first.sample == patient.fuzzy$Sample
|
|
177 second.sample.filter = first.sample != patient.fuzzy$Sample
|
|
178
|
|
179 #first match same sample, sum to a single row, same for other sample
|
|
180 #then merge rows like 'normal'
|
43
|
181
|
48
|
182 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
|
49
|
183
|
|
184
|
|
185
|
44
|
186 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
|
49
|
187 first.match.filter = merge.filter & sequence.filter & first.sample.filter
|
|
188 second.match.filter = merge.filter & sequence.filter & second.sample.filter
|
|
189
|
|
190 first.rows = patient.fuzzy[first.match.filter,]
|
|
191 second.rows = patient.fuzzy[second.match.filter,]
|
|
192
|
|
193 first.sum = data.frame(merge = first.clone.sequence,
|
|
194 Patient = patient,
|
|
195 Receptor = first.rows[1,"Receptor"],
|
|
196 Sample = first.rows[1,"Sample"],
|
|
197 Cell_Count = first.rows[1,"Cell_Count"],
|
|
198 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
|
|
199 Log10_Frequency = log10(sum(first.rows$Frequency)),
|
|
200 Total_Read_Count = sum(first.rows$Total_Read_Count),
|
|
201 dsPerM = sum(first.rows$dsPerM),
|
|
202 J_Segment_Major_Gene = sort(table(first.rows$J_Segment_Major_Gene),decreasing=TRUE)[1],
|
|
203 V_Segment_Major_Gene = sort(table(first.rows$V_Segment_Major_Gene),decreasing=TRUE)[1],
|
|
204 Clone_Sequence = first.clone.sequence,
|
|
205 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
|
|
206 Related_to_leukemia_clone = F,
|
|
207 Frequency = sum(first.rows$Frequency),
|
|
208 locus_V = first.rows[1,"locus_V"],
|
|
209 locus_J = first.rows[1,"locus_J"],
|
|
210 min_cell_count = first.rows[1,"min_cell_count"],
|
|
211 normalized_read_count = sum(first.rows$normalized_read_count),
|
|
212 paste = first.rows[1,"paste"],
|
|
213 min_cell_paste = first.rows[1,"min_cell_paste"])
|
|
214
|
|
215 if(nrow(second.rows) > 0){
|
|
216 second.sum = data.frame(merge = first.clone.sequence,
|
|
217 Patient = patient,
|
|
218 Receptor = second.rows[1,"Receptor"],
|
|
219 Sample = second.rows[1,"Sample"],
|
|
220 Cell_Count = second.rows[1,"Cell_Count"],
|
|
221 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
|
|
222 Log10_Frequency = log10(sum(second.rows$Frequency)),
|
|
223 Total_Read_Count = sum(second.rows$Total_Read_Count),
|
|
224 dsPerM = sum(second.rows$dsPerM),
|
|
225 J_Segment_Major_Gene = sort(table(second.rows$J_Segment_Major_Gene),decreasing=TRUE)[1],
|
|
226 V_Segment_Major_Gene = sort(table(second.rows$V_Segment_Major_Gene),decreasing=TRUE)[1],
|
|
227 Clone_Sequence = first.clone.sequence,
|
|
228 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
|
|
229 Related_to_leukemia_clone = F,
|
|
230 Frequency = sum(second.rows$Frequency),
|
|
231 locus_V = second.rows[1,"locus_V"],
|
|
232 locus_J = second.rows[1,"locus_J"],
|
|
233 min_cell_count = second.rows[1,"min_cell_count"],
|
|
234 normalized_read_count = sum(second.rows$normalized_read_count),
|
|
235 paste = second.rows[1,"paste"],
|
|
236 min_cell_paste = second.rows[1,"min_cell_paste"])
|
|
237
|
|
238 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
|
|
239 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
|
|
240
|
|
241
|
|
242 if(sum(first.match.filter) == 1 & sum(second.match.filter) == 1){
|
|
243 second.clone.sequence = patient.fuzzy[second.match.filter, "Clone_Sequence"]
|
|
244 if(nchar(first.clone.sequence) == nchar(second.clone.sequence)){
|
|
245 merge.list[["second"]] = append(merge.list[["second"]], second.clone.sequence)
|
|
246 }
|
|
247 }
|
|
248
|
|
249 if(nrow(first.rows) > 1 | nrow(second.rows) > 1){
|
|
250
|
|
251 }
|
|
252
|
37
|
253 } else {
|
|
254 patient.fuzzy = patient.fuzzy[-1,]
|
34
|
255 }
|
|
256 }
|
|
257
|
|
258 }
|
|
259
|
37
|
260
|
18
|
261 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
|
0
|
262 res1 = vector()
|
|
263 res2 = vector()
|
|
264 resBoth = vector()
|
|
265 read1Count = vector()
|
|
266 read2Count = vector()
|
2
|
267 locussum1 = vector()
|
|
268 locussum2 = vector()
|
9
|
269
|
0
|
270 #for(iter in 1){
|
|
271 for(iter in 1:length(product[,1])){
|
|
272 threshhold = product[iter,threshholdIndex]
|
|
273 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
274 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
275 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
|
29
|
276 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
|
30
|
277 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
|
|
278 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
|
14
|
279 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
|
|
280 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
|
0
|
281 res1 = append(res1, sum(one))
|
2
|
282 res2 = append(res2, sum(two))
|
0
|
283 resBoth = append(resBoth, sum(both))
|
2
|
284 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
|
|
285 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
|
0
|
286 #threshhold = 0
|
|
287 if(threshhold != 0){
|
|
288 if(sum(one) > 0){
|
15
|
289 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
290 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
291 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
292 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
293 }
|
|
294 if(sum(two) > 0){
|
15
|
295 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
296 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
297 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
298 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
299 }
|
29
|
300 } else {
|
|
301 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
49
|
302 #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[[twoSample]]),]
|
|
303 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
|
29
|
304 if(nrow(scatterplot_locus_data) > 0){
|
|
305 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
|
|
306 }
|
34
|
307 in_one = (scatterplot_locus_data$merge %in% patient1$merge)
|
|
308 in_two = (scatterplot_locus_data$merge %in% patient2$merge)
|
|
309 if(any(in_two)){
|
49
|
310 scatterplot_locus_data[in_two,]$type = twoSample
|
34
|
311 }
|
49
|
312 in_both = (scatterplot_locus_data$merge %in% patientMerge$merge)
|
|
313 #merge.list.filter = (scatterplot_locus_data$merge %in% merge.list[[oneSample]])
|
|
314 #exact.matches.filter = (scatterplot_locus_data$merge %in% cs.exact.matches)
|
30
|
315 if(any(in_both)){
|
|
316 scatterplot_locus_data[in_both,]$type = "In Both"
|
|
317 }
|
29
|
318 if(type == "single"){
|
|
319 single_patients <<- rbind(single_patients, scatterplot_locus_data)
|
|
320 }
|
|
321 p = NULL
|
|
322 if(nrow(scatterplot_locus_data) != 0){
|
|
323 if(on == "normalized_read_count"){
|
30
|
324 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
49
|
325 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=10^6) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
|
29
|
326 } else {
|
49
|
327 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
|
29
|
328 }
|
|
329 p = p + geom_point(aes(colour=type), position="jitter")
|
|
330 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
331 } else {
|
|
332 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
333 }
|
|
334 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
335 print(p)
|
|
336 dev.off()
|
0
|
337 }
|
|
338 if(sum(both) > 0){
|
15
|
339 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
340 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
0
|
341 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
342 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
29
|
343 }
|
0
|
344 }
|
2
|
345 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
|
0
|
346 if(sum(is.na(patientResult$percentage)) > 0){
|
|
347 patientResult[is.na(patientResult$percentage),]$percentage = 0
|
|
348 }
|
|
349 colnames(patientResult)[6] = oneSample
|
|
350 colnames(patientResult)[8] = twoSample
|
|
351 colnamesBak = colnames(patientResult)
|
2
|
352 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
|
0
|
353 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
354 colnames(patientResult) = colnamesBak
|
|
355
|
|
356 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
357 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
358
|
|
359 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
|
|
360 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
|
|
361 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
362 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
|
|
363 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
|
|
364 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
365 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
366 print(plt)
|
|
367 dev.off()
|
|
368 #(t,r,b,l)
|
|
369 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
|
|
370 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
|
|
371 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
372 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
|
|
373 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
|
|
374 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
375 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
376 print(plt)
|
|
377 dev.off()
|
|
378
|
|
379 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
|
|
380 patientResult$relativeValue = patientResult$value * 10
|
|
381 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
382 plt = ggplot(patientResult)
|
|
383 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
384 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
385 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
386 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
|
|
387 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
|
|
388 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
|
|
389 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
|
|
390 print(plt)
|
|
391 dev.off()
|
|
392 }
|
|
393
|
3
|
394 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
|
|
395
|
0
|
396 interval = intervalFreq
|
|
397 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
398 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
29
|
399 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
|
0
|
400
|
3
|
401 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
|
|
402
|
0
|
403 interval = intervalReads
|
|
404 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
405 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
29
|
406 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
|
0
|
407
|
3
|
408 cat("</table></html>", file=logfile, append=T)
|
|
409
|
33
|
410
|
|
411 if(nrow(single_patients) > 0){
|
|
412 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
|
413 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
|
|
414 p = p + geom_point(aes(colour=type), position="jitter")
|
|
415 p = p + xlab("In one or both samples") + ylab("Reads")
|
|
416 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
|
|
417 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
|
|
418 print(p)
|
|
419 dev.off()
|
7
|
420
|
33
|
421 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
|
|
422 p = p + geom_point(aes(colour=type), position="jitter")
|
|
423 p = p + xlab("In one or both samples") + ylab("Frequency")
|
|
424 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
|
|
425 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
|
|
426 print(p)
|
|
427 dev.off()
|
|
428 } else {
|
|
429 empty <- data.frame()
|
|
430 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
|
|
431
|
|
432 png("singles_reads_scatterplot.png", width=400, height=300)
|
|
433 print(p)
|
|
434 dev.off()
|
|
435
|
|
436 png("singles_freq_scatterplot.png", width=400, height=300)
|
|
437 print(p)
|
|
438 dev.off()
|
|
439 }
|
7
|
440 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
|
|
441 onShort = "reads"
|
|
442 if(on == "Frequency"){
|
|
443 onShort = "freq"
|
|
444 }
|
18
|
445 onx = paste(on, ".x", sep="")
|
|
446 ony = paste(on, ".y", sep="")
|
|
447 onz = paste(on, ".z", sep="")
|
7
|
448 type="triplet"
|
|
449
|
|
450 threshholdIndex = which(colnames(product) == "interval")
|
|
451 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
452 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
453 titleIndex = which(colnames(product) == "Titles")
|
|
454 sampleIndex = which(colnames(patient1) == "Sample")
|
|
455 patientIndex = which(colnames(patient1) == "Patient")
|
|
456 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
457 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
458 threeSample = paste(patient3[1,sampleIndex], sep="")
|
|
459
|
29
|
460 if(mergeOn == "Clone_Sequence"){
|
|
461 patient1$merge = paste(patient1$Clone_Sequence)
|
|
462 patient2$merge = paste(patient2$Clone_Sequence)
|
|
463 patient3$merge = paste(patient3$Clone_Sequence)
|
|
464
|
|
465 } else {
|
|
466 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
467 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
468 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
|
|
469 }
|
9
|
470
|
|
471 patientMerge = merge(patient1, patient2, by="merge")
|
|
472 patientMerge = merge(patientMerge, patient3, by="merge")
|
28
|
473 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
|
18
|
474 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
|
20
|
475 patientMerge12 = merge(patient1, patient2, by="merge")
|
|
476 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
|
|
477 patientMerge13 = merge(patient1, patient3, by="merge")
|
|
478 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
|
|
479 patientMerge23 = merge(patient2, patient3, by="merge")
|
|
480 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
|
|
481
|
28
|
482
|
30
|
483 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
|
20
|
484 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
|
30
|
485 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
|
27
|
486 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
|
20
|
487
|
7
|
488 res1 = vector()
|
|
489 res2 = vector()
|
|
490 res3 = vector()
|
20
|
491 res12 = vector()
|
|
492 res13 = vector()
|
|
493 res23 = vector()
|
7
|
494 resAll = vector()
|
|
495 read1Count = vector()
|
|
496 read2Count = vector()
|
|
497 read3Count = vector()
|
|
498
|
|
499 if(appendTriplets){
|
9
|
500 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
|
7
|
501 }
|
|
502 for(iter in 1:length(product[,1])){
|
|
503 threshhold = product[iter,threshholdIndex]
|
|
504 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
505 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
506 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
|
|
507 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
|
7
|
508
|
30
|
509 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
|
|
510 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
|
|
511 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
|
24
|
512
|
30
|
513 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
|
|
514 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
|
|
515 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
|
20
|
516
|
18
|
517 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
|
|
518 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
|
|
519 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
|
7
|
520 res1 = append(res1, sum(one))
|
|
521 res2 = append(res2, sum(two))
|
|
522 res3 = append(res3, sum(three))
|
|
523 resAll = append(resAll, sum(all))
|
20
|
524 res12 = append(res12, sum(one_two))
|
|
525 res13 = append(res13, sum(one_three))
|
|
526 res23 = append(res23, sum(two_three))
|
7
|
527 #threshhold = 0
|
|
528 if(threshhold != 0){
|
|
529 if(sum(one) > 0){
|
20
|
530 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
531 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
532 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
533 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
534 }
|
|
535 if(sum(two) > 0){
|
20
|
536 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
537 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
538 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
539 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
540 }
|
|
541 if(sum(three) > 0){
|
20
|
542 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
543 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
544 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
545 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
546 }
|
20
|
547 if(sum(one_two) > 0){
|
|
548 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
549 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
|
550 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
551 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
552 }
|
|
553 if(sum(one_three) > 0){
|
|
554 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
555 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
556 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
557 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
558 }
|
|
559 if(sum(two_three) > 0){
|
|
560 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
561 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
562 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
563 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
564 }
|
|
565 } else { #scatterplot data
|
24
|
566 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
30
|
567 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
|
27
|
568 if(sum(in_two) > 0){
|
|
569 scatterplot_locus_data[in_two,]$type = "In two"
|
|
570 }
|
30
|
571 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
|
27
|
572 if(sum(in_three)> 0){
|
|
573 scatterplot_locus_data[in_three,]$type = "In three"
|
|
574 }
|
|
575 not_in_one = scatterplot_locus_data$type != "In one"
|
|
576 if(sum(not_in_one) > 0){
|
|
577 scatterplot_locus_data[not_in_one,]$type = "In multiple"
|
|
578 }
|
20
|
579 p = NULL
|
|
580 if(nrow(scatterplot_locus_data) != 0){
|
|
581 if(on == "normalized_read_count"){
|
31
|
582 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
32
|
583 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
|
20
|
584 } else {
|
32
|
585 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
|
20
|
586 }
|
|
587 p = p + geom_point(aes(colour=type), position="jitter")
|
25
|
588 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
20
|
589 } else {
|
|
590 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
|
591 }
|
|
592 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
593 print(p)
|
|
594 dev.off()
|
|
595 }
|
7
|
596 if(sum(all) > 0){
|
20
|
597 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
|
|
598 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
7
|
599 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
600 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
601 }
|
|
602 }
|
20
|
603 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
|
|
604 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
|
7
|
605 colnames(patientResult)[6] = oneSample
|
20
|
606 colnames(patientResult)[7] = twoSample
|
|
607 colnames(patientResult)[8] = threeSample
|
|
608 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
|
|
609 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
|
|
610 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
|
7
|
611
|
|
612 colnamesBak = colnames(patientResult)
|
20
|
613 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
|
7
|
614 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
615 colnames(patientResult) = colnamesBak
|
|
616
|
|
617 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
618 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
619
|
|
620 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
|
|
621 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
|
|
622 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
623 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
|
|
624 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
|
|
625 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
626 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
|
|
627 print(plt)
|
|
628 dev.off()
|
|
629
|
|
630 fontSize = 4
|
|
631
|
|
632 bak = patientResult
|
|
633 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
|
|
634 patientResult$relativeValue = patientResult$value * 10
|
|
635 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
636 plt = ggplot(patientResult)
|
|
637 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
638 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
639 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
640 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
|
|
641 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
|
|
642 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
|
|
643 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
|
|
644 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
|
|
645 print(plt)
|
|
646 dev.off()
|
|
647 }
|
|
648
|
33
|
649 if(nrow(triplets) != 0){
|
|
650 triplets$uniqueID = "ID"
|
|
651
|
|
652 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
653 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
654 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
655
|
|
656 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
657 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
658 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
659
|
|
660 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
|
|
661
|
|
662 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
|
|
663 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
|
|
664 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
|
|
665
|
|
666 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
|
|
667 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
668
|
|
669 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
|
670
|
|
671 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
|
|
672
|
|
673 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
674
|
|
675 triplets = triplets[triplets$normalized_read_count >= min_cells,]
|
|
676
|
|
677 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
|
|
678
|
|
679 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
|
|
680
|
|
681 #remove duplicate V+J+CDR3, add together numerical values
|
|
682 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
|
|
683 Cell_Count=unique(.SD$Cell_Count),
|
|
684 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
|
|
685 Total_Read_Count=sum(.SD$Total_Read_Count),
|
|
686 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
|
|
687 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
|
|
688 Frequency=sum(.SD$Frequency),
|
|
689 normalized_read_count=sum(.SD$normalized_read_count),
|
|
690 Log10_Frequency=sum(.SD$Log10_Frequency),
|
|
691 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
|
|
692
|
|
693
|
|
694 interval = intervalReads
|
|
695 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
696 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
697
|
|
698 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
699 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
700 three = triplets[triplets$Sample == "24062_reg_BM",]
|
|
701 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
|
|
702
|
|
703 one = triplets[triplets$Sample == "16278_Left",]
|
|
704 two = triplets[triplets$Sample == "26402_Left",]
|
|
705 three = triplets[triplets$Sample == "26759_Left",]
|
|
706 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
|
|
707
|
|
708 one = triplets[triplets$Sample == "16278_Right",]
|
|
709 two = triplets[triplets$Sample == "26402_Right",]
|
|
710 three = triplets[triplets$Sample == "26759_Right",]
|
|
711 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
|
|
712
|
|
713
|
|
714 interval = intervalFreq
|
|
715 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
716 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
717
|
|
718 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
719 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
720 three = triplets[triplets$Sample == "24062_reg_BM",]
|
|
721 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
|
|
722
|
|
723 one = triplets[triplets$Sample == "16278_Left",]
|
|
724 two = triplets[triplets$Sample == "26402_Left",]
|
|
725 three = triplets[triplets$Sample == "26759_Left",]
|
|
726 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
|
|
727
|
|
728 one = triplets[triplets$Sample == "16278_Right",]
|
|
729 two = triplets[triplets$Sample == "26402_Right",]
|
|
730 three = triplets[triplets$Sample == "26759_Right",]
|
|
731 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
|
|
732 } else {
|
|
733 cat("", file="triplets.txt")
|
|
734 }
|