annotate RScript.r @ 44:07278582b735 draft

Uploaded
author davidvanzessen
date Tue, 15 Sep 2015 05:57:27 -0400
parents 2eef2c438500
children 570fef3d876f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
3 inFile = args[1]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
4 outDir = args[2]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
5 logfile = args[3]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
6 min_freq = as.numeric(args[4])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
7 min_cells = as.numeric(args[5])
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
8 mergeOn = args[6]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
9
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
11
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
12 library(ggplot2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
13 library(reshape2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
14 library(data.table)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
15 library(grid)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
16 library(parallel)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
17 #require(xtable)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
18 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
19 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
20 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Related_to_leukemia_clone", "Clone_Sequence")]
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
21 dat$dsPerM = 0
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
22 dat = dat[!is.na(dat$Patient),]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
23 dat$Related_to_leukemia_clone = F
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
24
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
25 setwd(outDir)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
26 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
27 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
28 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
29
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
31
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
32 dat$Frequency = ((10^dat$Log10_Frequency)*100)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
33
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
34 dat = dat[dat$Frequency >= min_freq,]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
35
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
36 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
37
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
38 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
39
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
40 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
41 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
42 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
43
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
44 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
45 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
46
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
47 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
48 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
49 dat = merge(dat, min_cell_count, by="min_cell_paste")
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
50 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
51 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
52
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
53 dat = dat[dat$normalized_read_count >= min_cells,]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
54
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
55 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
56
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
57 #remove duplicate V+J+CDR3, add together numerical values
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
58 if(mergeOn != "Clone_Sequence"){
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
59 cat("<tr><td>Adding duplicate V+J+CDR3 sequences</td></tr>", file=logfile, append=T)
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
60 dat= data.frame(data.table(dat)[, list(Receptor=unique(.SD$Receptor),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
61 Cell_Count=unique(.SD$Cell_Count),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
62 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
63 Total_Read_Count=sum(.SD$Total_Read_Count),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
64 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
65 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
66 Frequency=sum(.SD$Frequency),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
67 locus_V=unique(.SD$locus_V),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
68 locus_J=unique(.SD$locus_J),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
69 min_cell_count=unique(.SD$min_cell_count),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
70 normalized_read_count=sum(.SD$normalized_read_count),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
71 Log10_Frequency=sum(.SD$Log10_Frequency),
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
72 Clone_Sequence=.SD$Clone_Sequence[1],
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
73 min_cell_paste=.SD$min_cell_paste[1],
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
74 paste=unique(.SD$paste)), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
75 }
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
76
22
b662fdc7eff4 Uploaded
davidvanzessen
parents: 20
diff changeset
77 patients = split(dat, dat$Patient, drop=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
78 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
6
8313c6cc65c5 Uploaded
davidvanzessen
parents: 5
diff changeset
79 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
80 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
81 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
82 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
83 Titles = factor(Titles, levels=Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
84 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
85
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
86 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
87
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
88 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
89 if (!is.data.frame(x) & is.list(x)){
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
90 x = x[[1]]
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
91 }
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
92 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
93 x = data.frame(x,stringsAsFactors=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
94 onShort = "reads"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
95 if(on == "Frequency"){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
96 onShort = "freq"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
97 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
98 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
99 ony = paste(on, ".y", sep="")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
100 splt = split(x, x$Sample, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
101 type="pair"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
102 if(length(splt) == 1){
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
103 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
104 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
105 type="single"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
106 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
107 patient1 = splt[[1]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
108 patient2 = splt[[2]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
109
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
110 threshholdIndex = which(colnames(product) == "interval")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
111 V_SegmentIndex = which(colnames(product) == "V_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
112 J_SegmentIndex = which(colnames(product) == "J_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
113 titleIndex = which(colnames(product) == "Titles")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
114 sampleIndex = which(colnames(x) == "Sample")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
115 patientIndex = which(colnames(x) == "Patient")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
116 oneSample = paste(patient1[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
117 twoSample = paste(patient2[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
118 patient = paste(x[1,patientIndex])
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
119
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
120 switched = F
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
121 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
122 tmp = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
123 twoSample = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
124 oneSample = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
125 tmp = patient1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
126 patient1 = patient2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
127 patient2 = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
128 switched = T
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
129 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
130 if(appendtxt){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
131 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
132 }
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
133 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
134
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
135 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
136 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
137 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
138 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
139 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
140 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
141 }
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
142
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
143 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
144 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
145 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
146 scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
147 scatterplot_data$on = onShort
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
148
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
149 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
150
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
151
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
152 #fuzzy matching here...
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
153 if(mergeOn == "Clone_Sequence"){
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
154 merge.list = patientMerge$merge
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
155
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
156 patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
157 patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
158
36
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
159 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
160 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
161
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
164
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
165 merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"])))
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
166 merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
167
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
168 patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
169 patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
170
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
171 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
172 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
173
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
174 while(nrow(patient.fuzzy) > 1){
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
175 first.merge = patient.fuzzy[1,"merge"]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
176 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
177
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
178 merge.filter = first.merge == patient.fuzzy$merge
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
179
42
7d4eda663c29 Uploaded
davidvanzessen
parents: 41
diff changeset
180 length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
7d4eda663c29 Uploaded
davidvanzessen
parents: 41
diff changeset
181
43
2eef2c438500 Uploaded
davidvanzessen
parents: 42
diff changeset
182 sample.filter = patient.fuzzy[1,"Sample"] != patient.fuzzy$Sample
2eef2c438500 Uploaded
davidvanzessen
parents: 42
diff changeset
183
44
07278582b735 Uploaded
davidvanzessen
parents: 43
diff changeset
184 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
07278582b735 Uploaded
davidvanzessen
parents: 43
diff changeset
185 match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & sample.filter
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
186
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
187 if(sum(match.filter) == 2){
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
188 second.match = which(match.filter)[2]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
189 second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
190 first.sample = patient.fuzzy[1,"Sample"]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
191 second.sample = patient.fuzzy[second.match,"Sample"]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
192
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
193 if(((nchar(second.clone.sequence) - nchar(first.clone.sequence)) <= 9) & (first.sample != second.sample)){
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
194 first.match.row = patient.fuzzy[1,]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
195 second.match.row = patient.fuzzy[second.match,]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
196 print(paste(first.merge, first.match.row$normalized_read_count, second.match.row$normalized_read_count, first.clone.sequence, second.clone.sequence))
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
197 patientMerge.new.row = data.frame(merge=first.clone.sequence,
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
198 min_cell_paste.x=first.match.row[1,"min_cell_paste"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
199 Patient.x=first.match.row[1,"Patient"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
200 Receptor.x=first.match.row[1,"Receptor"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
201 Sample.x=first.match.row[1,"Sample"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
202 Cell_Count.x=first.match.row[1,"Cell_Count"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
203 Clone_Molecule_Count_From_Spikes.x=first.match.row[1,"Clone_Molecule_Count_From_Spikes"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
204 Log10_Frequency.x=first.match.row[1,"Log10_Frequency"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
205 Total_Read_Count.x=first.match.row[1,"Total_Read_Count"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
206 dsPerM.x=first.match.row[1,"dsPerM"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
207 J_Segment_Major_Gene.x=first.match.row[1,"J_Segment_Major_Gene"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
208 V_Segment_Major_Gene.x=first.match.row[1,"V_Segment_Major_Gene"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
209 Clone_Sequence.x=first.match.row[1,"Clone_Sequence"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
210 CDR3_Sense_Sequence.x=first.match.row[1,"CDR3_Sense_Sequence"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
211 Related_to_leukemia_clone.x=first.match.row[1,"Related_to_leukemia_clone"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
212 Frequency.x=first.match.row[1,"Frequency"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
213 locus_V.x=first.match.row[1,"locus_V"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
214 locus_J.x=first.match.row[1,"locus_J"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
215 min_cell_count.x=first.match.row[1,"min_cell_count"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
216 normalized_read_count.x=first.match.row[1,"normalized_read_count"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
217 paste.x=first.match.row[1,"paste"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
218 min_cell_paste.y=second.match.row[1,"min_cell_paste"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
219 Patient.y=second.match.row[1,"Patient"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
220 Receptor.y=second.match.row[1,"Receptor"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
221 Sample.y=second.match.row[1,"Sample"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
222 Cell_Count.y=second.match.row[1,"Cell_Count"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
223 Clone_Molecule_Count_From_Spikes.y=second.match.row[1,"Clone_Molecule_Count_From_Spikes"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
224 Log10_Frequency.y=second.match.row[1,"Log10_Frequency"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
225 Total_Read_Count.y=second.match.row[1,"Total_Read_Count"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
226 dsPerM.y=second.match.row[1,"dsPerM"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
227 J_Segment_Major_Gene.y=second.match.row[1,"J_Segment_Major_Gene"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
228 V_Segment_Major_Gene.y=second.match.row[1,"V_Segment_Major_Gene"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
229 Clone_Sequence.y=second.match.row[1,"Clone_Sequence"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
230 CDR3_Sense_Sequence.y=second.match.row[1,"CDR3_Sense_Sequence"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
231 Related_to_leukemia_clone.y=second.match.row[1,"Related_to_leukemia_clone"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
232 Frequency.y=second.match.row[1,"Frequency"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
233 locus_V.y=second.match.row[1,"locus_V"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
234 locus_J.y=second.match.row[1,"locus_J"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
235 min_cell_count.y=second.match.row[1,"min_cell_count"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
236 normalized_read_count.y=second.match.row[1,"normalized_read_count"],
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
237 paste.y=first.match.row[1,"paste"])
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
238
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
239
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
240 patientMerge = rbind(patientMerge, patientMerge.new.row)
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
241 patient.fuzzy = patient.fuzzy[-match.filter,]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
242
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
243 patient1 = patient1[!(patient1$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
244 patient2 = patient2[!(patient2$Clone_Sequence %in% c(first.clone.sequence, second.clone.sequence)),]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
245
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
246 scatterplot_data = scatterplot_data[scatterplot_data$merge != second.clone.sequence,]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
247
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
248 } else {
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
249 patient.fuzzy = patient.fuzzy[-1,]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
250 }
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
251
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
252 } else if (sum(match.filter) > 2){
41
f7c07faaa3e8 Uploaded
davidvanzessen
parents: 40
diff changeset
253 cat(paste("<tr><td>", "Multiple matches found for", first.merge, "in", patient, "</td></tr>", sep=""), file=logfile, append=T)
39
9fdcf6bfe024 Uploaded
davidvanzessen
parents: 38
diff changeset
254 patient.fuzzy = patient.fuzzy[-1,]
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
255 } else {
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
256 patient.fuzzy = patient.fuzzy[-1,]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
257 }
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
258
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
259
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
260 }
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
261
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
262 }
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
263
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
264
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
265 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
266 res1 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
267 res2 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
268 resBoth = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
269 read1Count = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
270 read2Count = vector()
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
271 locussum1 = vector()
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
272 locussum2 = vector()
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
273
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
274 #for(iter in 1){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
275 for(iter in 1:length(product[,1])){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
276 threshhold = product[iter,threshholdIndex]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
277 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
278 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
279 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
280 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
281 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
282 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
14
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
283 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
284 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
285 res1 = append(res1, sum(one))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
286 res2 = append(res2, sum(two))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
287 resBoth = append(resBoth, sum(both))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
288 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
289 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
290 #threshhold = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
291 if(threshhold != 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
292 if(sum(one) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
293 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
294 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
295 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
296 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
297 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
298 if(sum(two) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
299 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
300 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
301 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
302 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
303 }
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
304 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
305 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
306 if(nrow(scatterplot_locus_data) > 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
307 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
308 }
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
309 in_one = (scatterplot_locus_data$merge %in% patient1$merge)
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
310 in_two = (scatterplot_locus_data$merge %in% patient2$merge)
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
311 not_in_one = !in_one
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
312 if(any(in_two)){
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
313 scatterplot_locus_data[not_in_one,]$type = twoSample
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
314 }
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
315 in_both = (scatterplot_locus_data$merge %in% patientMerge[both,]$merge)
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
316 if(any(in_both)){
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
317 scatterplot_locus_data[in_both,]$type = "In Both"
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
318 }
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
319 if(type == "single"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
320 single_patients <<- rbind(single_patients, scatterplot_locus_data)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
321 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
322 p = NULL
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
323 if(nrow(scatterplot_locus_data) != 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
324 if(on == "normalized_read_count"){
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
325 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
38
f5b242a5337f Uploaded
davidvanzessen
parents: 37
diff changeset
326 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=10^6)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
327 } else {
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
328 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
329 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
330 p = p + geom_point(aes(colour=type), position="jitter")
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
331 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
332 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
333 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
334 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
335 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
336 print(p)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
337 dev.off()
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
338 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
339 if(sum(both) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
340 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
341 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
342 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
343 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
344 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
345 }
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
346 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
347 if(sum(is.na(patientResult$percentage)) > 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
348 patientResult[is.na(patientResult$percentage),]$percentage = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
349 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
350 colnames(patientResult)[6] = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
351 colnames(patientResult)[8] = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
352 colnamesBak = colnames(patientResult)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
353 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
354 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
355 colnames(patientResult) = colnamesBak
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
356
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
357 patientResult$Locus = factor(patientResult$Locus, Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
358 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
359
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
360 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
361 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
362 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
363 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
364 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
365 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
366 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
367 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
368 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
369 #(t,r,b,l)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
370 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
371 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
372 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
373 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
374 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
375 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
376 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
377 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
378 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
379
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
380 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
381 patientResult$relativeValue = patientResult$value * 10
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
382 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
383 plt = ggplot(patientResult)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
384 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
385 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
386 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
387 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
388 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
389 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
390 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
391 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
392 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
393 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
394
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
395 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
396
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
397 interval = intervalFreq
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
398 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
399 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
400 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
401
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
402 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
403
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
404 interval = intervalReads
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
405 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
406 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
407 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
408
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
409 cat("</table></html>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
410
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
411
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
412 if(nrow(single_patients) > 0){
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
413 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
414 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
415 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
416 p = p + xlab("In one or both samples") + ylab("Reads")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
417 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
418 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
419 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
420 dev.off()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
421
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
422 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
423 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
424 p = p + xlab("In one or both samples") + ylab("Frequency")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
425 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
426 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
427 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
428 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
429 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
430 empty <- data.frame()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
431 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
432
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
433 png("singles_reads_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
434 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
435 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
436
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
437 png("singles_freq_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
438 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
439 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
440 }
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
441 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
442 onShort = "reads"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
443 if(on == "Frequency"){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
444 onShort = "freq"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
445 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
446 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
447 ony = paste(on, ".y", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
448 onz = paste(on, ".z", sep="")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
449 type="triplet"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
450
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
451 threshholdIndex = which(colnames(product) == "interval")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
452 V_SegmentIndex = which(colnames(product) == "V_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
453 J_SegmentIndex = which(colnames(product) == "J_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
454 titleIndex = which(colnames(product) == "Titles")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
455 sampleIndex = which(colnames(patient1) == "Sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
456 patientIndex = which(colnames(patient1) == "Patient")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
457 oneSample = paste(patient1[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
458 twoSample = paste(patient2[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
459 threeSample = paste(patient3[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
460
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
461 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
462 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
463 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
464 patient3$merge = paste(patient3$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
465
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
466 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
467 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
468 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
469 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
470 }
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
471
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
472 patientMerge = merge(patient1, patient2, by="merge")
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
473 patientMerge = merge(patientMerge, patient3, by="merge")
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
474 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
475 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
476 patientMerge12 = merge(patient1, patient2, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
477 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
478 patientMerge13 = merge(patient1, patient3, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
479 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
480 patientMerge23 = merge(patient2, patient3, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
481 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
482
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
483
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
484 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
485 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
486 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
487 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
488
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
489 res1 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
490 res2 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
491 res3 = vector()
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
492 res12 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
493 res13 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
494 res23 = vector()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
495 resAll = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
496 read1Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
497 read2Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
498 read3Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
499
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
500 if(appendTriplets){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
501 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
502 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
503 for(iter in 1:length(product[,1])){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
504 threshhold = product[iter,threshholdIndex]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
505 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
506 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
507 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
508 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
509
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
510 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
511 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
512 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
513
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
514 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
515 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
516 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
517
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
518 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
519 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
520 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
521 res1 = append(res1, sum(one))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
522 res2 = append(res2, sum(two))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
523 res3 = append(res3, sum(three))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
524 resAll = append(resAll, sum(all))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
525 res12 = append(res12, sum(one_two))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
526 res13 = append(res13, sum(one_three))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
527 res23 = append(res23, sum(two_three))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
528 #threshhold = 0
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
529 if(threshhold != 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
530 if(sum(one) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
531 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
532 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
533 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
534 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
535 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
536 if(sum(two) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
537 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
538 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
539 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
540 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
541 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
542 if(sum(three) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
543 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
544 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
545 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
546 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
547 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
548 if(sum(one_two) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
549 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
550 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
551 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
552 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
553 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
554 if(sum(one_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
555 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
556 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
557 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
558 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
559 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
560 if(sum(two_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
561 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
562 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
563 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
564 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
565 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
566 } else { #scatterplot data
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
567 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
568 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
569 if(sum(in_two) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
570 scatterplot_locus_data[in_two,]$type = "In two"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
571 }
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
572 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
573 if(sum(in_three)> 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
574 scatterplot_locus_data[in_three,]$type = "In three"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
575 }
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
576 not_in_one = scatterplot_locus_data$type != "In one"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
577 if(sum(not_in_one) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
578 scatterplot_locus_data[not_in_one,]$type = "In multiple"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
579 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
580 p = NULL
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
581 if(nrow(scatterplot_locus_data) != 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
582 if(on == "normalized_read_count"){
31
ce8bd23d0335 Uploaded
davidvanzessen
parents: 30
diff changeset
583 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
32
dde5ec847549 Uploaded
davidvanzessen
parents: 31
diff changeset
584 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
585 } else {
32
dde5ec847549 Uploaded
davidvanzessen
parents: 31
diff changeset
586 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
587 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
588 p = p + geom_point(aes(colour=type), position="jitter")
25
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
589 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
590 } else {
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
591 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
592 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
593 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
594 print(p)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
595 dev.off()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
596 }
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
597 if(sum(all) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
598 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
599 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
600 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
601 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
602 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
603 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
604 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
605 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
606 colnames(patientResult)[6] = oneSample
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
607 colnames(patientResult)[7] = twoSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
608 colnames(patientResult)[8] = threeSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
609 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
610 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
611 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
612
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
613 colnamesBak = colnames(patientResult)
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
614 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
615 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
616 colnames(patientResult) = colnamesBak
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
617
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
618 patientResult$Locus = factor(patientResult$Locus, Titles)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
619 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
620
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
621 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
622 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
623 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
624 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
625 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
626 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
627 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
628 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
629 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
630
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
631 fontSize = 4
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
632
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
633 bak = patientResult
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
634 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
635 patientResult$relativeValue = patientResult$value * 10
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
636 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
637 plt = ggplot(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
638 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
639 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
640 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
641 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
642 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
643 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
644 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
645 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
646 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
647 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
648 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
649
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
650 if(nrow(triplets) != 0){
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
651 triplets$uniqueID = "ID"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
652
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
653 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
654 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
655 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
656
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
657 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
658 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
659 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
660
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
661 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
662
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
663 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
664 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
665 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
666
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
667 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
668 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
669
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
670 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
671
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
672 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
673
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
674 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
675
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
676 triplets = triplets[triplets$normalized_read_count >= min_cells,]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
677
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
678 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
679
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
680 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
681
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
682 #remove duplicate V+J+CDR3, add together numerical values
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
683 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
684 Cell_Count=unique(.SD$Cell_Count),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
685 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
686 Total_Read_Count=sum(.SD$Total_Read_Count),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
687 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
688 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
689 Frequency=sum(.SD$Frequency),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
690 normalized_read_count=sum(.SD$normalized_read_count),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
691 Log10_Frequency=sum(.SD$Log10_Frequency),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
692 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
693
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
694
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
695 interval = intervalReads
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
696 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
697 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
698
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
699 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
700 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
701 three = triplets[triplets$Sample == "24062_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
702 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
703
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
704 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
705 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
706 three = triplets[triplets$Sample == "26759_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
707 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
708
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
709 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
710 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
711 three = triplets[triplets$Sample == "26759_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
712 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
713
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
714
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
715 interval = intervalFreq
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
716 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
717 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
718
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
719 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
720 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
721 three = triplets[triplets$Sample == "24062_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
722 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
723
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
724 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
725 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
726 three = triplets[triplets$Sample == "26759_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
727 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
728
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
729 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
730 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
731 three = triplets[triplets$Sample == "26759_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
732 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
733 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
734 cat("", file="triplets.txt")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
735 }