annotate RScript.r @ 70:9643b1fd9c45 draft default tip

Uploaded
author davidvanzessen
date Thu, 26 May 2016 09:37:48 -0400
parents c532b3f8dc97
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
2 options(scipen=999)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
3
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
4 inFile = args[1]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
5 outDir = args[2]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
6 logfile = args[3]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
7 min_freq = as.numeric(args[4])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
8 min_cells = as.numeric(args[5])
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
9 mergeOn = args[6]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
10
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
11 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
12
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
13 library(ggplot2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
14 library(reshape2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
15 library(data.table)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
16 library(grid)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
17 library(parallel)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
18 #require(xtable)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
19 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
20 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
66
ef3ac4f431bb Uploaded
davidvanzessen
parents: 65
diff changeset
21 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
22 dat$dsPerM = 0
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
23 dat = dat[!is.na(dat$Patient),]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
24 dat$Related_to_leukemia_clone = F
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
25
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
26 setwd(outDir)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
27 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
28 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
29 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
30
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
31 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
32
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
33 dat$Frequency = ((10^dat$Log10_Frequency)*100)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
34
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
35 dat = dat[dat$Frequency >= min_freq,]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
36
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
37 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
38
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
39 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
40
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
41 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
42 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
43 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
44
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
45 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
46 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
47
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
48 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
49 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
50 dat = merge(dat, min_cell_count, by="min_cell_paste")
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
51 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
52 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
53
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
54 dat = dat[dat$normalized_read_count >= min_cells,]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
55
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
56 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
57
22
b662fdc7eff4 Uploaded
davidvanzessen
parents: 20
diff changeset
58 patients = split(dat, dat$Patient, drop=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
59 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
6
8313c6cc65c5 Uploaded
davidvanzessen
parents: 5
diff changeset
60 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
61 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
62 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
63 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
64 Titles = factor(Titles, levels=Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
65 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
66
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
67 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
68
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
69 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
70 patient.merge.list.second = list()
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
71 scatter_locus_data_list = list()
56
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
72 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
73 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
74 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
75 if (!is.data.frame(x) & is.list(x)){
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
76 x = x[[1]]
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
77 }
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
78 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
79 x = data.frame(x,stringsAsFactors=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
80 onShort = "reads"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
81 if(on == "Frequency"){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
82 onShort = "freq"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
83 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
84 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
85 ony = paste(on, ".y", sep="")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
86 splt = split(x, x$Sample, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
87 type="pair"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
88 if(length(splt) == 1){
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
89 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
90 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
91 type="single"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
92 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
93 patient1 = splt[[1]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
94 patient2 = splt[[2]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
95
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
96 threshholdIndex = which(colnames(product) == "interval")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
97 V_SegmentIndex = which(colnames(product) == "V_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
98 J_SegmentIndex = which(colnames(product) == "J_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
99 titleIndex = which(colnames(product) == "Titles")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
100 sampleIndex = which(colnames(x) == "Sample")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
101 patientIndex = which(colnames(x) == "Patient")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
102 oneSample = paste(patient1[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
103 twoSample = paste(patient2[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
104 patient = paste(x[1,patientIndex])
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
105
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
106 switched = F
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
107 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
108 tmp = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
109 twoSample = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
110 oneSample = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
111 tmp = patient1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
112 patient1 = patient2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
113 patient2 = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
114 switched = T
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
115 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
116 if(appendtxt){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
117 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
118 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
119 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
120
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
121 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
122 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
123 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
124 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
125 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
126 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
127 }
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
128
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
129 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
130 #scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
131 scatterplot_data = patient1[NULL,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
132 #scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
133 #scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
134 scatterplot.data.type.factor = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both"))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
135 #scatterplot_data$type = factor(x=NULL, levels=scatterplot.data.type.factor)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
136 scatterplot_data$type = character(0)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
137 scatterplot_data$link = numeric(0)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
138 scatterplot_data$on = character(0)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
139
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
140 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
141 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
142
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
143 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
144
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
145 start.time = proc.time()
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
146 merge.list = c()
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
147
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
148 if(patient %in% names(patient.merge.list)){
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
149 patientMerge = patient.merge.list[[patient]]
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
150 merge.list[["second"]] = patient.merge.list.second[[patient]]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
151 scatterplot_data = scatter_locus_data_list[[patient]]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
152 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
153
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
154 print(names(patient.merge.list))
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
155 } else {
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
156 #fuzzy matching here...
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
157 #merge.list = patientMerge$merge
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
158
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
159 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
160 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
161
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
162 patient1.fuzzy = patient1
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
163 patient2.fuzzy = patient2
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
164
36
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
165 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
166 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
167
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
168 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
169 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
170
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
171 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
172 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
173
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
174 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
175 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
176
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
177 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
178 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
179
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
180 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
181 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
182
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
183 merge.list = list()
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
184
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
185 merge.list[["second"]] = vector()
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
186
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
187 link.count = 1
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
188
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
189 while(nrow(patient.fuzzy) > 1){
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
190 first.merge = patient.fuzzy[1,"merge"]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
191 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
192 first.sample = patient.fuzzy[1,"Sample"]
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
193 merge.filter = first.merge == patient.fuzzy$merge
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
194
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
195 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
196
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
197 first.sample.filter = first.sample == patient.fuzzy$Sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
198 second.sample.filter = first.sample != patient.fuzzy$Sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
199
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
200 #first match same sample, sum to a single row, same for other sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
201 #then merge rows like 'normal'
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
202
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
203 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
204
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
205
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
206
44
07278582b735 Uploaded
davidvanzessen
parents: 43
diff changeset
207 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
208 first.match.filter = merge.filter & sequence.filter & first.sample.filter
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
209 second.match.filter = merge.filter & sequence.filter & second.sample.filter
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
210
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
211 first.rows = patient.fuzzy[first.match.filter,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
212 second.rows = patient.fuzzy[second.match.filter,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
213
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
214 first.rows.v = table(first.rows$V_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
215 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
216 first.rows.j = table(first.rows$J_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
217 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
218
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
219 first.sum = data.frame(merge = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
220 Patient = patient,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
221 Receptor = first.rows[1,"Receptor"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
222 Sample = first.rows[1,"Sample"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
223 Cell_Count = first.rows[1,"Cell_Count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
224 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
225 Log10_Frequency = log10(sum(first.rows$Frequency)),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
226 Total_Read_Count = sum(first.rows$Total_Read_Count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
227 dsPerM = sum(first.rows$dsPerM),
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
228 J_Segment_Major_Gene = first.rows.j,
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
229 V_Segment_Major_Gene = first.rows.v,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
230 Clone_Sequence = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
231 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
232 Related_to_leukemia_clone = F,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
233 Frequency = sum(first.rows$Frequency),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
234 locus_V = first.rows[1,"locus_V"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
235 locus_J = first.rows[1,"locus_J"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
236 min_cell_count = first.rows[1,"min_cell_count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
237 normalized_read_count = sum(first.rows$normalized_read_count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
238 paste = first.rows[1,"paste"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
239 min_cell_paste = first.rows[1,"min_cell_paste"])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
240
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
241 if(nrow(second.rows) > 0){
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
242 second.rows.v = table(second.rows$V_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
243 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
244 second.rows.j = table(second.rows$J_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
245 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
246
55
43ce3ebfc9b5 Uploaded
davidvanzessen
parents: 54
diff changeset
247 second.sum = data.frame(merge = first.clone.sequence,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
248 Patient = patient,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
249 Receptor = second.rows[1,"Receptor"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
250 Sample = second.rows[1,"Sample"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
251 Cell_Count = second.rows[1,"Cell_Count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
252 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
253 Log10_Frequency = log10(sum(second.rows$Frequency)),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
254 Total_Read_Count = sum(second.rows$Total_Read_Count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
255 dsPerM = sum(second.rows$dsPerM),
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
256 J_Segment_Major_Gene = second.rows.j,
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
257 V_Segment_Major_Gene = second.rows.v,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
258 Clone_Sequence = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
259 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
260 Related_to_leukemia_clone = F,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
261 Frequency = sum(second.rows$Frequency),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
262 locus_V = second.rows[1,"locus_V"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
263 locus_J = second.rows[1,"locus_J"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
264 min_cell_count = second.rows[1,"min_cell_count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
265 normalized_read_count = sum(second.rows$normalized_read_count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
266 paste = second.rows[1,"paste"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
267 min_cell_paste = second.rows[1,"min_cell_paste"])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
268
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
269 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
270 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
271
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
272 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
273 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
274
56
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
275 tmp.rows = rbind(first.rows, second.rows)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
276 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
277
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
278
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
279 #add to the scatterplot data
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
280 scatterplot.row = first.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
281 scatterplot.row$type = paste(first.sum[,"Sample"], "In Both")
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
282 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
283 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
284
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
285 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
286
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
287 scatterplot.row = second.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
288 scatterplot.row$type = paste(second.sum[,"Sample"], "In Both")
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
289 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
290 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
291
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
292 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
293
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
294 #write some information about the match to a log file
56
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
295 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
296 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
297 } else {
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
298 second.clone.sequence = second.rows[1,"Clone_Sequence"]
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
299 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
300 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
301 } else {
58
3ed7878f75c3 Uploaded
davidvanzessen
parents: 56
diff changeset
302 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
303 }
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
304 }
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
305
59
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
306 } else if(nrow(first.rows) > 1) {
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
307 if(patient1[1,"Sample"] == first.sample){
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
308 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
309 patient1 = rbind(patient1, first.sum)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
310 } else {
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
311 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
312 patient2 = rbind(patient2, first.sum)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
313 }
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
314
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
315 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
316 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
317
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
318 patient.fuzzy = patient.fuzzy[-first.match.filter,]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
319
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
320 #add to the scatterplot data
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
321 scatterplot.row = first.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
322 scatterplot.row$type = first.sum[,"Sample"]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
323 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
324 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
325
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
326 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
59
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
327
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
328 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
329 } else {
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
330 patient.fuzzy = patient.fuzzy[-1,]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
331
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
332 #add to the scatterplot data
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
333 scatterplot.row = first.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
334 scatterplot.row$type = first.sum[,"Sample"]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
335 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
336 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
337
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
338 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
339 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
340 link.count = link.count + 1
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
341 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
342 patient.merge.list[[patient]] <<- patientMerge
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
343 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
69
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
344
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
345 sample.order = data.frame(type = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both")),type.order = 1:4)
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
346 scatterplot_data = merge(scatterplot_data, sample.order, by="type")
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
347
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
348 scatter_locus_data_list[[patient]] <<- scatterplot_data
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
349 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
350 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
351
52
c5c2a790d476 Uploaded
davidvanzessen
parents: 51
diff changeset
352 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
c5c2a790d476 Uploaded
davidvanzessen
parents: 51
diff changeset
353 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
354
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
355
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
356 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
357 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony])
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
358 res1 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
359 res2 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
360 resBoth = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
361 read1Count = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
362 read2Count = vector()
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
363 locussum1 = vector()
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
364 locussum2 = vector()
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
365
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
366 #for(iter in 1){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
367 for(iter in 1:length(product[,1])){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
368 threshhold = product[iter,threshholdIndex]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
369 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
370 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
371 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
372 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
373 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
374 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
14
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
375 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
376 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
377 res1 = append(res1, sum(one))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
378 res2 = append(res2, sum(two))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
379 resBoth = append(resBoth, sum(both))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
380 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
381 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
382 #threshhold = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
383 if(threshhold != 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
384 if(sum(one) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
385 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
386 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
387 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
388 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
389 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
390 if(sum(two) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
391 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
392 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
393 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
394 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
395 }
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
396 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
397 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
398 if(nrow(scatterplot_locus_data) > 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
399 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
400 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
401
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
402
69
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
403
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
404 p = NULL
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
405 print(paste("nrow scatterplot_locus_data", nrow(scatterplot_locus_data)))
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
406 if(nrow(scatterplot_locus_data) != 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
407 if(on == "normalized_read_count"){
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
408 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
70
9643b1fd9c45 Uploaded
davidvanzessen
parents: 69
diff changeset
409 #p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,10^6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
9643b1fd9c45 Uploaded
davidvanzessen
parents: 69
diff changeset
410 print(paste("scatterplot_locus_data:"))
9643b1fd9c45 Uploaded
davidvanzessen
parents: 69
diff changeset
411 print(head(scatterplot_locus_data))
9643b1fd9c45 Uploaded
davidvanzessen
parents: 69
diff changeset
412 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales, limits=c(0,10^6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
413 } else {
69
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
414 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), Frequency, group=link)) + geom_line() + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
415 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
416 p = p + geom_point(aes(colour=type), position="dodge")
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
417 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
418 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
419 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
420 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
421 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
422 print(p)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
423 dev.off()
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
424 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
425 if(sum(both) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
426 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
427 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
428 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
429 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
430 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
431 }
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
432 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
433 if(sum(is.na(patientResult$percentage)) > 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
434 patientResult[is.na(patientResult$percentage),]$percentage = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
435 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
436 colnames(patientResult)[6] = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
437 colnames(patientResult)[8] = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
438 colnamesBak = colnames(patientResult)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
439 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
440 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
441 colnames(patientResult) = colnamesBak
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
442
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
443 patientResult$Locus = factor(patientResult$Locus, Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
444 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
445
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
446 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
447 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
448 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
449 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
450 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
451 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
452 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
453 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
454 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
455 #(t,r,b,l)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
456 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
457 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
458 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
459 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
460 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
461 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
462 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
463 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
464 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
465
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
466 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
467 patientResult$relativeValue = patientResult$value * 10
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
468 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
469 plt = ggplot(patientResult)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
470 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
471 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
472 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
473 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
474 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
475 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
476 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
477 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
478 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
479 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
480
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
481 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
482
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
483 interval = intervalFreq
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
484 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
485 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
486 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
487
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
488 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
489
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
490 interval = intervalReads
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
491 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
492 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
493 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
494
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
495 if(nrow(single_patients) > 0){
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
496 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
497 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
498 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
499 p = p + xlab("In one or both samples") + ylab("Reads")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
500 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
501 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
502 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
503 dev.off()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
504
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
505 #p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
506 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
507 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
508 p = p + xlab("In one or both samples") + ylab("Frequency")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
509 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
510 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
511 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
512 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
513 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
514 empty <- data.frame()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
515 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
516
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
517 png("singles_reads_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
518 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
519 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
520
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
521 png("singles_freq_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
522 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
523 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
524 }
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
525
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
526 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
527 patient.merge.list.second = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
528
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
529 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
530 onShort = "reads"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
531 if(on == "Frequency"){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
532 onShort = "freq"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
533 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
534 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
535 ony = paste(on, ".y", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
536 onz = paste(on, ".z", sep="")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
537 type="triplet"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
538
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
539 threshholdIndex = which(colnames(product) == "interval")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
540 V_SegmentIndex = which(colnames(product) == "V_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
541 J_SegmentIndex = which(colnames(product) == "J_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
542 titleIndex = which(colnames(product) == "Titles")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
543 sampleIndex = which(colnames(patient1) == "Sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
544 patientIndex = which(colnames(patient1) == "Patient")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
545 oneSample = paste(patient1[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
546 twoSample = paste(patient2[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
547 threeSample = paste(patient3[1,sampleIndex], sep="")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
548
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
549 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
550 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
551 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
552 patient3$merge = paste(patient3$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
553
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
554 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
555 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
556 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
557 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
558 }
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
559
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
560 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
561 patient1.fuzzy = patient1
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
562 patient2.fuzzy = patient2
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
563 patient3.fuzzy = patient3
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
564
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
565 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
566
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
567 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
568 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
569 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
570
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
571 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
65
7a63e90cfc3d Uploaded
davidvanzessen
parents: 64
diff changeset
572 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
573
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
574 other.sample.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
575 other.sample.list[[oneSample]] = c(twoSample, threeSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
576 other.sample.list[[twoSample]] = c(oneSample, threeSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
577 other.sample.list[[threeSample]] = c(oneSample, twoSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
578
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
579 patientMerge = merge(patient1, patient2, by="merge")
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
580 patientMerge = merge(patientMerge, patient3, by="merge")
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
581 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
582 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
583 patientMerge = patientMerge[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
584
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
585 duo.merge.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
586
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
587 patientMerge12 = merge(patient1, patient2, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
588 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
589 patientMerge12 = patientMerge12[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
590 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
591 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
592
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
593 patientMerge13 = merge(patient1, patient3, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
594 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
595 patientMerge13 = patientMerge13[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
596 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
597 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
598
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
599 patientMerge23 = merge(patient2, patient3, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
600 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
601 patientMerge23 = patientMerge23[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
602 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
603 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
604
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
605 merge.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
606 merge.list[["second"]] = vector()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
607
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
608 start.time = proc.time()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
609 if(paste(label1, "123") %in% names(patient.merge.list)){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
610 patientMerge = patient.merge.list[[paste(label1, "123")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
611 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
612 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
613 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
614
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
615 merge.list[["second"]] = patient.merge.list.second[[label1]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
616
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
617 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
618 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
619 while(nrow(patient.fuzzy) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
620 first.merge = patient.fuzzy[1,"merge"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
621 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
622 first.sample = patient.fuzzy[1,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
623
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
624 merge.filter = first.merge == patient.fuzzy$merge
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
625
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
626 second.sample = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
627 third.sample = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
628
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
629 sample.filter.1 = first.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
630 sample.filter.2 = second.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
631 sample.filter.3 = third.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
632
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
633 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
634
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
635 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
636 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
637 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
638
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
639 matches.in.1 = any(match.filter.1)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
640 matches.in.2 = any(match.filter.2)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
641 matches.in.3 = any(match.filter.3)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
642
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
643
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
644
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
645 rows.1 = patient.fuzzy[match.filter.1,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
646
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
647 sum.1 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
648 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
649 Receptor = rows.1[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
650 Sample = rows.1[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
651 Cell_Count = rows.1[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
652 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
653 Log10_Frequency = log10(sum(rows.1$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
654 Total_Read_Count = sum(rows.1$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
655 dsPerM = sum(rows.1$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
656 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
657 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
658 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
659 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
660 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
661 Frequency = sum(rows.1$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
662 locus_V = rows.1[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
663 locus_J = rows.1[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
664 uniqueID = rows.1[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
665 normalized_read_count = sum(rows.1$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
666 sum.2 = sum.1[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
667 rows.2 = patient.fuzzy[match.filter.2,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
668 if(matches.in.2){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
669 sum.2 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
670 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
671 Receptor = rows.2[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
672 Sample = rows.2[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
673 Cell_Count = rows.2[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
674 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
675 Log10_Frequency = log10(sum(rows.2$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
676 Total_Read_Count = sum(rows.2$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
677 dsPerM = sum(rows.2$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
678 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
679 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
680 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
681 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
682 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
683 Frequency = sum(rows.2$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
684 locus_V = rows.2[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
685 locus_J = rows.2[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
686 uniqueID = rows.2[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
687 normalized_read_count = sum(rows.2$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
688 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
689
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
690 sum.3 = sum.1[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
691 rows.3 = patient.fuzzy[match.filter.3,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
692 if(matches.in.3){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
693 sum.3 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
694 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
695 Receptor = rows.3[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
696 Sample = rows.3[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
697 Cell_Count = rows.3[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
698 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
699 Log10_Frequency = log10(sum(rows.3$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
700 Total_Read_Count = sum(rows.3$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
701 dsPerM = sum(rows.3$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
702 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
703 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
704 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
705 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
706 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
707 Frequency = sum(rows.3$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
708 locus_V = rows.3[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
709 locus_J = rows.3[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
710 uniqueID = rows.3[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
711 normalized_read_count = sum(rows.3$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
712 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
713
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
714 if(matches.in.2 & matches.in.3){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
715 merge.123 = merge(sum.1, sum.2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
716 merge.123 = merge(merge.123, sum.3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
717 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
718 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
719
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
720 patientMerge = rbind(patientMerge, merge.123)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
721 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
722
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
723 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
724 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
725
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
726 } else if (matches.in.2) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
727 #other.sample1 = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
728 #other.sample2 = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
729
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
730 second.sample = sum.2[,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
731
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
732 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
733
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
734 merge.12 = merge(sum.1, sum.2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
735
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
736 current.merge.list = rbind(current.merge.list, merge.12)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
737 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
738
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
739 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
740
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
741 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
742 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
743
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
744 } else if (matches.in.3) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
745
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
746 #other.sample1 = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
747 #other.sample2 = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
748
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
749 second.sample = sum.3[,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
750
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
751 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
752
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
753 merge.13 = merge(sum.1, sum.3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
754
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
755 current.merge.list = rbind(current.merge.list, merge.13)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
756 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
757
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
758 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
759
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
760 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
761 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
762
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
763 } else if(nrow(rows.1) > 1){
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
764 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),]
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
765 print(names(patient1)[names(patient1) %in% sum.1])
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
766 print(names(patient1)[!(names(patient1) %in% sum.1)])
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
767 print(names(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
768 print(names(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
769 print(summary(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
770 print(summary(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
771 print(dim(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
772 print(dim(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
773 print(head(sum.1[,names(patient1)]))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
774 patient1 = rbind(patient1, sum.1[,names(patient1)])
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
775 patient.fuzzy = patient.fuzzy[-match.filter.1,]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
776 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
777 patient.fuzzy = patient.fuzzy[-1,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
778 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
779
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
780 tmp.rows = rbind(rows.1, rows.2, rows.3)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
781 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
782
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
783 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
784 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
785 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
786 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
787
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
788 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
789 patient.merge.list[[paste(label1, "123")]] = patientMerge
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
790
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
791 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
792 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
793 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
794
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
795 patient.merge.list[[paste(label1, "12")]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
796 patient.merge.list[[paste(label1, "13")]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
797 patient.merge.list[[paste(label1, "23")]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
798
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
799 patient.merge.list.second[[label1]] = merge.list[["second"]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
800 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
801 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
802 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
803 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
804 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
805 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
806
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
807 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
808 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
809 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
810 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
811
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
812 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),]
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
813 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),]
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
814 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),]
62
b1ad6f515338 Uploaded
davidvanzessen
parents: 61
diff changeset
815
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
816 if(F){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
817 patientMerge = merge(patient1, patient2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
818 patientMerge = merge(patientMerge, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
819 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
820 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
821 patientMerge12 = merge(patient1, patient2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
822 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
823 patientMerge13 = merge(patient1, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
824 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
825 patientMerge23 = merge(patient2, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
826 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
827 }
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
828
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
829 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
830 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
831 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
832 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
833
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
834 res1 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
835 res2 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
836 res3 = vector()
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
837 res12 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
838 res13 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
839 res23 = vector()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
840 resAll = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
841 read1Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
842 read2Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
843 read3Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
844
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
845 if(appendTriplets){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
846 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
847 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
848 for(iter in 1:length(product[,1])){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
849 threshhold = product[iter,threshholdIndex]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
850 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
851 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
852 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
853 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
854
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
855 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
856 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
857 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
858
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
859 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
860 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
861 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
862
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
863 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
864 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
865 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
866 res1 = append(res1, sum(one))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
867 res2 = append(res2, sum(two))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
868 res3 = append(res3, sum(three))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
869 resAll = append(resAll, sum(all))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
870 res12 = append(res12, sum(one_two))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
871 res13 = append(res13, sum(one_three))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
872 res23 = append(res23, sum(two_three))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
873 #threshhold = 0
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
874 if(threshhold != 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
875 if(sum(one) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
876 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
877 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
878 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
879 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
880 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
881 if(sum(two) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
882 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
883 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
884 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
885 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
886 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
887 if(sum(three) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
888 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
889 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
890 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
891 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
892 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
893 if(sum(one_two) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
894 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
895 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
896 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
897 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
898 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
899 if(sum(one_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
900 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
901 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
902 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
903 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
904 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
905 if(sum(two_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
906 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
907 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
908 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
909 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
910 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
911 } else { #scatterplot data
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
912 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
913 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
914 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
915 if(sum(in_two) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
916 scatterplot_locus_data[in_two,]$type = "In two"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
917 }
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
918 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
919 if(sum(in_three)> 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
920 scatterplot_locus_data[in_three,]$type = "In three"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
921 }
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
922 not_in_one = scatterplot_locus_data$type != "In one"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
923 if(sum(not_in_one) > 0){
70
9643b1fd9c45 Uploaded
davidvanzessen
parents: 69
diff changeset
924 #scatterplot_locus_data[not_in_one,]$type = "In multiple"
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
925 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
926 p = NULL
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
927 if(nrow(scatterplot_locus_data) != 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
928 if(on == "normalized_read_count"){
70
9643b1fd9c45 Uploaded
davidvanzessen
parents: 69
diff changeset
929 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
32
dde5ec847549 Uploaded
davidvanzessen
parents: 31
diff changeset
930 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
931 } else {
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
932 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
933 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
934 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
935 p = p + geom_point(aes(colour=type), position="jitter")
25
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
936 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
937 } else {
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
938 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
939 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
940 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
941 print(p)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
942 dev.off()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
943 }
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
944 if(sum(all) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
945 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
946 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
947 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
948 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
949 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
950 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
951 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
952 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
953 colnames(patientResult)[6] = oneSample
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
954 colnames(patientResult)[7] = twoSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
955 colnames(patientResult)[8] = threeSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
956 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
957 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
958 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
959
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
960 colnamesBak = colnames(patientResult)
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
961 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
962 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
963 colnames(patientResult) = colnamesBak
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
964
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
965 patientResult$Locus = factor(patientResult$Locus, Titles)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
966 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
967
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
968 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
969 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
970 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
971 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
972 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
973 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
974 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
975 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
976 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
977
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
978 fontSize = 4
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
979
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
980 bak = patientResult
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
981 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
982 patientResult$relativeValue = patientResult$value * 10
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
983 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
984 plt = ggplot(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
985 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
986 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
987 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
988 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
989 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
990 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
991 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
992 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
993 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
994 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
995 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
996
70
9643b1fd9c45 Uploaded
davidvanzessen
parents: 69
diff changeset
997 if(nrow(triplets) != 0){
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
998
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
999 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1000
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1001 triplets$uniqueID = "ID"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1002
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1003 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1004 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1005 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1006
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1007 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1008 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1009 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1010
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1011 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1012
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1013 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1014
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1015 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1016 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1017 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1018
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1019 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1020 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1021
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1022 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1023
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1024 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1025
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1026 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1027
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1028 triplets = triplets[triplets$normalized_read_count >= min_cells,]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1029
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1030 column_drops = c("min_cell_count", "min_cell_paste")
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1031
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1032 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1033
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1034 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1035
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1036 interval = intervalReads
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1037 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1038 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1039
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1040 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1041 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1042 three = triplets[triplets$Sample == "24062_reg_BM",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1043 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1044
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1045 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1046 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1047 three = triplets[triplets$Sample == "26759_Left",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1048 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1049
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1050 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1051 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1052 three = triplets[triplets$Sample == "26759_Right",]
53
8ebe57feecd6 Uploaded
davidvanzessen
parents: 52
diff changeset
1053 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1054
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1055 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1056
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1057 interval = intervalFreq
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1058 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1059 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1060
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1061 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1062 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1063 three = triplets[triplets$Sample == "24062_reg_BM",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1064 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1065
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1066 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1067 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1068 three = triplets[triplets$Sample == "26759_Left",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1069 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1070
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1071 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1072 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1073 three = triplets[triplets$Sample == "26759_Right",]
53
8ebe57feecd6 Uploaded
davidvanzessen
parents: 52
diff changeset
1074 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1075 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1076 cat("", file="triplets.txt")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1077 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
1078 cat("</table></html>", file=logfile, append=T)