annotate RScript.r @ 24:6904186d13b9 draft

Uploaded
author davidvanzessen
date Tue, 24 Feb 2015 09:12:28 -0500
parents b662fdc7eff4
children 99020e5ce46c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
3 inFile = args[1]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
4 outDir = args[2]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
5 logfile = args[3]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
6 min_freq = as.numeric(args[4])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
7 min_cells = as.numeric(args[5])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
8
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
9 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
10
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
11 library(ggplot2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
12 library(reshape2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
13 library(data.table)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
14 library(grid)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
15 library(parallel)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
16 #require(xtable)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
17 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
18 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
19 dat = dat[!is.na(dat$Patient),]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
20 dat$Related_to_leukemia_clone = F
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
21
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
22 setwd(outDir)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
23 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
24 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
25 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
26
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
27 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
28
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
29 dat$Frequency = ((10^dat$Log10_Frequency)*100)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
30
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
31 dat = dat[dat$Frequency >= min_freq,]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
32
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
33 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
34
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
35 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
36
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
37 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
38 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
39 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
40
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
41 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
42 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
43
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
44 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
45
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
46 dat = merge(dat, min_cell_count, by="min_cell_paste")
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
47
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
48 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
49
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
50 dat = dat[dat$normalized_read_count >= min_cells,]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
51
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
52 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
53
22
b662fdc7eff4 Uploaded
davidvanzessen
parents: 20
diff changeset
54 patients = split(dat, dat$Patient, drop=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
55 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
6
8313c6cc65c5 Uploaded
davidvanzessen
parents: 5
diff changeset
56 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
57 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
58 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
59 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
60 Titles = factor(Titles, levels=Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
61 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
62
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
63 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
64 x$Sample = factor(x$Sample, levels=unique(x$Sample))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
65 onShort = "reads"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
66 if(on == "Frequency"){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
67 onShort = "freq"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
68 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
69 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
70 ony = paste(on, ".y", sep="")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
71 splt = split(x, x$Sample, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
72 type="pair"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
73 if(length(splt) == 1){
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
74 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
75 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
76 type="single"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
77 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
78 patient1 = splt[[1]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
79 patient2 = splt[[2]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
80
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
81 threshholdIndex = which(colnames(product) == "interval")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
82 V_SegmentIndex = which(colnames(product) == "V_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
83 J_SegmentIndex = which(colnames(product) == "J_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
84 titleIndex = which(colnames(product) == "Titles")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
85 sampleIndex = which(colnames(x) == "Sample")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
86 patientIndex = which(colnames(x) == "Patient")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
87 oneSample = paste(patient1[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
88 twoSample = paste(patient2[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
89 patient = paste(x[1,patientIndex])
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
90
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
91 switched = F
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
92 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
93 tmp = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
94 twoSample = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
95 oneSample = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
96 tmp = patient1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
97 patient1 = patient2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
98 patient2 = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
99 switched = T
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
100 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
101 if(appendtxt){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
102 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
103 }
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
104 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
105
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
106 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
107 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
108 patient1$merge = paste(patient1$Clone_Sequence)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
109 patient2$merge = paste(patient2$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
110
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
111 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
112 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
113 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
114 res1 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
115 res2 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
116 resBoth = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
117 read1Count = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
118 read2Count = vector()
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
119 locussum1 = vector()
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
120 locussum2 = vector()
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
121
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
122 print(patient)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
123 #for(iter in 1){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
124 for(iter in 1:length(product[,1])){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
125 threshhold = product[iter,threshholdIndex]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
126 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
127 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
128 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
129 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both higher than threshold
19
5f7ed60975bd Uploaded
davidvanzessen
parents: 18
diff changeset
130 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge))
5f7ed60975bd Uploaded
davidvanzessen
parents: 18
diff changeset
131 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge))
14
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
132 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
133 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
134 res1 = append(res1, sum(one))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
135 res2 = append(res2, sum(two))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
136 resBoth = append(resBoth, sum(both))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
137 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
138 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
139 #threshhold = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
140 if(threshhold != 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
141 if(sum(one) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
142 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
143 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
144 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
145 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
146 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
147 if(sum(two) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
148 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
149 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
150 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
151 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
152 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
153 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
154 if(sum(both) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
155 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
156 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
157 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
158 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
159 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
160 }
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
161 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
162 if(sum(is.na(patientResult$percentage)) > 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
163 patientResult[is.na(patientResult$percentage),]$percentage = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
164 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
165 colnames(patientResult)[6] = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
166 colnames(patientResult)[8] = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
167 colnamesBak = colnames(patientResult)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
168 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
169 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
170 colnames(patientResult) = colnamesBak
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
171
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
172 patientResult$Locus = factor(patientResult$Locus, Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
173 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
174
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
175 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
176 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
177 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
178 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
179 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
180 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
181 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
182 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
183 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
184 #(t,r,b,l)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
185 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
186 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
187 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
188 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
189 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
190 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
191 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
192 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
193 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
194
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
195 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
196 patientResult$relativeValue = patientResult$value * 10
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
197 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
198 plt = ggplot(patientResult)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
199 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
200 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
201 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
202 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
203 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
204 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
205 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
206 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
207 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
208 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
209
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
210 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
211
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
212 interval = intervalFreq
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
213 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
214 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
215 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
216
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
217 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
218
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
219 interval = intervalReads
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
220 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
221 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
222 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
223
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
224 cat("</table></html>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
225
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
226
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
227
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
228 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
229 onShort = "reads"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
230 if(on == "Frequency"){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
231 onShort = "freq"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
232 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
233 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
234 ony = paste(on, ".y", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
235 onz = paste(on, ".z", sep="")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
236 type="triplet"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
237
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
238 threshholdIndex = which(colnames(product) == "interval")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
239 V_SegmentIndex = which(colnames(product) == "V_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
240 J_SegmentIndex = which(colnames(product) == "J_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
241 titleIndex = which(colnames(product) == "Titles")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
242 sampleIndex = which(colnames(patient1) == "Sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
243 patientIndex = which(colnames(patient1) == "Patient")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
244 oneSample = paste(patient1[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
245 twoSample = paste(patient2[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
246 threeSample = paste(patient3[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
247
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
248 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
249 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
250 #patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
251
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
252 patient1$merge = paste(patient1$Clone_Sequence)
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
253 patient2$merge = paste(patient2$Clone_Sequence)
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
254 patient3$merge = paste(patient3$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
255
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
256 patientMerge = merge(patient1, patient2, by="merge")
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
257 patientMerge = merge(patientMerge, patient3, by="merge")
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
258 colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
259 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
260
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
261 patientMerge12 = merge(patient1, patient2, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
262 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
263 patientMerge13 = merge(patient1, patient3, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
264 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
265 patientMerge23 = merge(patient2, patient3, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
266 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
267
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
268 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene")
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
269 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
270 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
271 scatterplot_data$type = factor(x="single", levels=c("In one", "In two", "In three"))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
272
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
273 res1 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
274 res2 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
275 res3 = vector()
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
276 res12 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
277 res13 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
278 res23 = vector()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
279 resAll = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
280 read1Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
281 read2Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
282 read3Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
283
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
284 if(appendTriplets){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
285 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
286 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
287 for(iter in 1:length(product[,1])){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
288 threshhold = product[iter,threshholdIndex]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
289 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
290 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
291 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
292 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
293
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
294 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$Clone_Sequence %in% patientMerge[all,]$merge))
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
295 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$Clone_Sequence %in% patientMerge[all,]$merge))
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
296 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$Clone_Sequence %in% patientMerge[all,]$merge))
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
297
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
298 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$merge) & !(patientMerge12$Clone_Sequence %in% patientMerge[all,]$merge) & !(patientMerge13$Clone_Sequence %in% patientMerge[all,]$merge))
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
299 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$merge) & !(patientMerge12$Clone_Sequence %in% patientMerge[all,]$merge) & !(patientMerge23$Clone_Sequence %in% patientMerge[all,]$merge))
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
300 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$merge) & !(patientMerge13$Clone_Sequence %in% patientMerge[all,]$merge) & !(patientMerge23$Clone_Sequence %in% patientMerge[all,]$merge))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
301
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
302 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
303 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
304 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
305 res1 = append(res1, sum(one))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
306 res2 = append(res2, sum(two))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
307 res3 = append(res3, sum(three))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
308 resAll = append(resAll, sum(all))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
309 res12 = append(res12, sum(one_two))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
310 res13 = append(res13, sum(one_three))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
311 res23 = append(res23, sum(two_three))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
312 #threshhold = 0
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
313 if(threshhold != 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
314 if(sum(one) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
315 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
316 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
317 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
318 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
319 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
320 if(sum(two) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
321 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
322 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
323 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
324 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
325 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
326 if(sum(three) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
327 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
328 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
329 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
330 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
331 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
332 if(sum(one_two) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
333 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
334 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
335 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
336 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
337 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
338 if(sum(one_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
339 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
340 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
341 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
342 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
343 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
344 if(sum(two_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
345 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
346 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
347 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
348 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
349 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
350 } else { #scatterplot data
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
351 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
352 scatterplot_locus_data$type = ifelse(scatterplot_locus_data$Clone_Sequence %in% patientMerge12[one_two,]$Clone_Sequence.x, "In two", "In one")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
353 scatterplot_locus_data$type = ifelse(scatterplot_locus_data$Clone_Sequence %in% patientMerge13[one_three,]$Clone_Sequence.x, "In two", "In one")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
354 scatterplot_locus_data$type = ifelse(scatterplot_locus_data$Clone_Sequence %in% patientMerge23[two_three,]$Clone_Sequence.x, "In two", "In one")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
355 scatterplot_locus_data$type = ifelse(scatterplot_locus_data$type == "In two", ifelse(scatterplot_locus_data$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x, "In three", "In two"), "In one")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
356 scatterplot_locus_data$type = ifelse(scatterplot_locus_data$type == "In one", "In one", "In multiple")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
357
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
358 p = NULL
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
359 if(nrow(scatterplot_locus_data) != 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
360 if(on == "normalized_read_count"){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
361 scales = 10^(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
362 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
363 } else {
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
364 p = ggplot(scatterplot_locus_data, aes(type, Frequency))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
365 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
366 p = p + geom_point(aes(colour=type), position="jitter")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
367 p = p + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
368 } else {
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
369 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
370 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
371 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
372 print(p)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
373 dev.off()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
374 }
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
375 if(sum(all) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
376 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
377 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
378 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
379 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
380 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
381 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
382 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
383 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
384 colnames(patientResult)[6] = oneSample
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
385 colnames(patientResult)[7] = twoSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
386 colnames(patientResult)[8] = threeSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
387 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
388 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
389 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
390
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
391 colnamesBak = colnames(patientResult)
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
392 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
393 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
394 colnames(patientResult) = colnamesBak
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
395
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
396 patientResult$Locus = factor(patientResult$Locus, Titles)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
397 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
398
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
399 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
400 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
401 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
402 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
403 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
404 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
405 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
406 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
407 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
408
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
409 fontSize = 4
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
410
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
411 bak = patientResult
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
412 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
413 patientResult$relativeValue = patientResult$value * 10
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
414 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
415 plt = ggplot(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
416 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
417 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
418 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
419 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
420 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
421 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
422 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
423 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
424 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
425 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
426 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
427
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
428 triplets$uniqueID = "ID"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
429
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
430 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
431 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
432 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
433
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
434 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
435 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
436 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
437
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
438 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
439
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
440 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
441 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
442 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
443
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
444 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
445 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
446
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
447 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
448
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
449 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
450
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
451 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
452
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
453 triplets = triplets[triplets$normalized_read_count >= min_cells,]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
454
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
455 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
456
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
457 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
458
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
459 interval = intervalReads
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
460 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
461 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
462
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
463 one = triplets[triplets$Sample == "14696_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
464 two = triplets[triplets$Sample == "24536_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
465 three = triplets[triplets$Sample == "24062_reg_BM",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
466 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
467
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
468 one = triplets[triplets$Sample == "16278_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
469 two = triplets[triplets$Sample == "26402_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
470 three = triplets[triplets$Sample == "26759_Left",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
471 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
472
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
473 one = triplets[triplets$Sample == "16278_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
474 two = triplets[triplets$Sample == "26402_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
475 three = triplets[triplets$Sample == "26759_Right",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
476 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
477
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
478
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
479 interval = intervalFreq
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
480 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
481 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
482
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
483 one = triplets[triplets$Sample == "14696_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
484 two = triplets[triplets$Sample == "24536_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
485 three = triplets[triplets$Sample == "24062_reg_BM",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
486 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
487
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
488 one = triplets[triplets$Sample == "16278_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
489 two = triplets[triplets$Sample == "26402_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
490 three = triplets[triplets$Sample == "26759_Left",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
491 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
492
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
493 one = triplets[triplets$Sample == "16278_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
494 two = triplets[triplets$Sample == "26402_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
495 three = triplets[triplets$Sample == "26759_Right",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
496 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)