annotate RScript.r @ 27:dd8518ea23dd draft

Uploaded
author davidvanzessen
date Wed, 25 Feb 2015 09:18:14 -0500
parents f35c127b29b1
children a63ccc36f5a4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
3 inFile = args[1]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
4 outDir = args[2]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
5 logfile = args[3]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
6 min_freq = as.numeric(args[4])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
7 min_cells = as.numeric(args[5])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
8
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
9 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
10
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
11 library(ggplot2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
12 library(reshape2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
13 library(data.table)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
14 library(grid)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
15 library(parallel)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
16 #require(xtable)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
17 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
18 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
19 dat = dat[!is.na(dat$Patient),]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
20 dat$Related_to_leukemia_clone = F
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
21
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
22 setwd(outDir)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
23 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
24 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
25 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
26
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
27 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
28
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
29 dat$Frequency = ((10^dat$Log10_Frequency)*100)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
30
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
31 dat = dat[dat$Frequency >= min_freq,]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
32
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
33 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
34
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
35 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
36
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
37 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
38 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
39 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
40
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
41 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
42 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
43
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
44 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
45
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
46 dat = merge(dat, min_cell_count, by="min_cell_paste")
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
47
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
48 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
49
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
50 dat = dat[dat$normalized_read_count >= min_cells,]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
51
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
52 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
53
22
b662fdc7eff4 Uploaded
davidvanzessen
parents: 20
diff changeset
54 patients = split(dat, dat$Patient, drop=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
55 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
6
8313c6cc65c5 Uploaded
davidvanzessen
parents: 5
diff changeset
56 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
57 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
58 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
59 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
60 Titles = factor(Titles, levels=Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
61 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
62
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
63 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
64 x$Sample = factor(x$Sample, levels=unique(x$Sample))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
65 onShort = "reads"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
66 if(on == "Frequency"){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
67 onShort = "freq"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
68 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
69 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
70 ony = paste(on, ".y", sep="")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
71 splt = split(x, x$Sample, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
72 type="pair"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
73 if(length(splt) == 1){
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
74 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
75 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
76 type="single"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
77 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
78 patient1 = splt[[1]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
79 patient2 = splt[[2]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
80
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
81 threshholdIndex = which(colnames(product) == "interval")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
82 V_SegmentIndex = which(colnames(product) == "V_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
83 J_SegmentIndex = which(colnames(product) == "J_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
84 titleIndex = which(colnames(product) == "Titles")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
85 sampleIndex = which(colnames(x) == "Sample")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
86 patientIndex = which(colnames(x) == "Patient")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
87 oneSample = paste(patient1[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
88 twoSample = paste(patient2[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
89 patient = paste(x[1,patientIndex])
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
90
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
91 switched = F
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
92 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
93 tmp = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
94 twoSample = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
95 oneSample = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
96 tmp = patient1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
97 patient1 = patient2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
98 patient2 = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
99 switched = T
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
100 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
101 if(appendtxt){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
102 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
103 }
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
104 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
105
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
106 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
107 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
108 patient1$merge = paste(patient1$Clone_Sequence)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
109 patient2$merge = paste(patient2$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
110
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
111 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
112 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
113 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
114 res1 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
115 res2 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
116 resBoth = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
117 read1Count = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
118 read2Count = vector()
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
119 locussum1 = vector()
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
120 locussum2 = vector()
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
121
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
122 print(patient)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
123 #for(iter in 1){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
124 for(iter in 1:length(product[,1])){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
125 threshhold = product[iter,threshholdIndex]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
126 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
127 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
128 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
129 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both higher than threshold
19
5f7ed60975bd Uploaded
davidvanzessen
parents: 18
diff changeset
130 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge))
5f7ed60975bd Uploaded
davidvanzessen
parents: 18
diff changeset
131 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge))
14
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
132 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
133 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
134 res1 = append(res1, sum(one))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
135 res2 = append(res2, sum(two))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
136 resBoth = append(resBoth, sum(both))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
137 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
138 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
139 #threshhold = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
140 if(threshhold != 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
141 if(sum(one) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
142 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
143 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
144 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
145 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
146 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
147 if(sum(two) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
148 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
149 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
150 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
151 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
152 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
153 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
154 if(sum(both) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
155 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
156 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
157 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
158 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
159 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
160 }
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
161 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
162 if(sum(is.na(patientResult$percentage)) > 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
163 patientResult[is.na(patientResult$percentage),]$percentage = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
164 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
165 colnames(patientResult)[6] = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
166 colnames(patientResult)[8] = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
167 colnamesBak = colnames(patientResult)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
168 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
169 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
170 colnames(patientResult) = colnamesBak
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
171
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
172 patientResult$Locus = factor(patientResult$Locus, Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
173 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
174
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
175 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
176 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
177 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
178 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
179 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
180 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
181 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
182 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
183 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
184 #(t,r,b,l)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
185 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
186 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
187 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
188 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
189 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
190 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
191 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
192 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
193 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
194
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
195 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
196 patientResult$relativeValue = patientResult$value * 10
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
197 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
198 plt = ggplot(patientResult)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
199 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
200 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
201 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
202 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
203 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
204 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
205 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
206 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
207 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
208 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
209
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
210 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
211
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
212 interval = intervalFreq
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
213 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
214 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
215 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
216
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
217 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
218
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
219 interval = intervalReads
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
220 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
221 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
222 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
223
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
224 cat("</table></html>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
225
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
226
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
227
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
228 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
229 onShort = "reads"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
230 if(on == "Frequency"){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
231 onShort = "freq"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
232 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
233 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
234 ony = paste(on, ".y", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
235 onz = paste(on, ".z", sep="")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
236 type="triplet"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
237
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
238 threshholdIndex = which(colnames(product) == "interval")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
239 V_SegmentIndex = which(colnames(product) == "V_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
240 J_SegmentIndex = which(colnames(product) == "J_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
241 titleIndex = which(colnames(product) == "Titles")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
242 sampleIndex = which(colnames(patient1) == "Sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
243 patientIndex = which(colnames(patient1) == "Patient")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
244 oneSample = paste(patient1[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
245 twoSample = paste(patient2[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
246 threeSample = paste(patient3[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
247
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
248 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
249 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
250 #patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
251
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
252 patient1$merge = paste(patient1$Clone_Sequence)
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
253 patient2$merge = paste(patient2$Clone_Sequence)
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
254 patient3$merge = paste(patient3$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
255
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
256 patientMerge = merge(patient1, patient2, by="merge")
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
257 patientMerge = merge(patientMerge, patient3, by="merge")
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
258 colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
259 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
260
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
261 patientMerge12 = merge(patient1, patient2, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
262 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
263 patientMerge13 = merge(patient1, patient3, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
264 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
265 patientMerge23 = merge(patient2, patient3, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
266 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
267
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
268 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene")
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
269 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
270 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),]
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
271 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
272
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
273 res1 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
274 res2 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
275 res3 = vector()
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
276 res12 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
277 res13 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
278 res23 = vector()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
279 resAll = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
280 read1Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
281 read2Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
282 read3Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
283
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
284 if(appendTriplets){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
285 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
286 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
287 for(iter in 1:length(product[,1])){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
288 threshhold = product[iter,threshholdIndex]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
289 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
290 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
291 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
292 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
293
25
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
294 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$Clone_Sequence.x %in% patientMerge[all,]$merge))
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
295 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$Clone_Sequence.x %in% patientMerge[all,]$merge))
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
296 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$Clone_Sequence.x %in% patientMerge[all,]$merge))
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
297
25
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
298 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient1$Clone_Sequence %in% patientMerge12[one_two,]$merge) & !(patient1$Clone_Sequence %in% patientMerge13[one_three,]$merge))
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
299 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient2$Clone_Sequence %in% patientMerge12[one_two,]$merge) & !(patient2$Clone_Sequence %in% patientMerge23[two_three,]$merge))
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
300 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient3$Clone_Sequence %in% patientMerge13[one_three,]$merge) & !(patient3$Clone_Sequence %in% patientMerge23[two_three,]$merge))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
301
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
302 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
303 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
304 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
305 res1 = append(res1, sum(one))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
306 res2 = append(res2, sum(two))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
307 res3 = append(res3, sum(three))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
308 resAll = append(resAll, sum(all))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
309 res12 = append(res12, sum(one_two))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
310 res13 = append(res13, sum(one_three))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
311 res23 = append(res23, sum(two_three))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
312 #threshhold = 0
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
313 if(threshhold != 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
314 if(sum(one) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
315 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
316 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
317 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
318 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
319 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
320 if(sum(two) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
321 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
322 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
323 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
324 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
325 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
326 if(sum(three) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
327 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
328 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
329 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
330 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
331 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
332 if(sum(one_two) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
333 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
334 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
335 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
336 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
337 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
338 if(sum(one_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
339 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
340 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
341 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
342 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
343 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
344 if(sum(two_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
345 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
346 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
347 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
348 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
349 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
350 } else { #scatterplot data
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
351 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
352 in_two = (scatterplot_locus_data$Clone_Sequence %in% patientMerge12[one_two,]$Clone_Sequence.x) | (scatterplot_locus_data$Clone_Sequence %in% patientMerge13[one_three,]$Clone_Sequence.x) | (scatterplot_locus_data$Clone_Sequence %in% patientMerge23[two_three,]$Clone_Sequence.x)
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
353 if(sum(in_two) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
354 scatterplot_locus_data[in_two,]$type = "In two"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
355 }
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
356 in_three = (scatterplot_locus_data$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
357 if(sum(in_three)> 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
358 scatterplot_locus_data[in_three,]$type = "In three"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
359 }
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
360 not_in_one = scatterplot_locus_data$type != "In one"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
361 print(not_in_one)
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
362 if(sum(not_in_one) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
363 scatterplot_locus_data[not_in_one,]$type = "In multiple"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
364 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
365 p = NULL
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
366 if(nrow(scatterplot_locus_data) != 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
367 if(on == "normalized_read_count"){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
368 scales = 10^(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
369 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
370 } else {
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
371 p = ggplot(scatterplot_locus_data, aes(type, Frequency))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
372 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
373 p = p + geom_point(aes(colour=type), position="jitter")
25
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
374 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
375 } else {
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
376 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
377 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
378 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
379 print(p)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
380 dev.off()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
381 }
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
382 if(sum(all) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
383 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
384 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
385 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
386 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
387 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
388 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
389 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
390 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
391 colnames(patientResult)[6] = oneSample
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
392 colnames(patientResult)[7] = twoSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
393 colnames(patientResult)[8] = threeSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
394 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
395 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
396 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
397
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
398 colnamesBak = colnames(patientResult)
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
399 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
400 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
401 colnames(patientResult) = colnamesBak
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
402
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
403 patientResult$Locus = factor(patientResult$Locus, Titles)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
404 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
405
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
406 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
407 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
408 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
409 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
410 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
411 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
412 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
413 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
414 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
415
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
416 fontSize = 4
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
417
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
418 bak = patientResult
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
419 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
420 patientResult$relativeValue = patientResult$value * 10
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
421 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
422 plt = ggplot(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
423 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
424 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
425 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
426 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
427 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
428 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
429 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
430 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
431 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
432 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
433 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
434
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
435 triplets$uniqueID = "ID"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
436
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
437 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
438 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
439 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
440
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
441 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
442 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
443 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
444
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
445 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
446
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
447 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
448 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
449 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
450
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
451 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
452 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
453
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
454 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
455
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
456 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
457
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
458 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
459
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
460 triplets = triplets[triplets$normalized_read_count >= min_cells,]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
461
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
462 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
463
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
464 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
465
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
466 interval = intervalReads
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
467 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
468 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
469
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
470 one = triplets[triplets$Sample == "14696_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
471 two = triplets[triplets$Sample == "24536_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
472 three = triplets[triplets$Sample == "24062_reg_BM",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
473 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
474
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
475 one = triplets[triplets$Sample == "16278_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
476 two = triplets[triplets$Sample == "26402_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
477 three = triplets[triplets$Sample == "26759_Left",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
478 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
479
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
480 one = triplets[triplets$Sample == "16278_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
481 two = triplets[triplets$Sample == "26402_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
482 three = triplets[triplets$Sample == "26759_Right",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
483 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
484
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
485
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
486 interval = intervalFreq
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
487 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
488 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
489
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
490 one = triplets[triplets$Sample == "14696_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
491 two = triplets[triplets$Sample == "24536_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
492 three = triplets[triplets$Sample == "24062_reg_BM",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
493 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
494
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
495 one = triplets[triplets$Sample == "16278_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
496 two = triplets[triplets$Sample == "26402_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
497 three = triplets[triplets$Sample == "26759_Left",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
498 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
499
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
500 one = triplets[triplets$Sample == "16278_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
501 two = triplets[triplets$Sample == "26402_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
502 three = triplets[triplets$Sample == "26759_Right",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
503 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)