annotate RScript.r @ 19:5f7ed60975bd draft

Uploaded
author davidvanzessen
date Fri, 20 Feb 2015 04:14:24 -0500
parents f23d3be6fbc8
children d938aef60589
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
3 inFile = args[1]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
4 outDir = args[2]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
5 logfile = args[3]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
6 min_freq = as.numeric(args[4])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
7 min_cells = as.numeric(args[5])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
8
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
9 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
10
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
11 library(ggplot2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
12 library(reshape2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
13 library(data.table)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
14 library(grid)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
15 library(parallel)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
16 #require(xtable)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
17 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
18 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
19 dat = dat[!is.na(dat$Patient),]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
20 dat$Related_to_leukemia_clone = F
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
21
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
22 setwd(outDir)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
23 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
24 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
25 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
26
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
27 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
28
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
29 dat$Frequency = ((10^dat$Log10_Frequency)*100)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
30
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
31 dat = dat[dat$Frequency >= min_freq,]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
32
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
33 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
34
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
35 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
36
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
37 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
38 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
39 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
40
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
41 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
42 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
43
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
44 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
45
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
46 dat = merge(dat, min_cell_count, by="min_cell_paste")
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
47
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
48 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
49
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
50 dat = dat[dat$normalized_read_count >= min_cells,]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
51
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
52 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
53
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
54 patients = split(dat, dat$Patient, drop=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
55 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
6
8313c6cc65c5 Uploaded
davidvanzessen
parents: 5
diff changeset
56 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
57 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
58 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
59 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
60 Titles = factor(Titles, levels=Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
61 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
62
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
63 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
64 x$Sample = factor(x$Sample, levels=unique(x$Sample))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
65 onShort = "reads"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
66 if(on == "Frequency"){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
67 onShort = "freq"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
68 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
69 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
70 ony = paste(on, ".y", sep="")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
71 splt = split(x, x$Sample, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
72 type="pair"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
73 if(length(splt) == 1){
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
74 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
75 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
76 type="single"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
77 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
78 patient1 = splt[[1]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
79 patient2 = splt[[2]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
80
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
81 threshholdIndex = which(colnames(product) == "interval")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
82 V_SegmentIndex = which(colnames(product) == "V_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
83 J_SegmentIndex = which(colnames(product) == "J_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
84 titleIndex = which(colnames(product) == "Titles")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
85 sampleIndex = which(colnames(x) == "Sample")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
86 patientIndex = which(colnames(x) == "Patient")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
87 oneSample = paste(patient1[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
88 twoSample = paste(patient2[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
89 patient = paste(x[1,patientIndex])
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
90
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
91 switched = F
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
92 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
93 tmp = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
94 twoSample = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
95 oneSample = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
96 tmp = patient1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
97 patient1 = patient2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
98 patient2 = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
99 switched = T
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
100 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
101 if(appendtxt){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
102 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
103 }
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
104 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
105
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
106 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
107 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
108 patient1$merge = paste(patient1$Clone_Sequence)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
109 patient2$merge = paste(patient2$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
110
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
111 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
112 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
113 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
114 res1 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
115 res2 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
116 resBoth = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
117 read1Count = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
118 read2Count = vector()
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
119 locussum1 = vector()
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
120 locussum2 = vector()
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
121
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
122 print(patient)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
123 #for(iter in 1){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
124 for(iter in 1:length(product[,1])){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
125 threshhold = product[iter,threshholdIndex]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
126 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
127 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
128 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
129 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both higher than threshold
19
5f7ed60975bd Uploaded
davidvanzessen
parents: 18
diff changeset
130 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge))
5f7ed60975bd Uploaded
davidvanzessen
parents: 18
diff changeset
131 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge))
14
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
132 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
133 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
134 res1 = append(res1, sum(one))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
135 res2 = append(res2, sum(two))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
136 resBoth = append(resBoth, sum(both))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
137 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
138 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
139 #threshhold = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
140 if(threshhold != 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
141 if(sum(one) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
142 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
143 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
144 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
145 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
146 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
147 if(sum(two) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
148 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
149 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
150 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
151 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
152 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
153 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
154 if(sum(both) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
155 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
156 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
157 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
158 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
159 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
160 }
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
161 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
162 if(sum(is.na(patientResult$percentage)) > 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
163 patientResult[is.na(patientResult$percentage),]$percentage = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
164 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
165 colnames(patientResult)[6] = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
166 colnames(patientResult)[8] = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
167 colnamesBak = colnames(patientResult)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
168 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
169 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
170 colnames(patientResult) = colnamesBak
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
171
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
172 patientResult$Locus = factor(patientResult$Locus, Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
173 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
174
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
175 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
176 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
177 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
178 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
179 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
180 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
181 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
182 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
183 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
184 #(t,r,b,l)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
185 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
186 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
187 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
188 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
189 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
190 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
191 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
192 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
193 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
194
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
195 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
196 patientResult$relativeValue = patientResult$value * 10
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
197 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
198 plt = ggplot(patientResult)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
199 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
200 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
201 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
202 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
203 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
204 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
205 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
206 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
207 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
208 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
209
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
210 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
211
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
212 interval = intervalFreq
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
213 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
214 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
215 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
216
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
217 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
218
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
219 interval = intervalReads
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
220 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
221 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
222 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
223
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
224 cat("</table></html>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
225
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
226
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
227
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
228 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
229 onShort = "reads"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
230 if(on == "Frequency"){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
231 onShort = "freq"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
232 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
233 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
234 ony = paste(on, ".y", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
235 onz = paste(on, ".z", sep="")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
236 type="triplet"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
237
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
238 threshholdIndex = which(colnames(product) == "interval")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
239 V_SegmentIndex = which(colnames(product) == "V_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
240 J_SegmentIndex = which(colnames(product) == "J_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
241 titleIndex = which(colnames(product) == "Titles")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
242 sampleIndex = which(colnames(patient1) == "Sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
243 patientIndex = which(colnames(patient1) == "Patient")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
244 oneSample = paste(patient1[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
245 twoSample = paste(patient2[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
246 threeSample = paste(patient3[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
247
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
248 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
249 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
250 #patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
251
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
252 patient1$merge = paste(patient1$Clone_Sequence)
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
253 patient2$merge = paste(patient2$Clone_Sequence)
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
254 patient3$merge = paste(patient3$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
255
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
256 patientMerge = merge(patient1, patient2, by="merge")
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
257 patientMerge = merge(patientMerge, patient3, by="merge")
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
258 colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
259 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
260 res1 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
261 res2 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
262 res3 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
263 resAll = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
264 read1Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
265 read2Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
266 read3Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
267
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
268 if(appendTriplets){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
269 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
270 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
271 for(iter in 1:length(product[,1])){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
272 threshhold = product[iter,threshholdIndex]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
273 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
274 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
275 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
276 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
19
5f7ed60975bd Uploaded
davidvanzessen
parents: 18
diff changeset
277 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$merge))
5f7ed60975bd Uploaded
davidvanzessen
parents: 18
diff changeset
278 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$merge))
5f7ed60975bd Uploaded
davidvanzessen
parents: 18
diff changeset
279 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$merge))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
280
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
281 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
282 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
283 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
284 res1 = append(res1, sum(one))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
285 res2 = append(res2, sum(two))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
286 res3 = append(res3, sum(three))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
287 resAll = append(resAll, sum(all))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
288 #threshhold = 0
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
289 if(threshhold != 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
290 if(sum(one) > 0){
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
291 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
292 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
293 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
294 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
295 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
296 if(sum(two) > 0){
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
297 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
298 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
299 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
300 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
301 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
302 if(sum(three) > 0){
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
303 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
304 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
305 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
306 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
307 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
308 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
309 if(sum(all) > 0){
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
310 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
311 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3_Sense_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
312 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
313 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
314 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
315 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
316 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
317 colnames(patientResult)[6] = oneSample
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
318 colnames(patientResult)[8] = twoSample
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
319 colnames(patientResult)[10] = threeSample
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
320
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
321 colnamesBak = colnames(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
322 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample), paste("Normalized Read Count", twoSample), paste("Number of sequences", threeSample), paste("Normalized Read Count", threeSample))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
323 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
324 colnames(patientResult) = colnamesBak
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
325
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
326 patientResult$Locus = factor(patientResult$Locus, Titles)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
327 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
328
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
329 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
330 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
331 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
332 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
333 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
334 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
335 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
336 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
337 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
338
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
339 fontSize = 4
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
340
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
341 bak = patientResult
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
342 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
343 patientResult$relativeValue = patientResult$value * 10
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
344 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
345 plt = ggplot(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
346 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
347 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
348 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
349 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
350 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
351 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
352 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
353 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
354 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
355 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
356 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
357
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
358 triplets$uniqueID = "ID"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
359
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
360 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
361 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
362 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
363
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
364 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
365 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
366 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
367
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
368 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
369
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
370 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
371 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
372 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
373
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
374 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
375 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
376
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
377 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
378
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
379 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
380
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
381 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
382
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
383 triplets = triplets[triplets$normalized_read_count >= min_cells,]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
384
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
385 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
386
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
387 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
388
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
389 interval = intervalReads
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
390 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
391 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
392
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
393 one = triplets[triplets$Sample == "14696_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
394 two = triplets[triplets$Sample == "24536_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
395 three = triplets[triplets$Sample == "24062_reg_BM",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
396 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
397
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
398 one = triplets[triplets$Sample == "16278_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
399 two = triplets[triplets$Sample == "26402_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
400 three = triplets[triplets$Sample == "26759_Left",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
401 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
402
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
403 one = triplets[triplets$Sample == "16278_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
404 two = triplets[triplets$Sample == "26402_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
405 three = triplets[triplets$Sample == "26759_Right",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
406 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
407
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
408
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
409 interval = intervalFreq
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
410 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
411 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
412
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
413 one = triplets[triplets$Sample == "14696_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
414 two = triplets[triplets$Sample == "24536_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
415 three = triplets[triplets$Sample == "24062_reg_BM",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
416 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
417
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
418 one = triplets[triplets$Sample == "16278_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
419 two = triplets[triplets$Sample == "26402_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
420 three = triplets[triplets$Sample == "26759_Left",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
421 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
422
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
423 one = triplets[triplets$Sample == "16278_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
424 two = triplets[triplets$Sample == "26402_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
425 three = triplets[triplets$Sample == "26759_Right",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
426 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)