annotate RScript.r @ 11:bc4612998d50 draft

Uploaded
author davidvanzessen
date Mon, 06 Oct 2014 05:57:55 -0400
parents 974febc99fd4
children eb5b569b44dd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
3 inFile = args[1]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
4 outDir = args[2]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
5 logfile = args[3]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
6 min_freq = as.numeric(args[4])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
7 min_cells = as.numeric(args[5])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
8
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
9 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
10
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
11 library(ggplot2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
12 library(reshape2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
13 library(data.table)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
14 library(grid)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
15 library(parallel)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
16 #require(xtable)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
17 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
18 dat = read.table(inFile, header=T, sep="\t", dec=",", fill=T, stringsAsFactors=F)
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
19 dat = dat[!is.na(dat$Patient),]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
20
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
21 setwd(outDir)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
22 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
23 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
24 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
25
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
26 str(dat)
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
27 cat("<tr><td>Deduplication</td></tr>", file=logfile, append=T)
11
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
28 #dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
29
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
30 most.common = function(x){
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
31 ux = unique(x)
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
32 if(length(ux) > 1){
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
33 xtdf = data.frame(table(x))
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
34 return(xtdf$Var1[which.max(xtdf$Freq)])
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
35 #print(xtdf)
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
36 }
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
37 return(unique(x))
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
38 }
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
39
bc4612998d50 Uploaded
davidvanzessen
parents: 10
diff changeset
40 dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), V_Segment_Major_Gene=most.common(.SD$V_Segment_Major_Gene), J_Segment_Major_Gene=most.common(.SD$J_Segment_Major_Gene), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "CDR3_Sense_Sequence")])
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
41
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
42 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
43 dat$Frequency = ((10^dat$Log10_Frequency)*100)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
44
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
45 dat = dat[dat$Frequency >= min_freq,]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
46
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
47 cat("<tr><td>Normalizing cell count to 1.000.000</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
48 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * 1000000 / 2)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
49 dat = dat[dat$normalized_read_count >= min_cells,]
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
50 dat$paste = paste(dat$Sample, dat$V_Segment_Major_Gene, dat$J_Segment_Major_Gene, dat$CDR3_Sense_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
51 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
52
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
53 patients = split(dat, dat$Patient, drop=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
54 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
6
8313c6cc65c5 Uploaded
davidvanzessen
parents: 5
diff changeset
55 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
56 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
57 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
58 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
59 Titles = factor(Titles, levels=Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
60 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
61
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
62 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
63 x$Sample = factor(x$Sample, levels=unique(x$Sample))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
64 onShort = "reads"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
65 if(on == "Frequency"){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
66 onShort = "freq"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
67 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
68 splt = split(x, x$Sample, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
69 type="pair"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
70 if(length(splt) == 1){
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
71 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
72 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
73 type="single"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
74 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
75 patient1 = splt[[1]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
76 patient2 = splt[[2]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
77
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
78 threshholdIndex = which(colnames(product) == "interval")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
79 V_SegmentIndex = which(colnames(product) == "V_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
80 J_SegmentIndex = which(colnames(product) == "J_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
81 titleIndex = which(colnames(product) == "Titles")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
82 sampleIndex = which(colnames(x) == "Sample")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
83 patientIndex = which(colnames(x) == "Patient")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
84 oneSample = paste(patient1[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
85 twoSample = paste(patient2[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
86 patient = paste(x[1,patientIndex])
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
87
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
88 switched = F
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
89 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
90 tmp = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
91 twoSample = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
92 oneSample = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
93 tmp = patient1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
94 patient1 = patient2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
95 patient2 = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
96 switched = T
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
97 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
98 if(appendtxt){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
99 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
100 }
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
101 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
102
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
103 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
104 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
105
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
106 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
107 res1 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
108 res2 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
109 resBoth = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
110 read1Count = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
111 read2Count = vector()
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
112 locussum1 = vector()
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
113 locussum2 = vector()
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
114
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
115 print(patient)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
116 #for(iter in 1){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
117 for(iter in 1:length(product[,1])){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
118 threshhold = product[iter,threshholdIndex]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
119 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
120 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
121 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold)
10
974febc99fd4 Uploaded
davidvanzessen
parents: 9
diff changeset
122 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[both,]$CDR3_Sense_Sequence.x))
974febc99fd4 Uploaded
davidvanzessen
parents: 9
diff changeset
123 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[both,]$CDR3_Sense_Sequence.x))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
124 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.x))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
125 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.y))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
126 res1 = append(res1, sum(one))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
127 res2 = append(res2, sum(two))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
128 resBoth = append(resBoth, sum(both))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
129 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
130 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
131 #threshhold = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
132 if(threshhold != 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
133 if(sum(one) > 0){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
134 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
135 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "CDR3 Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
136 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
137 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
138 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
139 if(sum(two) > 0){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
140 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
141 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "CDR3 Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
142 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
143 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
144 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
145 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
146 if(sum(both) > 0){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
147 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
148 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3 Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
149 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
150 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
151 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
152 }
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
153 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
154 if(sum(is.na(patientResult$percentage)) > 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
155 patientResult[is.na(patientResult$percentage),]$percentage = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
156 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
157 colnames(patientResult)[6] = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
158 colnames(patientResult)[8] = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
159 colnamesBak = colnames(patientResult)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
160 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
161 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
162 colnames(patientResult) = colnamesBak
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
163
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
164 patientResult$Locus = factor(patientResult$Locus, Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
165 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
166
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
167 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
168 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
169 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
170 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
171 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
172 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
173 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
174 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
175 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
176 #(t,r,b,l)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
177 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
178 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
179 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
180 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
181 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
182 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
183 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
184 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
185 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
186
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
187 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
188 patientResult$relativeValue = patientResult$value * 10
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
189 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
190 plt = ggplot(patientResult)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
191 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
192 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
193 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
194 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
195 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
196 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
197 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
198 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
199 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
200 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
201
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
202 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
203
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
204 interval = intervalFreq
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
205 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
206 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
207 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
208
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
209 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
210
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
211 interval = intervalReads
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
212 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
213 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
214 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
215
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
216 cat("</table></html>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
217
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
218
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
219 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
220 onShort = "reads"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
221 if(on == "Frequency"){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
222 onShort = "freq"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
223 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
224 type="triplet"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
225
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
226 threshholdIndex = which(colnames(product) == "interval")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
227 V_SegmentIndex = which(colnames(product) == "V_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
228 J_SegmentIndex = which(colnames(product) == "J_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
229 titleIndex = which(colnames(product) == "Titles")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
230 sampleIndex = which(colnames(patient1) == "Sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
231 patientIndex = which(colnames(patient1) == "Patient")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
232 oneSample = paste(patient1[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
233 twoSample = paste(patient2[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
234 threeSample = paste(patient3[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
235
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
236 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
237 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
238 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
239
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
240 patientMerge = merge(patient1, patient2, by="merge")
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
241 patientMerge = merge(patientMerge, patient3, by="merge")
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
242 colnames(patientMerge)[28:length(colnames(patientMerge))] = paste(colnames(patientMerge)[28:length(colnames(patientMerge))], ".z", sep="")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
243 res1 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
244 res2 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
245 res3 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
246 resAll = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
247 read1Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
248 read2Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
249 read3Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
250
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
251 if(appendTriplets){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
252 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
253 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
254 for(iter in 1:length(product[,1])){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
255 threshhold = product[iter,threshholdIndex]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
256 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
257 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
258 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold & patientMerge[,paste(on, ".z", sep="")] > threshhold)
10
974febc99fd4 Uploaded
davidvanzessen
parents: 9
diff changeset
259 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x))
974febc99fd4 Uploaded
davidvanzessen
parents: 9
diff changeset
260 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x))
974febc99fd4 Uploaded
davidvanzessen
parents: 9
diff changeset
261 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
262
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
263 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
264 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
265 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
266 res1 = append(res1, sum(one))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
267 res2 = append(res2, sum(two))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
268 res3 = append(res3, sum(three))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
269 resAll = append(resAll, sum(all))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
270 #threshhold = 0
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
271 if(threshhold != 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
272 if(sum(one) > 0){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
273 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
274 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
275 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
276 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
277 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
278 if(sum(two) > 0){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
279 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
280 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
281 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
282 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
283 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
284 if(sum(three) > 0){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
285 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
286 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
287 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
288 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
289 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
290 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
291 if(sum(all) > 0){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
292 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
293 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3_Sense_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
294 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
295 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
296 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
297 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
298 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
299 colnames(patientResult)[6] = oneSample
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
300 colnames(patientResult)[8] = twoSample
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
301 colnames(patientResult)[10] = threeSample
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
302
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
303 colnamesBak = colnames(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
304 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample), paste("Normalized Read Count", twoSample), paste("Number of sequences", threeSample), paste("Normalized Read Count", threeSample))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
305 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
306 colnames(patientResult) = colnamesBak
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
307
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
308 patientResult$Locus = factor(patientResult$Locus, Titles)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
309 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
310
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
311 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
312 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
313 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
314 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
315 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
316 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
317 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
318 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
319 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
320
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
321 fontSize = 4
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
322
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
323 bak = patientResult
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
324 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
325 patientResult$relativeValue = patientResult$value * 10
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
326 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
327 plt = ggplot(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
328 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
329 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
330 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
331 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
332 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
333 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
334 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
335 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
336 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
337 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
338 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
339
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
340
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
341 triplets$uniqueID = "ID"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
342
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
343 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
344 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
345 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
346
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
347 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
348 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
349 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
350
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
351 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
352
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
353 triplets = data.frame(data.table(triplets)[, list(Patient=unique(.SD$uniqueID), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
354
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
355 triplets$Frequency = (10^as.numeric(triplets$Log10_Frequency))*100
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
356 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * 1000000 / 2)
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
357
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
358 interval = intervalReads
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
359 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
360 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
361
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
362 one = triplets[triplets$Sample == "14696_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
363 two = triplets[triplets$Sample == "24536_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
364 three = triplets[triplets$Sample == "24062_reg_BM",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
365 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
366
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
367 one = triplets[triplets$Sample == "16278_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
368 two = triplets[triplets$Sample == "26402_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
369 three = triplets[triplets$Sample == "26759_Left",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
370 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
371
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
372 one = triplets[triplets$Sample == "16278_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
373 two = triplets[triplets$Sample == "26402_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
374 three = triplets[triplets$Sample == "26759_Right",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
375 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
376
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
377
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
378 interval = intervalFreq
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
379 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
380 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
381
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
382 one = triplets[triplets$Sample == "14696_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
383 two = triplets[triplets$Sample == "24536_reg_BM",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
384 three = triplets[triplets$Sample == "24062_reg_BM",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
385 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
386
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
387 one = triplets[triplets$Sample == "16278_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
388 two = triplets[triplets$Sample == "26402_Left",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
389 three = triplets[triplets$Sample == "26759_Left",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
390 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
391
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
392 one = triplets[triplets$Sample == "16278_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
393 two = triplets[triplets$Sample == "26402_Right",]
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
394 three = triplets[triplets$Sample == "26759_Right",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
395 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)