annotate RScript.r @ 8:fa240d1c57a9 draft

Uploaded
author davidvanzessen
date Thu, 18 Sep 2014 10:28:24 -0400
parents 68c6c7624ffc
children 58a28427930e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
3 inFile = args[1]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
4 outDir = args[2]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
5 logfile = args[3]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
6 min_freq = as.numeric(args[4])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
7 min_cells = as.numeric(args[5])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
8
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
9 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
10
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
11 library(ggplot2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
12 library(reshape2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
13 library(data.table)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
14 library(grid)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
15 library(parallel)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
16 #require(xtable)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
17 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
18 dat = read.csv(inFile, sep="\t")
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
19 #dat = data.frame(fread(inFile)) #faster but with a dep
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
20 setwd(outDir)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
21 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
22 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
23 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
24
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
25 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
26 dat$Frequency = ((10^dat$Log10_Frequency)*100)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
27
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
28 dat = dat[dat$Frequency >= min_freq,]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
29
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
30 cat("<tr><td>Normalizing cell count to 1.000.000</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
31 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * 1000000 / 2)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
32 dat = dat[dat$normalized_read_count >= min_cells,]
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
33 dat$paste = paste(dat$Sample, dat$V_Segment_Major_Gene, dat$J_Segment_Major_Gene, dat$CDR3_Sense_Sequence)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
34 cat("<tr><td>Removing duplicates</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
35 dat = dat[!duplicated(dat$paste),]
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
36 patients = split(dat, dat$Patient, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
37 intervalReads = rev(c(0,10,25,50,100,1000,10000))
6
8313c6cc65c5 Uploaded
davidvanzessen
parents: 5
diff changeset
38 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
39 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
40 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
41 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
42 Titles = factor(Titles, levels=Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
43 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
44
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
45 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
46 x$Sample = factor(x$Sample, levels=unique(x$Sample))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
47 onShort = "reads"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
48 if(on == "Frequency"){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
49 onShort = "freq"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
50 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
51 splt = split(x, x$Sample, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
52 type="pair"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
53 if(length(splt) == 1){
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
54 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
55 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
56 type="single"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
57 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
58 patient1 = splt[[1]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
59 patient2 = splt[[2]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
60
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
61 threshholdIndex = which(colnames(product) == "interval")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
62 V_SegmentIndex = which(colnames(product) == "V_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
63 J_SegmentIndex = which(colnames(product) == "J_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
64 titleIndex = which(colnames(product) == "Titles")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
65 sampleIndex = which(colnames(x) == "Sample")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
66 patientIndex = which(colnames(x) == "Patient")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
67 oneSample = paste(patient1[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
68 twoSample = paste(patient2[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
69 patient = paste(x[1,patientIndex])
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
70
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
71 switched = F
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
72 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
73 tmp = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
74 twoSample = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
75 oneSample = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
76 tmp = patient1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
77 patient1 = patient2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
78 patient2 = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
79 switched = T
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
80 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
81 if(appendtxt){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
82 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
83 }
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
84 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
85 patientMerge = merge(patient1, patient2, by="Clone_Sequence")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
86 res1 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
87 res2 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
88 resBoth = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
89 read1Count = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
90 read2Count = vector()
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
91 locussum1 = vector()
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
92 locussum2 = vector()
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
93 #for(iter in 1){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
94 for(iter in 1:length(product[,1])){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
95 threshhold = product[iter,threshholdIndex]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
96 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
97 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
98 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
99 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
100 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
101 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.x))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
102 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.y))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
103 res1 = append(res1, sum(one))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
104 res2 = append(res2, sum(two))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
105 resBoth = append(resBoth, sum(both))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
106 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
107 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
108 #threshhold = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
109 if(threshhold != 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
110 if(sum(one) > 0){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
111 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
112 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
113 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
114 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
115 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
116 if(sum(two) > 0){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
117 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
118 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
119 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
120 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
121 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
122 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
123 if(sum(both) > 0){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
124 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
125 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
126 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
127 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
128 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
129 }
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
130 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
131 if(sum(is.na(patientResult$percentage)) > 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
132 patientResult[is.na(patientResult$percentage),]$percentage = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
133 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
134 colnames(patientResult)[6] = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
135 colnames(patientResult)[8] = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
136 colnamesBak = colnames(patientResult)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
137 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
138 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
139 colnames(patientResult) = colnamesBak
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
140
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
141 patientResult$Locus = factor(patientResult$Locus, Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
142 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
143
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
144 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
145 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
146 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
147 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
148 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
149 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
150 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
151 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
152 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
153 #(t,r,b,l)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
154 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
155 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
156 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
157 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
158 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
159 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
160 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
161 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
162 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
163
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
164 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
165 patientResult$relativeValue = patientResult$value * 10
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
166 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
167 plt = ggplot(patientResult)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
168 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
169 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
170 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
171 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
172 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
173 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
174 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
175 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
176 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
177 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
178
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
179 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
180
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
181 interval = intervalFreq
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
182 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
183 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
184 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
185
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
186 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
187
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
188 interval = intervalReads
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
189 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
190 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
191 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
192
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
193 cat("</table></html>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
194
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
195
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
196 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
197 onShort = "reads"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
198 if(on == "Frequency"){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
199 onShort = "freq"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
200 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
201 type="triplet"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
202
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
203 threshholdIndex = which(colnames(product) == "interval")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
204 V_SegmentIndex = which(colnames(product) == "V_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
205 J_SegmentIndex = which(colnames(product) == "J_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
206 titleIndex = which(colnames(product) == "Titles")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
207 sampleIndex = which(colnames(patient1) == "Sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
208 patientIndex = which(colnames(patient1) == "Patient")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
209 oneSample = paste(patient1[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
210 twoSample = paste(patient2[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
211 threeSample = paste(patient3[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
212
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
213 patientMerge = merge(patient1, patient2, by="Clone_Sequence")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
214 patientMerge = merge(patientMerge, patient3, by="Clone_Sequence")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
215 colnames(patientMerge)[32:length(colnames(patientMerge))] = paste(colnames(patientMerge)[32:length(colnames(patientMerge))], ".z", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
216 res1 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
217 res2 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
218 res3 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
219 resAll = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
220 read1Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
221 read2Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
222 read3Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
223
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
224 if(appendTriplets){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
225 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
226 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
227 for(iter in 1:length(product[,1])){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
228 threshhold = product[iter,threshholdIndex]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
229 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
230 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
231 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold & patientMerge[,paste(on, ".z", sep="")] > threshhold)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
232 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
233 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
234 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
235
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
236 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
237 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
238 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
239 res1 = append(res1, sum(one))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
240 res2 = append(res2, sum(two))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
241 res3 = append(res3, sum(three))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
242 resAll = append(resAll, sum(all))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
243 #threshhold = 0
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
244 if(threshhold != 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
245 if(sum(one) > 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
246 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
247 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
248 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
249 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
250 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
251 if(sum(two) > 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
252 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
253 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
254 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
255 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
256 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
257 if(sum(three) > 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
258 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
259 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
260 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
261 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
262 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
263 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
264 if(sum(all) > 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
265 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
266 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
267 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
268 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
269 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
270 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
271 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
272 colnames(patientResult)[6] = oneSample
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
273 colnames(patientResult)[8] = twoSample
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
274 colnames(patientResult)[10] = threeSample
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
275
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
276 colnamesBak = colnames(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
277 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample), paste("Normalized Read Count", twoSample), paste("Number of sequences", threeSample), paste("Normalized Read Count", threeSample))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
278 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
279 colnames(patientResult) = colnamesBak
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
280
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
281 patientResult$Locus = factor(patientResult$Locus, Titles)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
282 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
283
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
284 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
285 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
286 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
287 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
288 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
289 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
290 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
291 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
292 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
293
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
294 fontSize = 4
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
295
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
296 bak = patientResult
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
297 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
298 patientResult$relativeValue = patientResult$value * 10
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
299 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
300 plt = ggplot(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
301 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
302 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
303 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
304 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
305 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
306 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
307 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
308 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
309 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
310 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
311 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
312
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
313 interval = intervalReads
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
314 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
315 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
316
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
317 one = dat[dat$Patient == "VanDongen_cALL_14696.1",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
318 two = dat[dat$Patient == "VanDongen_cALL_14696.2",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
319 three = dat[dat$Patient == "VanDongen_cALL_14696.3",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
320 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
321
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
322 one = dat[dat$Sample == "16278_Left",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
323 two = dat[dat$Sample == "26402_Left",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
324 three = dat[dat$Sample == "26759_Left",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
325 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
326
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
327 one = dat[dat$Sample == "16278_Right",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
328 two = dat[dat$Sample == "26402_Right",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
329 three = dat[dat$Sample == "26759_Right",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
330 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
331
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
332
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
333 interval = intervalFreq
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
334 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
335 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
336
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
337 one = dat[dat$Patient == "VanDongen_cALL_14696.1",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
338 two = dat[dat$Patient == "VanDongen_cALL_14696.2",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
339 three = dat[dat$Patient == "VanDongen_cALL_14696.3",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
340 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
341
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
342 one = dat[dat$Sample == "16278_Left",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
343 two = dat[dat$Sample == "26402_Left",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
344 three = dat[dat$Sample == "26759_Left",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
345 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
346
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
347 one = dat[dat$Sample == "16278_Right",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
348 two = dat[dat$Sample == "26402_Right",]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
349 three = dat[dat$Sample == "26759_Right",]
8
fa240d1c57a9 Uploaded
davidvanzessen
parents: 7
diff changeset
350 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
351
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
352
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
353