comparison RScript.r @ 6:f9a657db7af5 draft

Uploaded
author davidvanzessen
date Wed, 16 Oct 2013 04:30:42 -0400
parents 021d293121bb
children ba6c6725b12f
comparison
equal deleted inserted replaced
5:021d293121bb 6:f9a657db7af5
31 31
32 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] 32 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ]
33 33
34 PRODF = PROD[ -1] 34 PRODF = PROD[ -1]
35 35
36 #unique(PRODF[duplicated(PRODF),])
37 #length(row.names(PRODF[duplicated(PRODF),]))
38
39 #length(row.names(PRODF))
40 PRODF = unique(PRODF) 36 PRODF = unique(PRODF)
41 #length(row.names(PRODF)) 37
38 uniqueCount = split(PRODF, f=PRODF[,"Sample"])
39
40 for(i in 1:length(uniqueCount)) {
41 dat = data.frame(uniqueCount[i])
42 sample = paste(unique(dat[,15]))
43 uniqueCount[sample] = length(dat[,1])
44 }
42 45
43 PRODFV = ddply(PRODF, c("Sample", "Top.V.Gene"), function(x) summary(x$VDJCDR3)) 46 PRODFV = ddply(PRODF, c("Sample", "Top.V.Gene"), function(x) summary(x$VDJCDR3))
44 PRODFV$Length = as.numeric(PRODFV$Length) 47 PRODFV$Length = as.numeric(PRODFV$Length)
45 Total = 0 48 Total = 0
46 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) 49 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length)))
98 pD 101 pD
99 dev.off(); 102 dev.off();
100 103
101 pJ = ggplot(PRODFJ) 104 pJ = ggplot(PRODFJ)
102 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 105 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
103 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") 106 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage")
104 107
105 png("JPlot.png",width = 800, height = 600) 108 png("JPlot.png",width = 800, height = 600)
106 pJ 109 pJ
107 dev.off(); 110 dev.off();
108 111
109 112
110 plotVD <- function(dat){ 113 plotVD <- function(dat){
111 #dat = dat[order(dat[,8],dat[,9]),]
112 img = ggplot() + 114 img = ggplot() +
113 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + 115 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) +
114 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 116 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
115 scale_fill_gradient(low="gold", high="blue", na.value="white") + 117 scale_fill_gradient(low="gold", high="blue", na.value="white") +
116 ggtitle(unique(dat$Sample)) + 118 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) +
117 xlab("D genes") + 119 xlab("D genes") +
118 ylab("V Genes") 120 ylab("V Genes")
119 121
120 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) 122 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name)))
121 print(img) 123 print(img)
137 lapply(l, FUN=plotVD) 139 lapply(l, FUN=plotVD)
138 140
139 141
140 142
141 plotVJ <- function(dat){ 143 plotVJ <- function(dat){
142 #dat = dat[order(dat[,8],dat[,9]),]
143 img = ggplot() + 144 img = ggplot() +
144 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + 145 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) +
145 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 146 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
146 scale_fill_gradient(low="gold", high="blue", na.value="white") + 147 scale_fill_gradient(low="gold", high="blue", na.value="white") +
147 ggtitle(unique(dat$Sample)) + 148 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) +
148 xlab("J genes") + 149 xlab("J genes") +
149 ylab("V Genes") 150 ylab("V Genes")
150 151
151 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) 152 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name)))
152 print(img) 153 print(img)
164 #completeVJ$log[is.na(completeVJ$log)] = 0 165 #completeVJ$log[is.na(completeVJ$log)] = 0
165 l = split(completeVJ, f=completeVJ[,"Sample"]) 166 l = split(completeVJ, f=completeVJ[,"Sample"])
166 lapply(l, FUN=plotVJ) 167 lapply(l, FUN=plotVJ)
167 168
168 plotDJ <- function(dat){ 169 plotDJ <- function(dat){
169 #dat = dat[order(dat[,8],dat[,9]),]
170 img = ggplot() + 170 img = ggplot() +
171 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + 171 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) +
172 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 172 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
173 scale_fill_gradient(low="gold", high="blue", na.value="white") + 173 scale_fill_gradient(low="gold", high="blue", na.value="white") +
174 ggtitle(unique(dat$Sample)) + 174 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) +
175 xlab("J genes") + 175 xlab("J genes") +
176 ylab("D Genes") 176 ylab("D Genes")
177 177
178 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) 178 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name)))
179 print(img) 179 print(img)