comparison RScript.r @ 4:10cfa5e9186e draft

Uploaded
author davidvanzessen
date Mon, 14 Oct 2013 08:12:41 -0400
parents d27d745d0556
children 021d293121bb
comparison
equal deleted inserted replaced
3:d27d745d0556 4:10cfa5e9186e
106 pJ 106 pJ
107 dev.off(); 107 dev.off();
108 108
109 109
110 plotVD <- function(dat){ 110 plotVD <- function(dat){
111 ggplot() + 111 img = ggplot() +
112 geom_tile(data=dat, aes(x=factor(Top.D.Gene), y=factor(Top.V.Gene), fill=log)) + 112 geom_tile(data=dat, aes(x=factor(Top.D.Gene), y=factor(Top.V.Gene), fill=log)) +
113 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 113 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
114 scale_fill_gradient(low="gold", high="blue", na.value="white") + 114 scale_fill_gradient(low="gold", high="blue", na.value="white") +
115 ggtitle(unique(dat$Sample)) + 115 ggtitle(unique(dat$Sample)) +
116 xlab("D genes") + 116 xlab("D genes") +
117 ylab("V Genes") 117 ylab("V Genes")
118
119 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name)))
120 print(img)
121 dev.off()
118 } 122 }
119 123
120 124
121 VandDCount = ddply(PRODF, c("Top.V.Gene", "Top.D.Gene", "Sample"), function(x) summary(x$VDJCDR3)) 125 VandDCount = ddply(PRODF, c("Top.V.Gene", "Top.D.Gene", "Sample"), function(x) summary(x$VDJCDR3))
122 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) 126 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample))
124 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) 128 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE)
125 completeVD$Length = as.numeric(completeVD$Length) 129 completeVD$Length = as.numeric(completeVD$Length)
126 completeVD$log = log(completeVD$Length) 130 completeVD$log = log(completeVD$Length)
127 #completeVD$log[is.na(completeVD$log)] = 0 131 #completeVD$log[is.na(completeVD$log)] = 0
128 l = split(completeVD, f=completeVD[,"Sample"]) 132 l = split(completeVD, f=completeVD[,"Sample"])
129 png("HeatmapVD%d.png", width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) 133
130 lapply(l, FUN=plotVD) 134 lapply(l, FUN=plotVD)
131 dev.off() 135
132 136
133 137
134 plotVJ <- function(dat){ 138 plotVJ <- function(dat){
135 ggplot() + 139 img = ggplot() +
136 geom_tile(data=dat, aes(x=factor(Top.J.Gene), y=factor(Top.V.Gene), fill=log)) + 140 geom_tile(data=dat, aes(x=factor(Top.J.Gene), y=factor(Top.V.Gene), fill=log)) +
137 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 141 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
138 scale_fill_gradient(low="gold", high="blue", na.value="white") + 142 scale_fill_gradient(low="gold", high="blue", na.value="white") +
139 ggtitle(unique(dat$Sample)) + 143 ggtitle(unique(dat$Sample)) +
140 xlab("J genes") + 144 xlab("J genes") +
141 ylab("V Genes") 145 ylab("V Genes")
146
147 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name)))
148 print(img)
149 dev.off()
142 } 150 }
143 151
144 VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) 152 VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3))
145 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) 153 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample))
146 154
147 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) 155 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE)
148 completeVJ$Length = as.numeric(completeVJ$Length) 156 completeVJ$Length = as.numeric(completeVJ$Length)
149 completeVJ$log = log(completeVJ$Length) 157 completeVJ$log = log(completeVJ$Length)
150 #completeVJ$log[is.na(completeVJ$log)] = 0 158 #completeVJ$log[is.na(completeVJ$log)] = 0
151 l = split(completeVJ, f=completeVJ[,"Sample"]) 159 l = split(completeVJ, f=completeVJ[,"Sample"])
152 png("HeatmapVJ%d.png", width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name)))
153 lapply(l, FUN=plotVJ) 160 lapply(l, FUN=plotVJ)
154 dev.off()
155 161
156 plotDJ <- function(dat){ 162 plotDJ <- function(dat){
157 ggplot() + 163 img = ggplot() +
158 geom_tile(data=dat, aes(x=factor(Top.J.Gene), y=factor(Top.D.Gene), fill=log)) + 164 geom_tile(data=dat, aes(x=factor(Top.J.Gene), y=factor(Top.D.Gene), fill=log)) +
159 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 165 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
160 scale_fill_gradient(low="gold", high="blue", na.value="white") + 166 scale_fill_gradient(low="gold", high="blue", na.value="white") +
161 ggtitle(unique(dat$Sample)) + 167 ggtitle(unique(dat$Sample)) +
162 xlab("J genes") + 168 xlab("J genes") +
163 ylab("D Genes") 169 ylab("D Genes")
170
171 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name)))
172 print(img)
173 dev.off()
164 } 174 }
165 175
166 DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) 176 DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3))
167 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) 177 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample))
168 178
169 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) 179 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE)
170 completeDJ$Length = as.numeric(completeDJ$Length) 180 completeDJ$Length = as.numeric(completeDJ$Length)
171 completeDJ$log = log(completeDJ$Length) 181 completeDJ$log = log(completeDJ$Length)
172 #completeDJ$log[is.na(completeDJ$log)] = 0 182 #completeDJ$log[is.na(completeDJ$log)] = 0
173 l = split(completeDJ, f=completeDJ[,"Sample"]) 183 l = split(completeDJ, f=completeDJ[,"Sample"])
174 png("HeatmapDJ%d.png", width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name)))
175 lapply(l, FUN=plotDJ) 184 lapply(l, FUN=plotDJ)
176 dev.off()
177 185
178 186
179 sampleFile <- file("samples.txt") 187 sampleFile <- file("samples.txt")
180 un = unique(test$Sample) 188 un = unique(test$Sample)
181 un = paste(un, sep="\n") 189 un = paste(un, sep="\n")