comparison mutation_analysis.r @ 4:069419cccba4 draft

Uploaded
author davidvanzessen
date Mon, 22 Sep 2014 10:19:36 -0400
parents a0b27058dcac
children cb7c65e3e43f
comparison
equal deleted inserted replaced
3:a0b27058dcac 4:069419cccba4
1 args <- commandArgs(trailingOnly = TRUE) 1 args <- commandArgs(trailingOnly = TRUE)
2 2
3 input = args[1] 3 input = args[1]
4 summaryinput = args[2] 4 genes = unlist(strsplit(args[2], ","))
5 outputdir = args[3] 5 outputdir = args[3]
6 setwd(outputdir) 6 setwd(outputdir)
7 7
8 #dat = read.table("NWK276_MID6_25NT/8_V-REGION-nt-mutation-statistics_NWK276_MID6_25NT_051113.txt", header=T, sep="\t", fill=T, stringsAsFactors=F)
9 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) 8 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F)
10 9
11 datSum = read.table(summaryinput, header=T, sep="\t", fill=T, stringsAsFactors=F)
12 datSum = datSum[,c("Sequence.ID","J.GENE.and.allele", "AA.JUNCTION")]
13
14 dat = merge(dat, datSum, by="Sequence.ID", all.x=T)
15
16 if(length(dat$Sequence.ID) == 0){ 10 if(length(dat$Sequence.ID) == 0){
17 setwd(outputdir) 11 setwd(outputdir)
18 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) 12 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5))
19 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)") 13 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)")
20 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) 14 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F)
21 transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4)) 15 transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4))
22 row.names(transitionTable) = c("A", "C", "G", "T") 16 row.names(transitionTable) = c("A", "C", "G", "T")
23 transitionTable["A","A"] = NA 17 transitionTable["A","A"] = NA
24 transitionTable["C","C"] = NA 18 transitionTable["C","C"] = NA
25 transitionTable["G","G"] = NA 19 transitionTable["G","G"] = NA
26 transitionTable["T","T"] = NA 20 transitionTable["T","T"] = NA
27 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) 21 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA)
28 cat("0", file="n.txt") 22 cat("0", file="n.txt")
29 stop("No data") 23 stop("No data")
30 } 24 }
31 25
32 #print(paste("After filtering on 'productive' and deduplicating based on V and AA JUNCTION there are", length(dat$Sequence.ID), "sequences left")) 26
33
34 result = data.frame(x = 1:5, y = 1:5, z = 1:5)
35 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)")
36 27
37 cleanup_columns = c("FR1.IMGT.c.a", 28 cleanup_columns = c("FR1.IMGT.c.a",
38 "FR2.IMGT.g.t", 29 "FR2.IMGT.g.t",
39 "CDR1.IMGT.Nb.of.nucleotides", 30 "CDR1.IMGT.Nb.of.nucleotides",
40 "CDR2.IMGT.t.a", 31 "CDR2.IMGT.t.a",
109 #dat[dat[,col] == "",] = "0" 100 #dat[dat[,col] == "",] = "0"
110 dat[,col] = as.numeric(dat[,col]) 101 dat[,col] = as.numeric(dat[,col])
111 dat[is.na(dat[,col]),] = 0 102 dat[is.na(dat[,col]),] = 0
112 } 103 }
113 104
114 dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele) 105 dat$VRegionMutations = dat$FR1.IMGT.Nb.of.mutations +
115 dat$VGene = gsub("[*].*", "", dat$VGene)
116 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele)
117 dat$JGene = gsub("[*].*", "", dat$JGene)
118
119 dat$past = paste(dat$AA.JUNCTION, dat$VGene, dat$JGene, (dat$FR1.IMGT.Nb.of.mutations + dat$CDR1.IMGT.Nb.of.mutations + dat$FR2.IMGT.Nb.of.mutations + dat$CDR2.IMGT.Nb.of.mutations + dat$FR3.IMGT.Nb.of.mutations))
120
121 dat = dat[!duplicated(dat$past), ]
122
123 VRegionMutations = sum(dat$FR1.IMGT.Nb.of.mutations +
124 dat$CDR1.IMGT.Nb.of.mutations + 106 dat$CDR1.IMGT.Nb.of.mutations +
125 dat$FR2.IMGT.Nb.of.mutations + 107 dat$FR2.IMGT.Nb.of.mutations +
126 dat$CDR2.IMGT.Nb.of.mutations + 108 dat$CDR2.IMGT.Nb.of.mutations +
127 dat$FR3.IMGT.Nb.of.mutations) 109 dat$FR3.IMGT.Nb.of.mutations
128 110
129 VRegionNucleotides = sum( dat$FR1.IMGT.Nb.of.nucleotides + 111 dat$VRegionNucleotides = dat$FR1.IMGT.Nb.of.nucleotides +
130 dat$CDR1.IMGT.Nb.of.nucleotides + 112 dat$CDR1.IMGT.Nb.of.nucleotides +
131 dat$FR2.IMGT.Nb.of.nucleotides + 113 dat$FR2.IMGT.Nb.of.nucleotides +
132 dat$CDR2.IMGT.Nb.of.nucleotides + 114 dat$CDR2.IMGT.Nb.of.nucleotides +
133 dat$FR3.IMGT.Nb.of.nucleotides) 115 dat$FR3.IMGT.Nb.of.nucleotides
134 116
135 result[1,"x"] = VRegionMutations 117 dat$transitionMutations = dat$FR1.IMGT.a.g +
136 result[1,"y"] = VRegionNucleotides
137 result[1,"z"] = round(result[1,"x"] / result[1,"y"] * 100, digits=1)
138
139 transitionMutations = sum(dat$FR1.IMGT.a.g +
140 dat$FR1.IMGT.g.a + 118 dat$FR1.IMGT.g.a +
141 dat$FR1.IMGT.c.t + 119 dat$FR1.IMGT.c.t +
142 dat$FR1.IMGT.t.c + 120 dat$FR1.IMGT.t.c +
143 dat$CDR1.IMGT.a.g + 121 dat$CDR1.IMGT.a.g +
144 dat$CDR1.IMGT.g.a + 122 dat$CDR1.IMGT.g.a +
153 dat$CDR2.IMGT.c.t + 131 dat$CDR2.IMGT.c.t +
154 dat$CDR2.IMGT.t.c + 132 dat$CDR2.IMGT.t.c +
155 dat$FR3.IMGT.a.g + 133 dat$FR3.IMGT.a.g +
156 dat$FR3.IMGT.g.a + 134 dat$FR3.IMGT.g.a +
157 dat$FR3.IMGT.c.t + 135 dat$FR3.IMGT.c.t +
158 dat$FR3.IMGT.t.c) 136 dat$FR3.IMGT.t.c
159 137
160 result[2,"x"] = transitionMutations 138 dat$transversionMutations = dat$FR1.IMGT.a.c +
161 result[2,"y"] = VRegionMutations 139 dat$FR1.IMGT.c.a +
162 result[2,"z"] = round(result[2,"x"] / result[2,"y"] * 100, digits=1) 140 dat$FR1.IMGT.a.t +
163 141 dat$FR1.IMGT.t.a +
164 transversionMutations = sum( dat$FR1.IMGT.a.c + 142 dat$FR1.IMGT.g.c +
165 dat$FR1.IMGT.c.a + 143 dat$FR1.IMGT.c.g +
166 dat$FR1.IMGT.a.t + 144 dat$FR1.IMGT.g.t +
167 dat$FR1.IMGT.t.a + 145 dat$FR1.IMGT.t.g +
168 dat$FR1.IMGT.g.c + 146 dat$CDR1.IMGT.a.c +
169 dat$FR1.IMGT.c.g + 147 dat$CDR1.IMGT.c.a +
170 dat$FR1.IMGT.g.t + 148 dat$CDR1.IMGT.a.t +
171 dat$FR1.IMGT.t.g + 149 dat$CDR1.IMGT.t.a +
172 dat$CDR1.IMGT.a.c + 150 dat$CDR1.IMGT.g.c +
173 dat$CDR1.IMGT.c.a + 151 dat$CDR1.IMGT.c.g +
174 dat$CDR1.IMGT.a.t + 152 dat$CDR1.IMGT.g.t +
175 dat$CDR1.IMGT.t.a + 153 dat$CDR1.IMGT.t.g +
176 dat$CDR1.IMGT.g.c + 154 dat$FR2.IMGT.a.c +
177 dat$CDR1.IMGT.c.g + 155 dat$FR2.IMGT.c.a +
178 dat$CDR1.IMGT.g.t + 156 dat$FR2.IMGT.a.t +
179 dat$CDR1.IMGT.t.g + 157 dat$FR2.IMGT.t.a +
180 dat$FR2.IMGT.a.c + 158 dat$FR2.IMGT.g.c +
181 dat$FR2.IMGT.c.a + 159 dat$FR2.IMGT.c.g +
182 dat$FR2.IMGT.a.t + 160 dat$FR2.IMGT.g.t +
183 dat$FR2.IMGT.t.a + 161 dat$FR2.IMGT.t.g +
184 dat$FR2.IMGT.g.c + 162 dat$CDR2.IMGT.a.c +
185 dat$FR2.IMGT.c.g + 163 dat$CDR2.IMGT.c.a +
186 dat$FR2.IMGT.g.t + 164 dat$CDR2.IMGT.a.t +
187 dat$FR2.IMGT.t.g + 165 dat$CDR2.IMGT.t.a +
188 dat$CDR2.IMGT.a.c + 166 dat$CDR2.IMGT.g.c +
189 dat$CDR2.IMGT.c.a + 167 dat$CDR2.IMGT.c.g +
190 dat$CDR2.IMGT.a.t + 168 dat$CDR2.IMGT.g.t +
191 dat$CDR2.IMGT.t.a + 169 dat$CDR2.IMGT.t.g +
192 dat$CDR2.IMGT.g.c + 170 dat$FR3.IMGT.a.c +
193 dat$CDR2.IMGT.c.g + 171 dat$FR3.IMGT.c.a +
194 dat$CDR2.IMGT.g.t + 172 dat$FR3.IMGT.a.t +
195 dat$CDR2.IMGT.t.g + 173 dat$FR3.IMGT.t.a +
196 dat$FR3.IMGT.a.c + 174 dat$FR3.IMGT.g.c +
197 dat$FR3.IMGT.c.a + 175 dat$FR3.IMGT.c.g +
198 dat$FR3.IMGT.a.t + 176 dat$FR3.IMGT.g.t +
199 dat$FR3.IMGT.t.a + 177 dat$FR3.IMGT.t.g
200 dat$FR3.IMGT.g.c + 178
201 dat$FR3.IMGT.c.g + 179
202 dat$FR3.IMGT.g.t + 180 dat$transitionMutationsAtGC = dat$FR1.IMGT.g.a +
203 dat$FR3.IMGT.t.g)
204
205 result[3,"x"] = transversionMutations
206 result[3,"y"] = VRegionMutations
207 result[3,"z"] = round(result[3,"x"] / result[3,"y"] * 100, digits=1)
208
209 transitionMutationsAtGC = sum(dat$FR1.IMGT.g.a +
210 dat$FR1.IMGT.c.t + 181 dat$FR1.IMGT.c.t +
211 dat$CDR1.IMGT.g.a + 182 dat$CDR1.IMGT.g.a +
212 dat$CDR1.IMGT.c.t + 183 dat$CDR1.IMGT.c.t +
213 dat$FR2.IMGT.g.a + 184 dat$FR2.IMGT.g.a +
214 dat$FR2.IMGT.c.t + 185 dat$FR2.IMGT.c.t +
215 dat$CDR2.IMGT.g.a + 186 dat$CDR2.IMGT.g.a +
216 dat$CDR2.IMGT.c.t + 187 dat$CDR2.IMGT.c.t +
217 dat$FR3.IMGT.g.a + 188 dat$FR3.IMGT.g.a +
218 dat$FR3.IMGT.c.t) 189 dat$FR3.IMGT.c.t
219 190
220 totalMutationsAtGC = sum(dat$FR1.IMGT.g.a + 191 dat$totalMutationsAtGC = dat$FR1.IMGT.g.a +
221 dat$FR1.IMGT.c.t + 192 dat$FR1.IMGT.c.t +
222 dat$FR1.IMGT.c.a + 193 dat$FR1.IMGT.c.a +
223 dat$FR1.IMGT.g.c + 194 dat$FR1.IMGT.g.c +
224 dat$FR1.IMGT.c.g + 195 dat$FR1.IMGT.c.g +
225 dat$FR1.IMGT.g.t + 196 dat$FR1.IMGT.g.t +
244 dat$FR3.IMGT.g.a + 215 dat$FR3.IMGT.g.a +
245 dat$FR3.IMGT.c.t + 216 dat$FR3.IMGT.c.t +
246 dat$FR3.IMGT.c.a + 217 dat$FR3.IMGT.c.a +
247 dat$FR3.IMGT.g.c + 218 dat$FR3.IMGT.g.c +
248 dat$FR3.IMGT.c.g + 219 dat$FR3.IMGT.c.g +
249 dat$FR3.IMGT.g.t) 220 dat$FR3.IMGT.g.t
250 221
251 result[4,"x"] = transitionMutationsAtGC 222
252 result[4,"y"] = totalMutationsAtGC 223
253 result[4,"z"] = round(result[4,"x"] / result[4,"y"] * 100, digits=1) 224 setwd(outputdir)
254 225
255 result[5,"x"] = totalMutationsAtGC 226 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=5)
256 result[5,"y"] = VRegionMutations 227 for(i in 1:length(genes)){
257 result[5,"z"] = round(result[5,"x"] / result[5,"y"] * 100, digits=1) 228 gene = genes[i]
258 229 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),]
259 230 if(gene == "."){
260 #transitiontable 231 tmp = dat
232 }
233 if(length(tmp) == 0){
234 cat("0", file=paste(gene, "_value.txt" ,sep=""))
235 next
236 }
237 j = i - 1
238 x = (j * 3) + 1
239 y = (j * 3) + 2
240 z = (j * 3) + 3
241 matrx[1,x] = sum(tmp$VRegionMutations)
242 matrx[1,y] = sum(tmp$VRegionNucleotides)
243 matrx[1,z] = round(matrx[1,x] / matrx[1,y] * 100, digits=1)
244 matrx[2,x] = sum(tmp$transitionMutations)
245 matrx[2,y] = sum(tmp$VRegionMutations)
246 matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1)
247 matrx[3,x] = sum(tmp$transversionMutations)
248 matrx[3,y] = sum(tmp$VRegionMutations)
249 matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1)
250 matrx[4,x] = sum(tmp$transitionMutationsAtGC)
251 matrx[4,y] = sum(tmp$totalMutationsAtGC)
252 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
253 matrx[5,x] = sum(tmp$totalMutationsAtGC)
254 matrx[5,y] = sum(tmp$VRegionMutations)
255 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
256
257 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4)
258 row.names(transitionTable) = c("A", "C", "G", "T")
259 transitionTable["A","A"] = NA
260 transitionTable["C","C"] = NA
261 transitionTable["G","G"] = NA
262 transitionTable["T","T"] = NA
263 nts = c("a", "c", "g", "t")
264
265
266 for(nt1 in nts){
267 for(nt2 in nts){
268 if(nt1 == nt2){
269 next
270 }
271 NT1 = LETTERS[letters == nt1]
272 NT2 = LETTERS[letters == nt2]
273 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="")
274 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="")
275 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="")
276 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="")
277 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="")
278 transitionTable[NT1,NT2] = sum( tmp[,FR1] +
279 tmp[,CDR1] +
280 tmp[,FR2] +
281 tmp[,CDR2] +
282 tmp[,FR3])
283 }
284 }
285 write.table(x=transitionTable, file=paste("transitions_", gene ,".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
286 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", gene ,".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
287
288 cat(matrx[1,x], file=paste(gene, "_value.txt" ,sep=""))
289 cat(length(tmp$Sequence.ID), file=paste(gene, "_n.txt" ,sep=""))
290 }
291
292 #again for all of the data
293 tmp = dat
294 j = i
295 x = (j * 3) + 1
296 y = (j * 3) + 2
297 z = (j * 3) + 3
298 matrx[1,x] = sum(tmp$VRegionMutations)
299 matrx[1,y] = sum(tmp$VRegionNucleotides)
300 matrx[1,z] = round(matrx[1,x] / matrx[1,y] * 100, digits=1)
301 matrx[2,x] = sum(tmp$transitionMutations)
302 matrx[2,y] = sum(tmp$VRegionMutations)
303 matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1)
304 matrx[3,x] = sum(tmp$transversionMutations)
305 matrx[3,y] = sum(tmp$VRegionMutations)
306 matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1)
307 matrx[4,x] = sum(tmp$transitionMutationsAtGC)
308 matrx[4,y] = sum(tmp$totalMutationsAtGC)
309 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
310 matrx[5,x] = sum(tmp$totalMutationsAtGC)
311 matrx[5,y] = sum(tmp$VRegionMutations)
312 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
261 313
262 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) 314 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4)
263 row.names(transitionTable) = c("A", "C", "G", "T") 315 row.names(transitionTable) = c("A", "C", "G", "T")
264 transitionTable["A","A"] = NA 316 transitionTable["A","A"] = NA
265 transitionTable["C","C"] = NA 317 transitionTable["C","C"] = NA
267 transitionTable["T","T"] = NA 319 transitionTable["T","T"] = NA
268 nts = c("a", "c", "g", "t") 320 nts = c("a", "c", "g", "t")
269 321
270 322
271 for(nt1 in nts){ 323 for(nt1 in nts){
272 for(nt2 in nts){ 324 for(nt2 in nts){
273 if(nt1 == nt2){ 325 if(nt1 == nt2){
274 next 326 next
275 } 327 }
276 NT1 = LETTERS[letters == nt1] 328 NT1 = LETTERS[letters == nt1]
277 NT2 = LETTERS[letters == nt2] 329 NT2 = LETTERS[letters == nt2]
278 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") 330 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="")
279 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") 331 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="")
280 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") 332 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="")
281 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") 333 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="")
282 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") 334 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="")
283 transitionTable[NT1,NT2] = sum( dat[,FR1] + 335 transitionTable[NT1,NT2] = sum( tmp[,FR1] +
284 dat[,CDR1] + 336 tmp[,CDR1] +
285 dat[,FR2] + 337 tmp[,FR2] +
286 dat[,CDR2] + 338 tmp[,CDR2] +
287 dat[,FR3]) 339 tmp[,FR3])
288 } 340 }
289 } 341 }
290 342 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA)
291 setwd(outputdir) 343 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file="matched_all.txt", sep="\t",quote=F,row.names=F,col.names=T)
344 cat(matrx[1,x], file="total_value.txt")
345 cat(length(tmp$Sequence.ID), file="total_n.txt")
346
347
348
349 result = data.frame(matrx)
350 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)")
351
292 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) 352 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F)
293 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) 353
294 cat(length(dat$Sequence.ID), file="n.txt") 354
295 355 if (!("ggplot2" %in% rownames(installed.packages()))) {
296 356 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/")
297 weblogo = dat[,c("Sequence.ID", "VGene")] 357 }
298 weblogo$VGene = gsub("\\*.*", "", weblogo$VGene) 358 library(ggplot2)
299 359
300 rs12 = read.table("HS12RSS.txt", sep="\t", header=TRUE) 360 genesForPlot = gsub("[0-9]", "", dat$best_match)
301 rs23 = read.table("HS23RSS.txt", sep="\t", header=TRUE) 361 genesForPlot = data.frame(table(genesForPlot))
302 362 colnames(genesForPlot) = c("Gene","Freq")
303 result12 = merge(weblogo, rs12, by.x="VGene", by.y="Gene") 363 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
304 result23 = merge(weblogo, rs23, by.x="VGene", by.y="Gene") 364
305 365 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
306 write.table(x=result12$Sequence, file="weblogo_in_rs12.txt", sep=",",quote=F,row.names=F,col.names=F) 366 pc = pc + geom_bar(width = 1, stat = "identity")
307 write.table(x=result23$Sequence, file="weblogo_in_rs23.txt", sep=",",quote=F,row.names=F,col.names=F) 367 pc = pc + coord_polar(theta="y")
308 368 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")"))
369
370 png(filename="all.png")
371 pc
372 dev.off()
373
374
375 #blegh
376 genesForPlot = dat[grepl("ca", dat$best_match),]$best_match
377 if(length(genesForPlot) > 0){
378 genesForPlot = data.frame(table(genesForPlot))
379 colnames(genesForPlot) = c("Gene","Freq")
380 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
381
382 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
383 pc = pc + geom_bar(width = 1, stat = "identity")
384 pc = pc + coord_polar(theta="y")
385 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")"))
386
387
388 png(filename="ca.png")
389 print(pc)
390 dev.off()
391 }
392
393 genesForPlot = dat[grepl("cg", dat$best_match),]$best_match
394 if(length(genesForPlot) > 0){
395 genesForPlot = data.frame(table(genesForPlot))
396 colnames(genesForPlot) = c("Gene","Freq")
397 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
398
399 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
400 pc = pc + geom_bar(width = 1, stat = "identity")
401 pc = pc + coord_polar(theta="y")
402 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG", "( n =", sum(genesForPlot$Freq), ")"))
403
404
405 png(filename="cg.png")
406 print(pc)
407 dev.off()
408 }