Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.r @ 4:069419cccba4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 22 Sep 2014 10:19:36 -0400 |
parents | a0b27058dcac |
children | cb7c65e3e43f |
comparison
equal
deleted
inserted
replaced
3:a0b27058dcac | 4:069419cccba4 |
---|---|
1 args <- commandArgs(trailingOnly = TRUE) | 1 args <- commandArgs(trailingOnly = TRUE) |
2 | 2 |
3 input = args[1] | 3 input = args[1] |
4 summaryinput = args[2] | 4 genes = unlist(strsplit(args[2], ",")) |
5 outputdir = args[3] | 5 outputdir = args[3] |
6 setwd(outputdir) | 6 setwd(outputdir) |
7 | 7 |
8 #dat = read.table("NWK276_MID6_25NT/8_V-REGION-nt-mutation-statistics_NWK276_MID6_25NT_051113.txt", header=T, sep="\t", fill=T, stringsAsFactors=F) | |
9 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) | 8 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) |
10 | 9 |
11 datSum = read.table(summaryinput, header=T, sep="\t", fill=T, stringsAsFactors=F) | |
12 datSum = datSum[,c("Sequence.ID","J.GENE.and.allele", "AA.JUNCTION")] | |
13 | |
14 dat = merge(dat, datSum, by="Sequence.ID", all.x=T) | |
15 | |
16 if(length(dat$Sequence.ID) == 0){ | 10 if(length(dat$Sequence.ID) == 0){ |
17 setwd(outputdir) | 11 setwd(outputdir) |
18 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) | 12 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) |
19 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)") | 13 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)") |
20 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) | 14 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) |
21 transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4)) | 15 transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4)) |
22 row.names(transitionTable) = c("A", "C", "G", "T") | 16 row.names(transitionTable) = c("A", "C", "G", "T") |
23 transitionTable["A","A"] = NA | 17 transitionTable["A","A"] = NA |
24 transitionTable["C","C"] = NA | 18 transitionTable["C","C"] = NA |
25 transitionTable["G","G"] = NA | 19 transitionTable["G","G"] = NA |
26 transitionTable["T","T"] = NA | 20 transitionTable["T","T"] = NA |
27 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) | 21 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) |
28 cat("0", file="n.txt") | 22 cat("0", file="n.txt") |
29 stop("No data") | 23 stop("No data") |
30 } | 24 } |
31 | 25 |
32 #print(paste("After filtering on 'productive' and deduplicating based on V and AA JUNCTION there are", length(dat$Sequence.ID), "sequences left")) | 26 |
33 | |
34 result = data.frame(x = 1:5, y = 1:5, z = 1:5) | |
35 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)") | |
36 | 27 |
37 cleanup_columns = c("FR1.IMGT.c.a", | 28 cleanup_columns = c("FR1.IMGT.c.a", |
38 "FR2.IMGT.g.t", | 29 "FR2.IMGT.g.t", |
39 "CDR1.IMGT.Nb.of.nucleotides", | 30 "CDR1.IMGT.Nb.of.nucleotides", |
40 "CDR2.IMGT.t.a", | 31 "CDR2.IMGT.t.a", |
109 #dat[dat[,col] == "",] = "0" | 100 #dat[dat[,col] == "",] = "0" |
110 dat[,col] = as.numeric(dat[,col]) | 101 dat[,col] = as.numeric(dat[,col]) |
111 dat[is.na(dat[,col]),] = 0 | 102 dat[is.na(dat[,col]),] = 0 |
112 } | 103 } |
113 | 104 |
114 dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele) | 105 dat$VRegionMutations = dat$FR1.IMGT.Nb.of.mutations + |
115 dat$VGene = gsub("[*].*", "", dat$VGene) | |
116 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) | |
117 dat$JGene = gsub("[*].*", "", dat$JGene) | |
118 | |
119 dat$past = paste(dat$AA.JUNCTION, dat$VGene, dat$JGene, (dat$FR1.IMGT.Nb.of.mutations + dat$CDR1.IMGT.Nb.of.mutations + dat$FR2.IMGT.Nb.of.mutations + dat$CDR2.IMGT.Nb.of.mutations + dat$FR3.IMGT.Nb.of.mutations)) | |
120 | |
121 dat = dat[!duplicated(dat$past), ] | |
122 | |
123 VRegionMutations = sum(dat$FR1.IMGT.Nb.of.mutations + | |
124 dat$CDR1.IMGT.Nb.of.mutations + | 106 dat$CDR1.IMGT.Nb.of.mutations + |
125 dat$FR2.IMGT.Nb.of.mutations + | 107 dat$FR2.IMGT.Nb.of.mutations + |
126 dat$CDR2.IMGT.Nb.of.mutations + | 108 dat$CDR2.IMGT.Nb.of.mutations + |
127 dat$FR3.IMGT.Nb.of.mutations) | 109 dat$FR3.IMGT.Nb.of.mutations |
128 | 110 |
129 VRegionNucleotides = sum( dat$FR1.IMGT.Nb.of.nucleotides + | 111 dat$VRegionNucleotides = dat$FR1.IMGT.Nb.of.nucleotides + |
130 dat$CDR1.IMGT.Nb.of.nucleotides + | 112 dat$CDR1.IMGT.Nb.of.nucleotides + |
131 dat$FR2.IMGT.Nb.of.nucleotides + | 113 dat$FR2.IMGT.Nb.of.nucleotides + |
132 dat$CDR2.IMGT.Nb.of.nucleotides + | 114 dat$CDR2.IMGT.Nb.of.nucleotides + |
133 dat$FR3.IMGT.Nb.of.nucleotides) | 115 dat$FR3.IMGT.Nb.of.nucleotides |
134 | 116 |
135 result[1,"x"] = VRegionMutations | 117 dat$transitionMutations = dat$FR1.IMGT.a.g + |
136 result[1,"y"] = VRegionNucleotides | |
137 result[1,"z"] = round(result[1,"x"] / result[1,"y"] * 100, digits=1) | |
138 | |
139 transitionMutations = sum(dat$FR1.IMGT.a.g + | |
140 dat$FR1.IMGT.g.a + | 118 dat$FR1.IMGT.g.a + |
141 dat$FR1.IMGT.c.t + | 119 dat$FR1.IMGT.c.t + |
142 dat$FR1.IMGT.t.c + | 120 dat$FR1.IMGT.t.c + |
143 dat$CDR1.IMGT.a.g + | 121 dat$CDR1.IMGT.a.g + |
144 dat$CDR1.IMGT.g.a + | 122 dat$CDR1.IMGT.g.a + |
153 dat$CDR2.IMGT.c.t + | 131 dat$CDR2.IMGT.c.t + |
154 dat$CDR2.IMGT.t.c + | 132 dat$CDR2.IMGT.t.c + |
155 dat$FR3.IMGT.a.g + | 133 dat$FR3.IMGT.a.g + |
156 dat$FR3.IMGT.g.a + | 134 dat$FR3.IMGT.g.a + |
157 dat$FR3.IMGT.c.t + | 135 dat$FR3.IMGT.c.t + |
158 dat$FR3.IMGT.t.c) | 136 dat$FR3.IMGT.t.c |
159 | 137 |
160 result[2,"x"] = transitionMutations | 138 dat$transversionMutations = dat$FR1.IMGT.a.c + |
161 result[2,"y"] = VRegionMutations | 139 dat$FR1.IMGT.c.a + |
162 result[2,"z"] = round(result[2,"x"] / result[2,"y"] * 100, digits=1) | 140 dat$FR1.IMGT.a.t + |
163 | 141 dat$FR1.IMGT.t.a + |
164 transversionMutations = sum( dat$FR1.IMGT.a.c + | 142 dat$FR1.IMGT.g.c + |
165 dat$FR1.IMGT.c.a + | 143 dat$FR1.IMGT.c.g + |
166 dat$FR1.IMGT.a.t + | 144 dat$FR1.IMGT.g.t + |
167 dat$FR1.IMGT.t.a + | 145 dat$FR1.IMGT.t.g + |
168 dat$FR1.IMGT.g.c + | 146 dat$CDR1.IMGT.a.c + |
169 dat$FR1.IMGT.c.g + | 147 dat$CDR1.IMGT.c.a + |
170 dat$FR1.IMGT.g.t + | 148 dat$CDR1.IMGT.a.t + |
171 dat$FR1.IMGT.t.g + | 149 dat$CDR1.IMGT.t.a + |
172 dat$CDR1.IMGT.a.c + | 150 dat$CDR1.IMGT.g.c + |
173 dat$CDR1.IMGT.c.a + | 151 dat$CDR1.IMGT.c.g + |
174 dat$CDR1.IMGT.a.t + | 152 dat$CDR1.IMGT.g.t + |
175 dat$CDR1.IMGT.t.a + | 153 dat$CDR1.IMGT.t.g + |
176 dat$CDR1.IMGT.g.c + | 154 dat$FR2.IMGT.a.c + |
177 dat$CDR1.IMGT.c.g + | 155 dat$FR2.IMGT.c.a + |
178 dat$CDR1.IMGT.g.t + | 156 dat$FR2.IMGT.a.t + |
179 dat$CDR1.IMGT.t.g + | 157 dat$FR2.IMGT.t.a + |
180 dat$FR2.IMGT.a.c + | 158 dat$FR2.IMGT.g.c + |
181 dat$FR2.IMGT.c.a + | 159 dat$FR2.IMGT.c.g + |
182 dat$FR2.IMGT.a.t + | 160 dat$FR2.IMGT.g.t + |
183 dat$FR2.IMGT.t.a + | 161 dat$FR2.IMGT.t.g + |
184 dat$FR2.IMGT.g.c + | 162 dat$CDR2.IMGT.a.c + |
185 dat$FR2.IMGT.c.g + | 163 dat$CDR2.IMGT.c.a + |
186 dat$FR2.IMGT.g.t + | 164 dat$CDR2.IMGT.a.t + |
187 dat$FR2.IMGT.t.g + | 165 dat$CDR2.IMGT.t.a + |
188 dat$CDR2.IMGT.a.c + | 166 dat$CDR2.IMGT.g.c + |
189 dat$CDR2.IMGT.c.a + | 167 dat$CDR2.IMGT.c.g + |
190 dat$CDR2.IMGT.a.t + | 168 dat$CDR2.IMGT.g.t + |
191 dat$CDR2.IMGT.t.a + | 169 dat$CDR2.IMGT.t.g + |
192 dat$CDR2.IMGT.g.c + | 170 dat$FR3.IMGT.a.c + |
193 dat$CDR2.IMGT.c.g + | 171 dat$FR3.IMGT.c.a + |
194 dat$CDR2.IMGT.g.t + | 172 dat$FR3.IMGT.a.t + |
195 dat$CDR2.IMGT.t.g + | 173 dat$FR3.IMGT.t.a + |
196 dat$FR3.IMGT.a.c + | 174 dat$FR3.IMGT.g.c + |
197 dat$FR3.IMGT.c.a + | 175 dat$FR3.IMGT.c.g + |
198 dat$FR3.IMGT.a.t + | 176 dat$FR3.IMGT.g.t + |
199 dat$FR3.IMGT.t.a + | 177 dat$FR3.IMGT.t.g |
200 dat$FR3.IMGT.g.c + | 178 |
201 dat$FR3.IMGT.c.g + | 179 |
202 dat$FR3.IMGT.g.t + | 180 dat$transitionMutationsAtGC = dat$FR1.IMGT.g.a + |
203 dat$FR3.IMGT.t.g) | |
204 | |
205 result[3,"x"] = transversionMutations | |
206 result[3,"y"] = VRegionMutations | |
207 result[3,"z"] = round(result[3,"x"] / result[3,"y"] * 100, digits=1) | |
208 | |
209 transitionMutationsAtGC = sum(dat$FR1.IMGT.g.a + | |
210 dat$FR1.IMGT.c.t + | 181 dat$FR1.IMGT.c.t + |
211 dat$CDR1.IMGT.g.a + | 182 dat$CDR1.IMGT.g.a + |
212 dat$CDR1.IMGT.c.t + | 183 dat$CDR1.IMGT.c.t + |
213 dat$FR2.IMGT.g.a + | 184 dat$FR2.IMGT.g.a + |
214 dat$FR2.IMGT.c.t + | 185 dat$FR2.IMGT.c.t + |
215 dat$CDR2.IMGT.g.a + | 186 dat$CDR2.IMGT.g.a + |
216 dat$CDR2.IMGT.c.t + | 187 dat$CDR2.IMGT.c.t + |
217 dat$FR3.IMGT.g.a + | 188 dat$FR3.IMGT.g.a + |
218 dat$FR3.IMGT.c.t) | 189 dat$FR3.IMGT.c.t |
219 | 190 |
220 totalMutationsAtGC = sum(dat$FR1.IMGT.g.a + | 191 dat$totalMutationsAtGC = dat$FR1.IMGT.g.a + |
221 dat$FR1.IMGT.c.t + | 192 dat$FR1.IMGT.c.t + |
222 dat$FR1.IMGT.c.a + | 193 dat$FR1.IMGT.c.a + |
223 dat$FR1.IMGT.g.c + | 194 dat$FR1.IMGT.g.c + |
224 dat$FR1.IMGT.c.g + | 195 dat$FR1.IMGT.c.g + |
225 dat$FR1.IMGT.g.t + | 196 dat$FR1.IMGT.g.t + |
244 dat$FR3.IMGT.g.a + | 215 dat$FR3.IMGT.g.a + |
245 dat$FR3.IMGT.c.t + | 216 dat$FR3.IMGT.c.t + |
246 dat$FR3.IMGT.c.a + | 217 dat$FR3.IMGT.c.a + |
247 dat$FR3.IMGT.g.c + | 218 dat$FR3.IMGT.g.c + |
248 dat$FR3.IMGT.c.g + | 219 dat$FR3.IMGT.c.g + |
249 dat$FR3.IMGT.g.t) | 220 dat$FR3.IMGT.g.t |
250 | 221 |
251 result[4,"x"] = transitionMutationsAtGC | 222 |
252 result[4,"y"] = totalMutationsAtGC | 223 |
253 result[4,"z"] = round(result[4,"x"] / result[4,"y"] * 100, digits=1) | 224 setwd(outputdir) |
254 | 225 |
255 result[5,"x"] = totalMutationsAtGC | 226 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=5) |
256 result[5,"y"] = VRegionMutations | 227 for(i in 1:length(genes)){ |
257 result[5,"z"] = round(result[5,"x"] / result[5,"y"] * 100, digits=1) | 228 gene = genes[i] |
258 | 229 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] |
259 | 230 if(gene == "."){ |
260 #transitiontable | 231 tmp = dat |
232 } | |
233 if(length(tmp) == 0){ | |
234 cat("0", file=paste(gene, "_value.txt" ,sep="")) | |
235 next | |
236 } | |
237 j = i - 1 | |
238 x = (j * 3) + 1 | |
239 y = (j * 3) + 2 | |
240 z = (j * 3) + 3 | |
241 matrx[1,x] = sum(tmp$VRegionMutations) | |
242 matrx[1,y] = sum(tmp$VRegionNucleotides) | |
243 matrx[1,z] = round(matrx[1,x] / matrx[1,y] * 100, digits=1) | |
244 matrx[2,x] = sum(tmp$transitionMutations) | |
245 matrx[2,y] = sum(tmp$VRegionMutations) | |
246 matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1) | |
247 matrx[3,x] = sum(tmp$transversionMutations) | |
248 matrx[3,y] = sum(tmp$VRegionMutations) | |
249 matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1) | |
250 matrx[4,x] = sum(tmp$transitionMutationsAtGC) | |
251 matrx[4,y] = sum(tmp$totalMutationsAtGC) | |
252 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) | |
253 matrx[5,x] = sum(tmp$totalMutationsAtGC) | |
254 matrx[5,y] = sum(tmp$VRegionMutations) | |
255 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) | |
256 | |
257 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) | |
258 row.names(transitionTable) = c("A", "C", "G", "T") | |
259 transitionTable["A","A"] = NA | |
260 transitionTable["C","C"] = NA | |
261 transitionTable["G","G"] = NA | |
262 transitionTable["T","T"] = NA | |
263 nts = c("a", "c", "g", "t") | |
264 | |
265 | |
266 for(nt1 in nts){ | |
267 for(nt2 in nts){ | |
268 if(nt1 == nt2){ | |
269 next | |
270 } | |
271 NT1 = LETTERS[letters == nt1] | |
272 NT2 = LETTERS[letters == nt2] | |
273 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") | |
274 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") | |
275 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") | |
276 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") | |
277 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") | |
278 transitionTable[NT1,NT2] = sum( tmp[,FR1] + | |
279 tmp[,CDR1] + | |
280 tmp[,FR2] + | |
281 tmp[,CDR2] + | |
282 tmp[,FR3]) | |
283 } | |
284 } | |
285 write.table(x=transitionTable, file=paste("transitions_", gene ,".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | |
286 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", gene ,".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | |
287 | |
288 cat(matrx[1,x], file=paste(gene, "_value.txt" ,sep="")) | |
289 cat(length(tmp$Sequence.ID), file=paste(gene, "_n.txt" ,sep="")) | |
290 } | |
291 | |
292 #again for all of the data | |
293 tmp = dat | |
294 j = i | |
295 x = (j * 3) + 1 | |
296 y = (j * 3) + 2 | |
297 z = (j * 3) + 3 | |
298 matrx[1,x] = sum(tmp$VRegionMutations) | |
299 matrx[1,y] = sum(tmp$VRegionNucleotides) | |
300 matrx[1,z] = round(matrx[1,x] / matrx[1,y] * 100, digits=1) | |
301 matrx[2,x] = sum(tmp$transitionMutations) | |
302 matrx[2,y] = sum(tmp$VRegionMutations) | |
303 matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1) | |
304 matrx[3,x] = sum(tmp$transversionMutations) | |
305 matrx[3,y] = sum(tmp$VRegionMutations) | |
306 matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1) | |
307 matrx[4,x] = sum(tmp$transitionMutationsAtGC) | |
308 matrx[4,y] = sum(tmp$totalMutationsAtGC) | |
309 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) | |
310 matrx[5,x] = sum(tmp$totalMutationsAtGC) | |
311 matrx[5,y] = sum(tmp$VRegionMutations) | |
312 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) | |
261 | 313 |
262 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) | 314 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) |
263 row.names(transitionTable) = c("A", "C", "G", "T") | 315 row.names(transitionTable) = c("A", "C", "G", "T") |
264 transitionTable["A","A"] = NA | 316 transitionTable["A","A"] = NA |
265 transitionTable["C","C"] = NA | 317 transitionTable["C","C"] = NA |
267 transitionTable["T","T"] = NA | 319 transitionTable["T","T"] = NA |
268 nts = c("a", "c", "g", "t") | 320 nts = c("a", "c", "g", "t") |
269 | 321 |
270 | 322 |
271 for(nt1 in nts){ | 323 for(nt1 in nts){ |
272 for(nt2 in nts){ | 324 for(nt2 in nts){ |
273 if(nt1 == nt2){ | 325 if(nt1 == nt2){ |
274 next | 326 next |
275 } | 327 } |
276 NT1 = LETTERS[letters == nt1] | 328 NT1 = LETTERS[letters == nt1] |
277 NT2 = LETTERS[letters == nt2] | 329 NT2 = LETTERS[letters == nt2] |
278 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") | 330 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") |
279 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") | 331 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") |
280 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") | 332 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") |
281 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") | 333 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") |
282 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") | 334 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") |
283 transitionTable[NT1,NT2] = sum( dat[,FR1] + | 335 transitionTable[NT1,NT2] = sum( tmp[,FR1] + |
284 dat[,CDR1] + | 336 tmp[,CDR1] + |
285 dat[,FR2] + | 337 tmp[,FR2] + |
286 dat[,CDR2] + | 338 tmp[,CDR2] + |
287 dat[,FR3]) | 339 tmp[,FR3]) |
288 } | 340 } |
289 } | 341 } |
290 | 342 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) |
291 setwd(outputdir) | 343 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file="matched_all.txt", sep="\t",quote=F,row.names=F,col.names=T) |
344 cat(matrx[1,x], file="total_value.txt") | |
345 cat(length(tmp$Sequence.ID), file="total_n.txt") | |
346 | |
347 | |
348 | |
349 result = data.frame(matrx) | |
350 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)") | |
351 | |
292 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) | 352 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) |
293 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) | 353 |
294 cat(length(dat$Sequence.ID), file="n.txt") | 354 |
295 | 355 if (!("ggplot2" %in% rownames(installed.packages()))) { |
296 | 356 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") |
297 weblogo = dat[,c("Sequence.ID", "VGene")] | 357 } |
298 weblogo$VGene = gsub("\\*.*", "", weblogo$VGene) | 358 library(ggplot2) |
299 | 359 |
300 rs12 = read.table("HS12RSS.txt", sep="\t", header=TRUE) | 360 genesForPlot = gsub("[0-9]", "", dat$best_match) |
301 rs23 = read.table("HS23RSS.txt", sep="\t", header=TRUE) | 361 genesForPlot = data.frame(table(genesForPlot)) |
302 | 362 colnames(genesForPlot) = c("Gene","Freq") |
303 result12 = merge(weblogo, rs12, by.x="VGene", by.y="Gene") | 363 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) |
304 result23 = merge(weblogo, rs23, by.x="VGene", by.y="Gene") | 364 |
305 | 365 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) |
306 write.table(x=result12$Sequence, file="weblogo_in_rs12.txt", sep=",",quote=F,row.names=F,col.names=F) | 366 pc = pc + geom_bar(width = 1, stat = "identity") |
307 write.table(x=result23$Sequence, file="weblogo_in_rs23.txt", sep=",",quote=F,row.names=F,col.names=F) | 367 pc = pc + coord_polar(theta="y") |
308 | 368 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")")) |
369 | |
370 png(filename="all.png") | |
371 pc | |
372 dev.off() | |
373 | |
374 | |
375 #blegh | |
376 genesForPlot = dat[grepl("ca", dat$best_match),]$best_match | |
377 if(length(genesForPlot) > 0){ | |
378 genesForPlot = data.frame(table(genesForPlot)) | |
379 colnames(genesForPlot) = c("Gene","Freq") | |
380 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | |
381 | |
382 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) | |
383 pc = pc + geom_bar(width = 1, stat = "identity") | |
384 pc = pc + coord_polar(theta="y") | |
385 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")")) | |
386 | |
387 | |
388 png(filename="ca.png") | |
389 print(pc) | |
390 dev.off() | |
391 } | |
392 | |
393 genesForPlot = dat[grepl("cg", dat$best_match),]$best_match | |
394 if(length(genesForPlot) > 0){ | |
395 genesForPlot = data.frame(table(genesForPlot)) | |
396 colnames(genesForPlot) = c("Gene","Freq") | |
397 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | |
398 | |
399 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) | |
400 pc = pc + geom_bar(width = 1, stat = "identity") | |
401 pc = pc + coord_polar(theta="y") | |
402 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG", "( n =", sum(genesForPlot$Freq), ")")) | |
403 | |
404 | |
405 png(filename="cg.png") | |
406 print(pc) | |
407 dev.off() | |
408 } |