comparison mutation_analysis.r @ 24:31eee1b3d7df draft

Uploaded
author davidvanzessen
date Tue, 07 Apr 2015 05:57:47 -0400
parents 28b8d980db22
children 58a62d2c0377
comparison
equal deleted inserted replaced
23:28b8d980db22 24:31eee1b3d7df
128 dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns) 128 dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns)
129 129
130 totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t"), sep="") 130 totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t"), sep="")
131 dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns) 131 dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns)
132 132
133 silentMutations_columns = paste(regions, ".IMGT.Nb.of.silent.mutations", sep="") 133 FRRegions = regions[grepl("FR", regions)]
134 silentMutations_columns 134 CDRRegions = regions[grepl("CDR", regions)]
135 dat[,silentMutations_columns] 135
136 dat$silentMutations = apply(dat, FUN=sum_by_row, 1, columns=silentMutations_columns) 136 FR_silentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.silent.mutations", sep="")
137 137 dat$silentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_silentMutations_columns)
138 nonSilentMutations_columns = paste(regions, ".IMGT.Nb.of.nonsilent.mutations", sep="") 138
139 dat$nonSilentMutations = apply(dat, FUN=sum_by_row, 1, columns=nonSilentMutations_columns) 139 CDR_silentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.silent.mutations", sep="")
140 dat$silentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_silentMutations_columns)
141
142 FR_nonSilentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="")
143 dat$nonSilentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_nonSilentMutations_columns)
144
145 CDR_nonSilentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="")
146 dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns)
140 147
141 148
142 setwd(outputdir) 149 setwd(outputdir)
143 150
144 nts = c("a", "t", "g", "c") 151 nts = c("a", "t", "g", "c")
145 zeros=rep(0, 4) 152 zeros=rep(0, 4)
146 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=6) 153 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=7)
147 for(i in 1:length(genes)){ 154 for(i in 1:length(genes)){
148 gene = genes[i] 155 gene = genes[i]
149 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] 156 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),]
150 if(gene == "."){ 157 if(gene == "."){
151 tmp = dat 158 tmp = dat
167 matrx[4,y] = sum(tmp$totalMutationsAtGC) 174 matrx[4,y] = sum(tmp$totalMutationsAtGC)
168 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) 175 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
169 matrx[5,x] = sum(tmp$totalMutationsAtGC) 176 matrx[5,x] = sum(tmp$totalMutationsAtGC)
170 matrx[5,y] = sum(tmp$VRegionMutations) 177 matrx[5,y] = sum(tmp$VRegionMutations)
171 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) 178 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
172 matrx[6,x] = sum(tmp$silentMutations) 179 matrx[6,x] = sum(tmp$silentMutationsFR)
173 matrx[6,y] = sum(tmp$nonSilentMutations) 180 matrx[6,y] = sum(tmp$nonSilentMutationsFR)
174 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) 181 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1)
182 matrx[7,x] = sum(tmp$silentMutationsCDR)
183 matrx[7,y] = sum(tmp$nonSilentMutationsCDR)
184 matrx[7,z] = round(matrx[7,x] / matrx[7,y] * 100, digits=1)
175 185
176 186
177 transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros) 187 transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros)
178 row.names(transitionTable) = c("A", "C", "G", "T") 188 row.names(transitionTable) = c("A", "C", "G", "T")
179 transitionTable["A","A"] = NA 189 transitionTable["A","A"] = NA
230 matrx[4,y] = sum(tmp$totalMutationsAtGC) 240 matrx[4,y] = sum(tmp$totalMutationsAtGC)
231 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) 241 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
232 matrx[5,x] = sum(tmp$totalMutationsAtGC) 242 matrx[5,x] = sum(tmp$totalMutationsAtGC)
233 matrx[5,y] = sum(tmp$VRegionMutations) 243 matrx[5,y] = sum(tmp$VRegionMutations)
234 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) 244 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
235 matrx[6,x] = sum(tmp$silentMutations) 245 matrx[6,x] = sum(tmp$silentMutationsFR)
236 matrx[6,y] = sum(tmp$nonSilentMutations) 246 matrx[6,y] = sum(tmp$nonSilentMutationsFR)
237 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) 247 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1)
248 matrx[7,x] = sum(tmp$silentMutationsCDR)
249 matrx[7,y] = sum(tmp$nonSilentMutationsCDR)
250 matrx[7,z] = round(matrx[7,x] / matrx[7,y] * 100, digits=1)
238 251
239 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) 252 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4)
240 row.names(transitionTable) = c("A", "C", "G", "T") 253 row.names(transitionTable) = c("A", "C", "G", "T")
241 transitionTable["A","A"] = NA 254 transitionTable["A","A"] = NA
242 transitionTable["C","C"] = NA 255 transitionTable["C","C"] = NA
269 cat(length(tmp$Sequence.ID), file="total_n.txt") 282 cat(length(tmp$Sequence.ID), file="total_n.txt")
270 283
271 284
272 285
273 result = data.frame(matrx) 286 result = data.frame(matrx)
274 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)", "Silent/Non Silent (%)") 287 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)", "FR S/R (%)", "CDR S/R (%)")
275 288
276 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) 289 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F)
277 290
278 291
279 if (!("ggplot2" %in% rownames(installed.packages()))) { 292 if (!("ggplot2" %in% rownames(installed.packages()))) {