Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.r @ 24:31eee1b3d7df draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 07 Apr 2015 05:57:47 -0400 |
parents | 28b8d980db22 |
children | 58a62d2c0377 |
comparison
equal
deleted
inserted
replaced
23:28b8d980db22 | 24:31eee1b3d7df |
---|---|
128 dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns) | 128 dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns) |
129 | 129 |
130 totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t"), sep="") | 130 totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t"), sep="") |
131 dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns) | 131 dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns) |
132 | 132 |
133 silentMutations_columns = paste(regions, ".IMGT.Nb.of.silent.mutations", sep="") | 133 FRRegions = regions[grepl("FR", regions)] |
134 silentMutations_columns | 134 CDRRegions = regions[grepl("CDR", regions)] |
135 dat[,silentMutations_columns] | 135 |
136 dat$silentMutations = apply(dat, FUN=sum_by_row, 1, columns=silentMutations_columns) | 136 FR_silentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.silent.mutations", sep="") |
137 | 137 dat$silentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_silentMutations_columns) |
138 nonSilentMutations_columns = paste(regions, ".IMGT.Nb.of.nonsilent.mutations", sep="") | 138 |
139 dat$nonSilentMutations = apply(dat, FUN=sum_by_row, 1, columns=nonSilentMutations_columns) | 139 CDR_silentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.silent.mutations", sep="") |
140 dat$silentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_silentMutations_columns) | |
141 | |
142 FR_nonSilentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="") | |
143 dat$nonSilentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_nonSilentMutations_columns) | |
144 | |
145 CDR_nonSilentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="") | |
146 dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns) | |
140 | 147 |
141 | 148 |
142 setwd(outputdir) | 149 setwd(outputdir) |
143 | 150 |
144 nts = c("a", "t", "g", "c") | 151 nts = c("a", "t", "g", "c") |
145 zeros=rep(0, 4) | 152 zeros=rep(0, 4) |
146 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=6) | 153 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=7) |
147 for(i in 1:length(genes)){ | 154 for(i in 1:length(genes)){ |
148 gene = genes[i] | 155 gene = genes[i] |
149 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] | 156 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] |
150 if(gene == "."){ | 157 if(gene == "."){ |
151 tmp = dat | 158 tmp = dat |
167 matrx[4,y] = sum(tmp$totalMutationsAtGC) | 174 matrx[4,y] = sum(tmp$totalMutationsAtGC) |
168 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) | 175 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) |
169 matrx[5,x] = sum(tmp$totalMutationsAtGC) | 176 matrx[5,x] = sum(tmp$totalMutationsAtGC) |
170 matrx[5,y] = sum(tmp$VRegionMutations) | 177 matrx[5,y] = sum(tmp$VRegionMutations) |
171 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) | 178 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) |
172 matrx[6,x] = sum(tmp$silentMutations) | 179 matrx[6,x] = sum(tmp$silentMutationsFR) |
173 matrx[6,y] = sum(tmp$nonSilentMutations) | 180 matrx[6,y] = sum(tmp$nonSilentMutationsFR) |
174 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) | 181 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) |
182 matrx[7,x] = sum(tmp$silentMutationsCDR) | |
183 matrx[7,y] = sum(tmp$nonSilentMutationsCDR) | |
184 matrx[7,z] = round(matrx[7,x] / matrx[7,y] * 100, digits=1) | |
175 | 185 |
176 | 186 |
177 transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros) | 187 transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros) |
178 row.names(transitionTable) = c("A", "C", "G", "T") | 188 row.names(transitionTable) = c("A", "C", "G", "T") |
179 transitionTable["A","A"] = NA | 189 transitionTable["A","A"] = NA |
230 matrx[4,y] = sum(tmp$totalMutationsAtGC) | 240 matrx[4,y] = sum(tmp$totalMutationsAtGC) |
231 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) | 241 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) |
232 matrx[5,x] = sum(tmp$totalMutationsAtGC) | 242 matrx[5,x] = sum(tmp$totalMutationsAtGC) |
233 matrx[5,y] = sum(tmp$VRegionMutations) | 243 matrx[5,y] = sum(tmp$VRegionMutations) |
234 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) | 244 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) |
235 matrx[6,x] = sum(tmp$silentMutations) | 245 matrx[6,x] = sum(tmp$silentMutationsFR) |
236 matrx[6,y] = sum(tmp$nonSilentMutations) | 246 matrx[6,y] = sum(tmp$nonSilentMutationsFR) |
237 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) | 247 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) |
248 matrx[7,x] = sum(tmp$silentMutationsCDR) | |
249 matrx[7,y] = sum(tmp$nonSilentMutationsCDR) | |
250 matrx[7,z] = round(matrx[7,x] / matrx[7,y] * 100, digits=1) | |
238 | 251 |
239 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) | 252 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) |
240 row.names(transitionTable) = c("A", "C", "G", "T") | 253 row.names(transitionTable) = c("A", "C", "G", "T") |
241 transitionTable["A","A"] = NA | 254 transitionTable["A","A"] = NA |
242 transitionTable["C","C"] = NA | 255 transitionTable["C","C"] = NA |
269 cat(length(tmp$Sequence.ID), file="total_n.txt") | 282 cat(length(tmp$Sequence.ID), file="total_n.txt") |
270 | 283 |
271 | 284 |
272 | 285 |
273 result = data.frame(matrx) | 286 result = data.frame(matrx) |
274 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)", "Silent/Non Silent (%)") | 287 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)", "FR S/R (%)", "CDR S/R (%)") |
275 | 288 |
276 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) | 289 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) |
277 | 290 |
278 | 291 |
279 if (!("ggplot2" %in% rownames(installed.packages()))) { | 292 if (!("ggplot2" %in% rownames(installed.packages()))) { |