diff mutation_analysis.r @ 26:2433a1e110e1 draft

Uploaded
author davidvanzessen
date Wed, 08 Apr 2015 05:25:52 -0400
parents 58a62d2c0377
children ac9a4307861a
line wrap: on
line diff
--- a/mutation_analysis.r	Tue Apr 07 07:32:43 2015 -0400
+++ b/mutation_analysis.r	Wed Apr 08 05:25:52 2015 -0400
@@ -148,7 +148,7 @@
 
 setwd(outputdir)
 
-nts = c("a", "t", "g", "c")
+nts = c("a", "c", "g", "t")
 zeros=rep(0, 4)
 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=7)
 for(i in 1:length(genes)){
@@ -298,11 +298,13 @@
 genesForPlot = data.frame(table(genesForPlot))
 colnames(genesForPlot) = c("Gene","Freq")
 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
+write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T)
+
 
 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
 pc = pc + geom_bar(width = 1, stat = "identity")
 pc = pc + coord_polar(theta="y")
-pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")"))
+pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")"))
 
 png(filename="all.png")
 pc
@@ -319,8 +321,8 @@
 	pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
 	pc = pc + geom_bar(width = 1, stat = "identity")
 	pc = pc + coord_polar(theta="y")
-	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")"))
-
+	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA subclasses", "( n =", sum(genesForPlot$Freq), ")"))
+	write.table(genesForPlot, "ca.txt", sep="\t",quote=F,row.names=F,col.names=T)
 
 	png(filename="ca.png")
 	print(pc)
@@ -336,8 +338,8 @@
 	pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
 	pc = pc + geom_bar(width = 1, stat = "identity")
 	pc = pc + coord_polar(theta="y")
-	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG", "( n =", sum(genesForPlot$Freq), ")"))
-
+	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG subclasses", "( n =", sum(genesForPlot$Freq), ")"))
+	write.table(genesForPlot, "cg.txt", sep="\t",quote=F,row.names=F,col.names=T)
 
 	png(filename="cg.png")
 	print(pc)
@@ -346,9 +348,11 @@
 
 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2)
 
-p = ggplot(dat, aes(best_match, percentage_mutations))# + scale_y_log10(breaks=scales,labels=scales)
+p = ggplot(dat, aes(best_match, percentage_mutations))
 p = p + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) + geom_point(aes(colour=best_match), position="jitter")
 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot")
+write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T)
+
 
 png(filename="scatter.png")
 print(p)