changeset 8:e2972f0935e9 draft

Uploaded
author davidvanzessen
date Wed, 26 Mar 2014 11:55:07 -0400
parents 1d0ed31089c6
children 712f3e9924d5
files RScript.r
diffstat 1 files changed, 23 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/RScript.r	Tue Mar 25 07:08:12 2014 -0400
+++ b/RScript.r	Wed Mar 26 11:55:07 2014 -0400
@@ -134,7 +134,9 @@
 VGenes = merge(VGenes, TotalPerSample, by="Sample")
 VGenes$Frequency = VGenes$Count * 100 / VGenes$total
 VPlot = ggplot(VGenes)
-VPlot = VPlot + geom_bar(aes( x = Top.V.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1))
+VPlot = VPlot + geom_bar(aes( x = Top.V.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
+				ggtitle("Distribution of V gene families") + 
+				ylab("Percentage of sequences")
 png("VFPlot.png")
 VPlot
 dev.off();
@@ -147,7 +149,9 @@
 DGenes = merge(DGenes, TotalPerSample, by="Sample")
 DGenes$Frequency = DGenes$Count * 100 / DGenes$total
 DPlot = ggplot(DGenes)
-DPlot = DPlot + geom_bar(aes( x = Top.D.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1))
+DPlot = DPlot + geom_bar(aes( x = Top.D.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
+				ggtitle("Distribution of D gene families") + 
+				ylab("Percentage of sequences")
 png("DFPlot.png")
 DPlot
 dev.off();
@@ -160,12 +164,28 @@
 JGenes = merge(JGenes, TotalPerSample, by="Sample")
 JGenes$Frequency = JGenes$Count * 100 / JGenes$total
 JPlot = ggplot(JGenes)
-JPlot = JPlot + geom_bar(aes( x = Top.J.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1))
+JPlot = JPlot + geom_bar(aes( x = Top.J.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
+				ggtitle("Distribution of J gene families") + 
+				ylab("Percentage of sequences")
 png("JFPlot.png")
 JPlot
 dev.off();
 write.table(x=JGenes, file="JFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T)
 
+CDR3Length = data.frame(data.table(PRODF)[, list(Count=.N), by=c("Sample", "CDR3.Length.DNA")])
+TotalPerSample = data.frame(data.table(CDR3Length)[, list(total=sum(.SD$Count)), by=Sample])
+CDR3Length = merge(CDR3Length, TotalPerSample, by="Sample")
+CDR3Length$Frequency = CDR3Length$Count * 100 / CDR3Length$total
+CDR3LengthPlot = ggplot(CDR3Length)
+CDR3LengthPlot = CDR3LengthPlot + geom_bar(aes( x = CDR3.Length.DNA, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
+				ggtitle("Length distribution of CDR3") + 
+				xlab("CDR3 Length") + 
+				ylab("Percentage of sequences")
+png("CDR3LengthPlot.png",width = 1280, height = 720)
+CDR3LengthPlot
+dev.off()
+write.table(x=CDR3Length, file="CDR3LengthPlot.csv", sep=",",quote=F,row.names=F,col.names=T)
+
 revVchain = Vchain
 revDchain = Dchain
 revVchain$chr.orderV = rev(revVchain$chr.orderV)