diff RScript.r @ 24:5454af6fece1 draft

Uploaded
author davidvanzessen
date Fri, 23 Jan 2015 03:06:43 -0500
parents 5f0597a3fd8b
children ea5c2a2cc1f3
line wrap: on
line diff
--- a/RScript.r	Fri Jan 16 07:37:41 2015 -0500
+++ b/RScript.r	Fri Jan 23 03:06:43 2015 -0500
@@ -584,3 +584,42 @@
                                           by=c("Sample")])
   write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
 }
+
+# ---------------------- AA composition in CDR3 ----------------------
+# Per-sample amino acid usage in the CDR3 of the PRODF rows, as a percentage of all CDR3 characters of that sample.
+AACDR3 = PRODF[,c("Sample", "CDR3.Seq")]
+# Denominator: total number of CDR3 characters per sample (counted before any character is filtered out).
+TotalPerSample = data.frame(data.table(AACDR3)[, list(total=sum(nchar(as.character(.SD$CDR3.Seq)))), by=Sample])
+# Count every single character per sample; seq_len() avoids the bogus 1:0 iteration when there are no samples.
+AAfreq = list()
+for(i in seq_len(nrow(TotalPerSample))){
+  sample = TotalPerSample$Sample[i]
+  AAfreq[[i]] = data.frame(table(unlist(strsplit(as.character(AACDR3[AACDR3$Sample == sample,c("CDR3.Seq")]), ""))))
+  AAfreq[[i]]$Sample = sample
+}
+# Stack the per-sample count tables, attach the per-sample totals and convert counts to percentages.
+AAfreq = ldply(AAfreq, data.frame)
+AAfreq = merge(AAfreq, TotalPerSample, by="Sample", all.x = TRUE)
+AAfreq$freq_perc = as.numeric(AAfreq$Freq / AAfreq$total * 100)
+# Display order of the 20 amino acids; characters that are not in this table get order.aa = NA and are dropped.
+AAorder = read.table(sep="\t", header=TRUE, text="order.aa\tAA\n1\tR\n2\tK\n3\tN\n4\tD\n5\tQ\n6\tE\n7\tH\n8\tP\n9\tY\n10\tW\n11\tS\n12\tT\n13\tG\n14\tA\n15\tM\n16\tC\n17\tF\n18\tL\n19\tV\n20\tI")
+AAfreq = merge(AAfreq, AAorder, by.x='Var1', by.y='AA', all.x=TRUE)
+AAfreq = AAfreq[!is.na(AAfreq$order.aa),]
+# The x axis always shows all 20 residues in AAorder order (explicit levels + drop=FALSE), so the hard-coded
+# rectangle positions below (1-2 = R,K; 4 = D; 6 = E; 7 = H) stay on the right bars even if a residue is absent.
+AAfreqplot = ggplot(AAfreq)
+AAfreqplot = AAfreqplot + geom_bar(aes( x=factor(Var1, levels=as.character(AAorder$AA)), y = freq_perc, fill = Sample), stat='identity', position='dodge' )
+AAfreqplot = AAfreqplot + scale_x_discrete(drop=FALSE)
+AAfreqplot = AAfreqplot + annotate("rect", xmin = 0.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2)
+AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2)
+AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2)
+AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2)
+AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage")
+# print() explicitly: a ggplot is only drawn when printed, and auto-printing does not happen under source() or inside a function/loop.
+png("AAComposition.png",width = 1280, height = 720)
+print(AAfreqplot)
+dev.off()
+# Export the plotted table (one row per sample and amino acid) next to the image.
+write.table(AAfreq, "AAComposition.csv" , sep=",",quote=FALSE,na="-",row.names=FALSE,col.names=TRUE)
+
+