Mercurial > repos > davidvanzessen > report_clonality_igg
diff RScript.r @ 24:5454af6fece1 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 23 Jan 2015 03:06:43 -0500 |
parents | 5f0597a3fd8b |
children | ea5c2a2cc1f3 |
line wrap: on
line diff
# --- a/RScript.r Fri Jan 16 07:37:41 2015 -0500
# +++ b/RScript.r Fri Jan 23 03:06:43 2015 -0500
# @@ -584,3 +584,42 @@
# Unchanged hunk context (tail of a block that starts before this hunk):
#   by=c("Sample")])
#   write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
# }

# ---------------------- AA composition in CDR3 ----------------------
#
# For every sample in PRODF, counts how often each character occurs in the
# CDR3.Seq column and expresses that count as a percentage of all CDR3
# characters of the sample. The result is drawn as a dodged bar chart
# (AAComposition.png) and written as a table (AAComposition.csv).
#
# Reads:  PRODF (columns "Sample" and "CDR3.Seq")
# Writes: AAComposition.png, AAComposition.csv

AACDR3 = PRODF[, c("Sample", "CDR3.Seq")]

# Total number of CDR3 characters per sample; used as the denominator below.
TotalPerSample = data.frame(
  data.table(AACDR3)[, list(total = sum(nchar(as.character(.SD$CDR3.Seq)))), by = Sample]
)

# One frequency table per sample. The list is preallocated and indexed with
# seq_len() so that an input without any sample does not run the loop body
# for i = 1 and i = 0.
AAfreq = vector("list", nrow(TotalPerSample))

for (i in seq_len(nrow(TotalPerSample))) {
  sample = TotalPerSample$Sample[i]
  # Split every CDR3 sequence of this sample into single characters and count them.
  AAfreq[[i]] = data.frame(table(unlist(strsplit(as.character(AACDR3[AACDR3$Sample == sample, c("CDR3.Seq")]), ""))))
  AAfreq[[i]]$Sample = sample
}

AAfreq = ldply(AAfreq, data.frame)
AAfreq = merge(AAfreq, TotalPerSample, by = "Sample", all.x = TRUE)
AAfreq$freq_perc = as.numeric(AAfreq$Freq / AAfreq$total * 100)

# Display order of the 20 amino acid letters (order.aa 1..20).
AAorder = read.table(sep="\t", header=TRUE, text="order.aa\tAA\n1\tR\n2\tK\n3\tN\n4\tD\n5\tQ\n6\tE\n7\tH\n8\tP\n9\tY\n10\tW\n11\tS\n12\tT\n13\tG\n14\tA\n15\tM\n16\tC\n17\tF\n18\tL\n19\tV\n20\tI")
AAfreq = merge(AAfreq, AAorder, by.x = 'Var1', by.y = 'AA', all.x = TRUE)

# Drop characters that are not in AAorder. Note that `total` was computed
# before this filter, so the percentages of a sample add up to less than 100
# when its sequences contain such characters.
AAfreq = AAfreq[!is.na(AAfreq$order.aa), ]

# Pin the x axis to the complete AAorder table. The shaded rectangles below
# are placed at fixed positions (1-2, 4, 6, 7), so a residue that is absent
# from the data must still keep its slot or the shading would cover the
# wrong letters.
AAlevels = as.character(AAorder$AA[order(AAorder$order.aa)])

AAfreqplot = ggplot(AAfreq)
AAfreqplot = AAfreqplot + geom_bar(aes(x = factor(as.character(Var1), levels = AAlevels), y = freq_perc, fill = Sample), stat = 'identity', position = 'dodge')
AAfreqplot = AAfreqplot + scale_x_discrete(drop = FALSE)
# Shading over R/K (1-2), D (4), E (6) and H (7).
# NOTE(review): presumably red = basic and blue = acidic residues - confirm.
AAfreqplot = AAfreqplot + annotate("rect", xmin = 0.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2)
AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2)
AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2)
AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2)
AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage")

png("AAComposition.png", width = 1280, height = 720)
# Explicit print(): a bare `AAfreqplot` only draws when auto-printed at top
# level, and would leave an empty PNG if this script is source()d.
print(AAfreqplot)
dev.off()
write.table(AAfreq, "AAComposition.csv", sep = ",", quote = FALSE, na = "-", row.names = FALSE, col.names = TRUE)