comparison RScript.r @ 24:5454af6fece1 draft

Uploaded
author davidvanzessen
date Fri, 23 Jan 2015 03:06:43 -0500
parents 5f0597a3fd8b
children ea5c2a2cc1f3
comparison
equal deleted inserted replaced
23:5f0597a3fd8b 24:5454af6fece1
582 mean(P3D.nt.nb, na.rm=T) + 582 mean(P3D.nt.nb, na.rm=T) +
583 mean(P5J.nt.nb, na.rm=T))), 583 mean(P5J.nt.nb, na.rm=T))),
584 by=c("Sample")]) 584 by=c("Sample")])
585 write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 585 write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
586 } 586 }
587
# ---------------------- AA composition in CDR3 ----------------------
#
# For every sample, computes which percentage of all CDR3 residues each amino
# acid accounts for, then writes a dodged bar chart (AAComposition.png) and the
# table behind it (AAComposition.csv).
#
# Reads:  PRODF (columns "Sample" and "CDR3.Seq" are used).
# Leaves: AACDR3, TotalPerSample, AAfreq, AAorder and AAfreqplot (plus the loop
#         variables i and sample) in the workspace, as before.

AACDR3 = PRODF[,c("Sample", "CDR3.Seq")]

# Denominator: total number of CDR3 characters per sample.
# na.rm=TRUE because nchar() of a missing string is NA, which would turn the
# whole sample's total (and every percentage) into NA, while table() below
# already ignores NA when counting residues.
TotalPerSample = data.frame(data.table(AACDR3)[, list(total=sum(nchar(as.character(CDR3.Seq)), na.rm=TRUE)), by=Sample])

AAfreq = vector("list", nrow(TotalPerSample))

# seq_len() instead of 1:nrow(): with zero samples 1:0 would iterate over c(1, 0).
for(i in seq_len(nrow(TotalPerSample))){
	sample = TotalPerSample$Sample[i]
	# Split every CDR3 of this sample into single characters and count them.
	# The table() argument is deliberately kept as an unnamed expression so the
	# resulting columns are called Var1 / Freq, which the code below relies on.
	AAfreq[[i]] = data.frame(table(unlist(strsplit(as.character(AACDR3[AACDR3$Sample == sample,c("CDR3.Seq")]), ""))))
	AAfreq[[i]]$Sample = sample
}

# Stack the per-sample count tables and attach each sample's total.
AAfreq = ldply(AAfreq, data.frame)
AAfreq = merge(AAfreq, TotalPerSample, by="Sample", all.x = TRUE)
AAfreq$freq_perc = as.numeric(AAfreq$Freq / AAfreq$total * 100)


# Display order of the 20 amino acids on the x axis (1 = leftmost).
AAorder = read.table(sep="\t", header=TRUE, text="order.aa\tAA\n1\tR\n2\tK\n3\tN\n4\tD\n5\tQ\n6\tE\n7\tH\n8\tP\n9\tY\n10\tW\n11\tS\n12\tT\n13\tG\n14\tA\n15\tM\n16\tC\n17\tF\n18\tL\n19\tV\n20\tI")
AAfreq = merge(AAfreq, AAorder, by.x='Var1', by.y='AA', all.x=TRUE)

# Drop every character that is not one of the 20 amino acids listed in AAorder.
AAfreq = AAfreq[!is.na(AAfreq$order.aa),]

AAfreqplot = ggplot(AAfreq)
AAfreqplot = AAfreqplot + geom_bar(aes( x=factor(reorder(Var1, order.aa)), y = freq_perc, fill = Sample), stat='identity', position='dodge' )
# Shaded bands are placed by x position: red over positions 1-2 and 7 (R, K, H
# in AAorder), blue over positions 4 and 6 (D, E) - presumably basic vs. acidic
# residues.
# NOTE(review): the positions only line up with those letters when all of the
# first seven amino acids occur in the data - confirm this always holds.
AAfreqplot = AAfreqplot + annotate("rect", xmin = 0.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2)
AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2)
AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2)
AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2)
AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage")

png("AAComposition.png",width = 1280, height = 720)
# Explicit print(): a ggplot object is only drawn by auto-printing at top level,
# so without it the PNG stays empty when this code is source()d or run inside
# a function.
print(AAfreqplot)
dev.off()
write.table(AAfreq, "AAComposition.csv" , sep=",",quote=FALSE,na="-",row.names=FALSE,col.names=TRUE)
624
625