Mercurial > repos > davidvanzessen > report_clonality_igg
diff RScript.r @ 6:18ac92a69ef1 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 18 Mar 2014 09:03:54 -0400 |
parents | 99834201251f |
children | e2972f0935e9 |
line wrap: on
line diff
--- a/RScript.r Mon Mar 10 11:06:17 2014 -0400 +++ b/RScript.r Tue Mar 18 09:03:54 2014 -0400 @@ -25,6 +25,11 @@ } library(data.table) +if (!("reshape2" %in% rownames(installed.packages()))) { + install.packages("reshape2", repos="http://cran.xl-mirror.nl/") +} +library(reshape2) + test = read.table(inFile, sep="\t", header=TRUE, fill=T, comment.char="") @@ -93,11 +98,12 @@ setwd(outDir) -write.table(PRODF, "allUnique.tsv", sep="\t",quote=F,row.names=F,col.names=T) +write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) pV = ggplot(PRODFV) pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") +write.table(x=PRODFV, file="VFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) png("VPlot.png",width = 1280, height = 720) pV @@ -106,6 +112,7 @@ pD = ggplot(PRODFD) pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") +write.table(x=PRODFD, file="DFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) png("DPlot.png",width = 800, height = 600) pD @@ -114,11 +121,51 @@ pJ = ggplot(PRODFJ) pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") +write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) png("JPlot.png",width = 800, height = 600) pJ dev.off(); +VGenes = PRODF[,c("Sample", "Top.V.Gene")] +VGenes$Top.V.Gene = gsub("-.*", "", VGenes$Top.V.Gene) +VGenes = data.frame(data.table(VGenes)[, list(Count=.N), by=c("Sample", "Top.V.Gene")]) +TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample]) +VGenes = merge(VGenes, TotalPerSample, by="Sample") +VGenes$Frequency = VGenes$Count * 100 / VGenes$total +VPlot = ggplot(VGenes) +VPlot = VPlot + geom_bar(aes( x = Top.V.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) +png("VFPlot.png") +VPlot +dev.off(); +write.table(x=VGenes, file="VFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) + +DGenes = PRODF[,c("Sample", "Top.D.Gene")] +DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) +DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")]) +TotalPerSample = data.frame(data.table(DGenes)[, list(total=sum(.SD$Count)), by=Sample]) +DGenes = merge(DGenes, TotalPerSample, by="Sample") +DGenes$Frequency = DGenes$Count * 100 / DGenes$total +DPlot = ggplot(DGenes) +DPlot = DPlot + geom_bar(aes( x = Top.D.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) +png("DFPlot.png") +DPlot +dev.off(); +write.table(x=DGenes, file="DFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) + +JGenes = PRODF[,c("Sample", "Top.J.Gene")] +JGenes$Top.J.Gene = gsub("-.*", "", JGenes$Top.J.Gene) +JGenes = data.frame(data.table(JGenes)[, list(Count=.N), by=c("Sample", "Top.J.Gene")]) +TotalPerSample = data.frame(data.table(JGenes)[, list(total=sum(.SD$Count)), by=Sample]) +JGenes = merge(JGenes, TotalPerSample, by="Sample") +JGenes$Frequency = JGenes$Count * 100 / JGenes$total +JPlot = ggplot(JGenes) +JPlot = JPlot + geom_bar(aes( x = Top.J.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) +png("JFPlot.png") +JPlot +dev.off(); +write.table(x=JGenes, file="JFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) + revVchain = Vchain revDchain = Dchain revVchain$chr.orderV = rev(revVchain$chr.orderV) @@ -138,7 +185,9 @@ png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) print(img) + dev.off() + write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) } VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) @@ -174,6 +223,7 @@ png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) print(img) dev.off() + write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) } VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) @@ -206,6 +256,7 @@ png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) print(img) dev.off() + write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) } DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) @@ -236,17 +287,17 @@ clonalityFrame = PROD clonalityFrame$ReplicateConcat = do.call(paste, c(clonalityFrame[c("VDJCDR3", "Sample", "Replicate")], sep = ":")) clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$ReplicateConcat), ] - write.table(clonalityFrame, "clonalityComplete.tsv", sep="\t",quote=F,row.names=F,col.names=T) + write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) ClonalitySampleReplicatePrint <- function(dat){ - write.table(dat, paste("clonality_", unique(dat$Sample) , "_", unique(dat$Replicate), ".tsv", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + write.table(dat, paste("clonality_", unique(dat$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) } clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,c("Sample", "Replicate")]) #lapply(clonalityFrameSplit, FUN=ClonalitySampleReplicatePrint) ClonalitySamplePrint <- function(dat){ - write.table(dat, paste("clonality_", unique(dat$Sample) , ".tsv", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + write.table(dat, paste("clonality_", unique(dat$Sample) , ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) } clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"])