comparison RScript.r @ 2:aac19ac7cb21 draft

Uploaded
author davidvanzessen
date Fri, 11 Oct 2013 09:11:03 -0400
parents c43c63f4ef80
children d27d745d0556
comparison
equal deleted inserted replaced
1:c43c63f4ef80 2:aac19ac7cb21
7 outDir = args[3] 7 outDir = args[3]
8 8
9 if (!require("gridExtra")) { 9 if (!require("gridExtra")) {
10 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") 10 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/")
11 } 11 }
12 library (gridExtra) 12 library(gridExtra)
13 if (!require("ggplot2")) { 13 if (!require("ggplot2")) {
14 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") 14 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/")
15 } 15 }
16 require(ggplot2) 16 require(ggplot2)
17 if (!require("plyr")) { 17 if (!require("plyr")) {
82 82
83 setwd(outDir) 83 setwd(outDir)
84 84
85 pV = ggplot(PRODFV) 85 pV = ggplot(PRODFV)
86 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 86 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
87 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage")
87 88
88 png("VPlot.png",width = 1280, height = 720) 89 png("VPlot.png",width = 1280, height = 720)
89 pV 90 pV
90 dev.off(); 91 dev.off();
91 92
92 pD = ggplot(PRODFD) 93 pD = ggplot(PRODFD)
93 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 94 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
95 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage")
94 96
95 png("DPlot.png",width = 800, height = 600) 97 png("DPlot.png",width = 800, height = 600)
96 pD 98 pD
97 dev.off(); 99 dev.off();
98 100
99 pJ = ggplot(PRODFJ) 101 pJ = ggplot(PRODFJ)
100 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 102 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
103 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage")
101 104
102 png("JPlot.png",width = 800, height = 600) 105 png("JPlot.png",width = 800, height = 600)
103 pJ 106 pJ
104 dev.off(); 107 dev.off();
105 108
106 109
107 plotVD <- function(dat){ 110 plotVD <- function(dat){
108 ggplot() + 111 ggplot() +
109 geom_tile(data=dat, aes(x=factor(Top.V.Gene), y=factor(Top.D.Gene), fill=log)) + 112 geom_tile(data=dat, aes(x=factor(Top.D.Gene), y=factor(Top.V.Gene), fill=log)) +
110 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 113 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
111 scale_fill_gradient(low="white", high="red") + 114 scale_fill_gradient(low="gold", high="blue") +
112 ggtitle(unique(dat$Sample)) 115 ggtitle(unique(dat$Sample)) +
116 xlab("D genes") +
117 ylab("V Genes")
113 } 118 }
114 119
115 120
116 VandDCount = ddply(PRODF, c("Top.V.Gene", "Top.D.Gene", "Sample"), function(x) summary(x$VDJCDR3)) 121 VandDCount = ddply(PRODF, c("Top.V.Gene", "Top.D.Gene", "Sample"), function(x) summary(x$VDJCDR3))
117 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) 122 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample))
119 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) 124 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE)
120 completeVD$Length = as.numeric(completeVD$Length) 125 completeVD$Length = as.numeric(completeVD$Length)
121 completeVD$log = log(completeVD$Length) 126 completeVD$log = log(completeVD$Length)
122 completeVD$log[is.na(completeVD$log)] = 0 127 completeVD$log[is.na(completeVD$log)] = 0
123 l = split(completeVD, f=completeVD[,"Sample"]) 128 l = split(completeVD, f=completeVD[,"Sample"])
124 png("HeatmapVD%d.png", width=100+(15*length(Vchain$v.name)), height=100+(15*length(Dchain$v.name))) 129 png("HeatmapVD%d.png", width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name)))
125 lapply(l, FUN=plotVD) 130 lapply(l, FUN=plotVD)
126 dev.off() 131 dev.off()
127 132
128 133
129 plotVJ <- function(dat){ 134 plotVJ <- function(dat){
130 ggplot() + 135 ggplot() +
131 geom_tile(data=dat, aes(x=factor(Top.V.Gene), y=factor(Top.J.Gene), fill=log)) + 136 geom_tile(data=dat, aes(x=factor(Top.J.Gene), y=factor(Top.V.Gene), fill=log)) +
132 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 137 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
133 scale_fill_gradient(low="white", high="red") + 138 scale_fill_gradient(low="gold", high="blue") +
134 ggtitle(unique(dat$Sample)) 139 ggtitle(unique(dat$Sample)) +
140 xlab("J genes") +
141 ylab("V Genes")
135 } 142 }
136 143
137 VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) 144 VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3))
138 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) 145 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample))
139 146
140 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) 147 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE)
141 completeVJ$Length = as.numeric(completeVJ$Length) 148 completeVJ$Length = as.numeric(completeVJ$Length)
142 completeVJ$log = log(completeVJ$Length) 149 completeVJ$log = log(completeVJ$Length)
143 completeVJ$log[is.na(completeVJ$log)] = 0 150 completeVJ$log[is.na(completeVJ$log)] = 0
144 l = split(completeVJ, f=completeVJ[,"Sample"]) 151 l = split(completeVJ, f=completeVJ[,"Sample"])
145 png("HeatmapVJ%d.png", width=100+(15*length(Vchain$v.name)), height=100+(15*length(Jchain$v.name))) 152 png("HeatmapVJ%d.png", width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name)))
146 lapply(l, FUN=plotVJ) 153 lapply(l, FUN=plotVJ)
147 dev.off() 154 dev.off()
148 155
149 plotDJ <- function(dat){ 156 plotDJ <- function(dat){
150 ggplot() + 157 ggplot() +
151 geom_tile(data=dat, aes(x=factor(Top.D.Gene), y=factor(Top.J.Gene), fill=log)) + 158 geom_tile(data=dat, aes(x=factor(Top.J.Gene), y=factor(Top.D.Gene), fill=log)) +
152 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 159 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
153 scale_fill_gradient(low="white", high="red") + 160 scale_fill_gradient(low="gold", high="blue") +
154 ggtitle(unique(dat$Sample)) 161 ggtitle(unique(dat$Sample)) +
162 xlab("J genes") +
163 ylab("D Genes")
155 } 164 }
156 165
157 DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) 166 DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3))
158 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) 167 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample))
159 168
160 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) 169 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE)
161 completeDJ$Length = as.numeric(completeDJ$Length) 170 completeDJ$Length = as.numeric(completeDJ$Length)
162 completeDJ$log = log(completeDJ$Length) 171 completeDJ$log = log(completeDJ$Length)
163 completeDJ$log[is.na(completeDJ$log)] = 0 172 completeDJ$log[is.na(completeDJ$log)] = 0
164 l = split(completeDJ, f=completeDJ[,"Sample"]) 173 l = split(completeDJ, f=completeDJ[,"Sample"])
165 png("HeatmapDJ%d.png", width=100+(15*length(Dchain$v.name)), height=100+(15*length(Jchain$v.name))) 174 png("HeatmapDJ%d.png", width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name)))
166 lapply(l, FUN=plotDJ) 175 lapply(l, FUN=plotDJ)
167 dev.off() 176 dev.off()
168 177
169 178
170 sampleFile <- file("samples.txt") 179 sampleFile <- file("samples.txt")