6
|
1 args <- commandArgs(TRUE)
|
|
2 input <- args[2]
|
|
3 pngFile <- args[3]
|
3
|
4 dataTable <-read.table(file=input, header=TRUE);
|
|
5 chip.data<-data.frame(dataTable)
|
|
6 ifReg <- 0
|
|
7 if (length(unique(chip.data$Reg))>1) {
|
|
8 ifReg <- 1
|
|
9 }
|
|
10
|
|
11 ifPDF <- 0
|
|
12 bootstrap <- 1
|
|
13
|
6
|
14 if (length(args)>=6) {
|
|
15 ifPDF=args[6]
|
|
16 bootstrap=args[7]
|
3
|
17 }
|
6
|
18 if (length(args)==5 & args[5]==1) {
|
3
|
19 ifPDF=1
|
|
20 }
|
|
21
|
|
22 ifControl <- 0
|
6
|
23 if (length(args)>=5 & args[5]!=1 & args[5]!=0) {
|
|
24 dataTable <-read.table(file=args[5], header=TRUE);
|
3
|
25 control.data<-data.frame(dataTable)
|
|
26 ifControl <- 1
|
|
27
|
|
28
|
|
29 }
|
|
30
|
|
31 error.bar <- function(x, y, upper, lower=upper, length=0.1,...){
|
|
32 if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper))
|
|
33 stop("vectors must be same length")
|
|
34 arrows(x,y+upper, x, y-lower, angle=90, code=3, length=length, ...)
|
|
35 }
|
|
36
|
|
37
|
|
38
|
6
|
39 logFile <- args[4]
|
3
|
40 sink(logFile, append=FALSE, split=FALSE)
|
|
41
|
|
42 if (ifReg & ifControl) {
|
|
43
|
|
44 if (ifPDF==1) {
|
|
45 pdf(file = pngFile, width = 14, height = 13, pointsize = 20, bg = "white")
|
|
46 } else {
|
|
47 png(filename = pngFile, width = 1140, height = 840, units = "px", pointsize = 20, bg = "white", res = NA)
|
|
48 plot(1:10)
|
|
49 }
|
|
50 op <- par(mfrow = c(3,1))
|
|
51 } else {
|
|
52 if (ifReg | ifControl) {
|
|
53
|
|
54 if (ifPDF==1) {
|
|
55 pdf(file = pngFile, width = 20, height = 17, pointsize = 20, bg = "white")
|
|
56 } else {
|
|
57 png(filename = pngFile, width = 1580, height = 1230, units = "px", pointsize = 20, bg = "white", res = NA)
|
|
58 plot(1:10)
|
|
59 }
|
|
60 op <- par(mfrow = c(2,1))
|
|
61 } else {
|
|
62 if (ifPDF==1) {
|
|
63 pdf(file = pngFile, width = 22, height = 8, pointsize = 20, bg = "white")
|
|
64 } else {
|
|
65 png(filename = pngFile, width = 1580, height = 530, units = "px", pointsize = 20, bg = "white", res = NA)
|
|
66 plot(1:10)
|
|
67 }
|
|
68 op <- par(mfrow = c(1,1))
|
|
69 }
|
|
70 }
|
|
71 myColor <- 1
|
|
72 myColor[1] <- colors()[131]
|
|
73 myColor[2] <- colors()[59]
|
|
74 myColor[3] <- colors()[76]
|
|
75 myColor[4] <- colors()[88]
|
|
76 myColor[5] <- colors()[17]
|
|
77 myColor[6] <- colors()[565]
|
|
78 myColor[7] <- colors()[454]
|
|
79 myColor[8] <- colors()[401]
|
|
80 myColor[9] <- colors()[99]
|
|
81 myColorControl <- 1
|
|
82 myColorControl[1] <- colors()[24]
|
|
83 myColorControl[2] <- colors()[278]
|
|
84 myColorControl[3] <- colors()[305]
|
|
85 myColorControl[4] <- colors()[219]
|
|
86 myColorControl[5] <- colors()[343]
|
|
87 myColorControl[6] <- colors()[245]
|
|
88 myLevels <- 0
|
|
89 nn <- colnames(chip.data)
|
|
90 ifk27 <- 0
|
|
91 if (length(colnames(chip.data))==41) {
|
|
92 cc <- c(1:41)
|
|
93 colnames(chip.data) <- cc
|
|
94 }
|
|
95
|
|
96 if (length(colnames(chip.data))==45) {
|
|
97 cc <- c(1:45)
|
|
98 colnames(chip.data) <- cc
|
|
99 ifk27 <- 1
|
|
100 }
|
|
101
|
|
102 if (!ifk27) {
|
|
103
|
|
104 countTotal <- length(unique(chip.data$"1"))
|
|
105 tt <- which(chip.data$"16">0)
|
|
106 countTotalEnh <- length(unique(chip.data$"1"[tt]))
|
|
107 tt <- which(chip.data$"10">0)
|
|
108 countTotalProm <- length(unique(chip.data$"1"[tt]))
|
|
109 tt <- which(chip.data$"12">0)
|
|
110 countTotalImDown <- length(unique(chip.data$"1"[tt]))
|
|
111 tt <- which(chip.data$"18">0)
|
|
112 countTotalIntra <- length(unique(chip.data$"1"[tt]))
|
|
113 tt <- which(chip.data$"20">0)
|
|
114 countTotal5kbD <- length(unique(chip.data$"1"[tt]))
|
|
115 tt <- which(chip.data$"28">0)
|
|
116 countTotal1IntronI <- length(unique(chip.data$"1"[tt]))
|
|
117 tt <- which(chip.data$"32">0)
|
|
118 countTotalExonsI <- length(unique(chip.data$"1"[tt]))
|
|
119 tt <- which(chip.data$"36">0)
|
|
120 countTotalIntronsI <- length(unique(chip.data$"1"[tt]))
|
|
121 tt <- which(chip.data$"40">0)
|
|
122 countTotalJonctionsI <- length(unique(chip.data$"1"[tt]))
|
|
123
|
|
124 names <- c("Enh.","Prom.","Imm.Down.","Intrag.","GeneDown.","F.Intron","Exons","2,3,etc.Introns","E.I.Junctions")
|
|
125 yChIPTotal <- c (countTotalEnh/countTotal,countTotalProm/countTotal, countTotalImDown/countTotal,countTotalIntra/countTotal,countTotal5kbD/countTotal,countTotal1IntronI/countTotal,countTotalExonsI/countTotal,countTotalIntronsI/countTotal,countTotalJonctionsI/countTotal)
|
|
126
|
|
127 } else {
|
|
128 countTotal <- length(unique(chip.data$"1"))
|
|
129 tt <- which(chip.data$"42">0)
|
|
130 countTSS <- length(unique(chip.data$"1"[tt]))
|
|
131 tt <- which(chip.data$"44">0)
|
|
132 countGene <- length(unique(chip.data$"1"[tt]))
|
|
133 names <- c("gene TSS","gene Body")
|
|
134 yChIPTotal <- c (countTSS/countTotal,countGene/countTotal)
|
|
135 }
|
|
136
|
|
137 if(!ifReg&!ifControl) {
|
|
138 par(mar=c(5.1, 7.1, 4.1, 2.1))
|
|
139 barplot(yChIPTotal,xlab="",beside=TRUE, col=c(myColor), names.arg=c(names),ylab="Proportion of genes with a peak")
|
|
140 cat ("Proportion of genes with a peak in a given genomic region:\n")
|
|
141 cat (paste(c(names),sep='\t'))
|
|
142 cat("\n")
|
|
143 cat (paste(c(yChIPTotal),sep='\t'))
|
|
144 cat("\n\n")
|
|
145 }
|
|
146 if (ifControl) {
|
|
147 if (bootstrap>1) {
|
|
148 yControlTotalMatrix <- NULL
|
|
149 }
|
|
150 for (fileNumber in 1:bootstrap) {
|
|
151
|
|
152 if (fileNumber>=2) {
|
6
|
153 dataTable <-read.table(file=paste(args[5],fileNumber,sep=""), header=TRUE);
|
3
|
154 control.data<-data.frame(dataTable)
|
|
155 }
|
|
156
|
|
157 colnames(control.data) <- cc
|
|
158
|
|
159 if (!ifk27) {
|
|
160
|
|
161 countTotalCntr <- length(unique(control.data$"1"))
|
|
162 tt <- which(control.data$"16">0)
|
|
163 countTotalEnhCntr <- length(unique(control.data$"1"[tt]))
|
|
164 tt <- which(control.data$"10">0)
|
|
165 countTotalPromCntr <- length(unique(control.data$"1"[tt]))
|
|
166 tt <- which(control.data$"12">0)
|
|
167 countTotalImDownCntr <- length(unique(control.data$"1"[tt]))
|
|
168 tt <- which(control.data$"18">0)
|
|
169 countTotalIntraCntr <- length(unique(control.data$"1"[tt]))
|
|
170 tt <- which(control.data$"20">0)
|
|
171 countTotal5kbDCntr <- length(unique(control.data$"1"[tt]))
|
|
172 tt <- which(control.data$"28">0)
|
|
173 countTotal1IntronICntr <- length(unique(control.data$"1"[tt]))
|
|
174 tt <- which(control.data$"32">0)
|
|
175 countTotalExonsICntr <- length(unique(control.data$"1"[tt]))
|
|
176 tt <- which(control.data$"36">0)
|
|
177 countTotalIntronsICntr <- length(unique(control.data$"1"[tt]))
|
|
178 tt <- which(control.data$"40">0)
|
|
179 countTotalJonctionsICntr <- length(unique(control.data$"1"[tt]))
|
|
180 yControlTotal <- c (countTotalEnhCntr/countTotalCntr,countTotalPromCntr/countTotalCntr, countTotalImDownCntr/countTotalCntr,countTotalIntraCntr/countTotalCntr,countTotal5kbDCntr/countTotalCntr,countTotal1IntronICntr/countTotalCntr,countTotalExonsICntr/countTotalCntr,countTotalIntronsICntr/countTotalCntr,countTotalJonctionsICntr/countTotalCntr)
|
|
181
|
|
182 } else {
|
|
183 countTotalCntr <- length(unique(control.data$"1"))
|
|
184 tt <- which(control.data$"42">0)
|
|
185 countTSSCntr <- length(unique(control.data$"1"[tt]))
|
|
186 tt <- which(control.data$"44">0)
|
|
187 countGeneCntr <- length(unique(control.data$"1"[tt]))
|
|
188 yControlTotal <- c (countTSSCntr/countTotalCntr,countGeneCntr/countTotalCntr)
|
|
189
|
|
190 }
|
|
191 if (bootstrap>1) {
|
|
192 yControlTotalMatrix <- rbind(yControlTotalMatrix,yControlTotal)
|
|
193 }
|
|
194 }
|
|
195 if (bootstrap>1) {
|
|
196 yControlTotal <- colMeans(yControlTotalMatrix)
|
|
197 Nrows <- nrow(yControlTotalMatrix)
|
|
198 colVars <- Nrows/(Nrows-1) * (colMeans(yControlTotalMatrix*yControlTotalMatrix)-colMeans(yControlTotalMatrix)^2)
|
|
199 }
|
|
200 if (!ifReg) {
|
|
201 cum = matrix( 0, nrow=2, ncol=length(names), byrow = TRUE)
|
|
202 for (i in c(1:length(names))) {
|
|
203 cum[1,i] <- yChIPTotal[i]
|
|
204 cum[2,i] <- yControlTotal[i]
|
|
205 }
|
|
206 if (bootstrap>1) {
|
|
207 wiskers <- matrix(c(colVars-colVars,sqrt(colVars)),2,length(names),byrow=TRUE)*1.96
|
|
208 }
|
|
209 par(mar=c(5.1, 7.1, 4.1, 2.1))
|
|
210 barx <- barplot(cum,xlab="",beside=TRUE, col=c(myColor[6],myColor[5]), legend = c("ChIP","Control"), names.arg=c(names),ylab="Proportion of genes with a peak")
|
|
211 cat ("Proportion of genes with a peak from the ChIP dataset in a given genomic region:\n")
|
|
212 cat (paste(c(names),sep='\t'))
|
|
213 cat("\n")
|
|
214 cat (paste(c(yChIPTotal),sep='\t'))
|
|
215 cat("\n\n")
|
|
216 cat ("Proportion of genes with a peak from the Control dataset in a given genomic region:\n")
|
|
217 cat (paste(c(names),sep='\t'))
|
|
218 cat("\n")
|
|
219 cat (paste(c(yControlTotal),sep='\t'))
|
|
220 cat("\n\n")
|
|
221 if (bootstrap>1) {
|
|
222 error.bar(barx,cum,wiskers)
|
|
223 cat ("Standard deviation for the Control dataset in a given genomic region:\n")
|
|
224 cat (paste(c(names),sep='\t'))
|
|
225 cat("\n")
|
|
226 cat (paste(c(sqrt(colVars)),sep='\t'))
|
|
227 cat("\n\n")
|
|
228 }
|
|
229 enrich <- NULL
|
|
230 for (i in c(1:length(names))) {
|
|
231 enrich[i] <- yChIPTotal[i]/yControlTotal[i];
|
|
232 }
|
|
233 barplot(enrich-1,xlab="",beside=TRUE, col=c(myColor), names.arg=c(names),ylab="Enrichment in comparison\nwith the control dataset", yaxt="n")
|
|
234 minX <- min(enrich-1)
|
|
235 maxX <- max(enrich-1)
|
|
236 x = seq(length=11, from=round(minX*10)/10, by=round((maxX-minX)*10)/100)
|
|
237 axis(2, at=x,labels=x+1, las=2)
|
|
238
|
|
239 cat ("Enrichment of genomic regions, ChIP peaks vs Control Peaks:\n")
|
|
240 cat (paste(c(names),sep='\t'))
|
|
241 cat("\n")
|
|
242 cat (paste(c(yChIPTotal/yControlTotal),sep='\t'))
|
|
243 cat("\n\n")
|
|
244 if (bootstrap>1) {
|
|
245 z <- (yChIPTotal-yControlTotal)/sqrt(colVars)
|
|
246 pvalues <- 2*pnorm(-abs(z))
|
|
247 cat ("Two-side P-values for each genomic category:\n")
|
|
248 cat (paste(c(names),sep='\t'))
|
|
249 cat("\n")
|
|
250 cat (paste(c(yChIPTotal/yControlTotal),sep='\t'))
|
|
251 cat("\n\n")
|
|
252 }
|
|
253 }
|
|
254 }
|
|
255 if (ifReg) {
|
|
256 n.types <- length(levels(chip.data$"6"))
|
|
257 myLevels <- levels(chip.data$"6")
|
|
258 nlev <- length(names)
|
|
259
|
|
260 if (ifControl) {
|
|
261 cum = matrix( 0, nrow=length(myLevels)+1, ncol=nlev, byrow = TRUE)
|
|
262 cumEnrichTotal = matrix( 0, nrow=length(myLevels), ncol=nlev, byrow = TRUE)
|
|
263 cumEnrichControl = matrix( 0, nrow=length(myLevels), ncol=nlev, byrow = TRUE)
|
|
264 }else {
|
|
265 cum = matrix( 0, nrow=length(myLevels), ncol=nlev, byrow = TRUE)
|
|
266 cumEnrichTotal = matrix( 0, nrow=length(myLevels), ncol=nlev, byrow = TRUE)
|
|
267 }
|
|
268 colReg <-NULL
|
|
269 for (r in c(1:length(myLevels))) {
|
|
270 tt <- which(chip.data$"6"==myLevels[r])
|
|
271 subset.data <- (chip.data[tt,])
|
|
272 countTotalSubset <- length(unique(subset.data$"1"))
|
|
273
|
|
274 if (!ifk27) {
|
|
275
|
|
276 tt <- which(subset.data$"16">0)
|
|
277 countTotalEnhSubset <- length(unique(subset.data$"1"[tt]))
|
|
278 tt <- which(subset.data$"10">0)
|
|
279 countTotalPromSubset <- length(unique(subset.data$"1"[tt]))
|
|
280 tt <- which(subset.data$"12">0)
|
|
281 countTotalImDownSubset <- length(unique(subset.data$"1"[tt]))
|
|
282 tt <- which(subset.data$"18">0)
|
|
283 countTotalIntraSubset <- length(unique(subset.data$"1"[tt]))
|
|
284 tt <- which(subset.data$"20">0)
|
|
285 countTotal5kbDSubset <- length(unique(subset.data$"1"[tt]))
|
|
286 tt <- which(subset.data$"28">0)
|
|
287 countTotal1IntronISubset <- length(unique(subset.data$"1"[tt]))
|
|
288 tt <- which(subset.data$"32">0)
|
|
289 countTotalExonsISubset <- length(unique(subset.data$"1"[tt]))
|
|
290 tt <- which(subset.data$"36">0)
|
|
291 countTotalIntronsISubset <- length(unique(subset.data$"1"[tt]))
|
|
292 tt <- which(subset.data$"40">0)
|
|
293 countTotalJonctionsISubset <- length(unique(subset.data$"1"[tt]))
|
|
294 ySubsetTotal <- c (countTotalEnhSubset/countTotalSubset,countTotalPromSubset/countTotalSubset, countTotalImDownSubset/countTotalSubset,countTotalIntraSubset/countTotalSubset,countTotal5kbDSubset/countTotalSubset,countTotal1IntronISubset/countTotalSubset,countTotalExonsISubset/countTotalSubset,countTotalIntronsISubset/countTotalSubset,countTotalJonctionsISubset/countTotalSubset)
|
|
295 } else {
|
|
296 tt <- which(subset.data$"42">0)
|
|
297 countTotalTSSSubset <- length(unique(subset.data$"1"[tt]))
|
|
298 tt <- which(subset.data$"44">0)
|
|
299 countTotalGeneSubset <- length(unique(subset.data$"1"[tt]))
|
|
300 ySubsetTotal <- c (countTotalTSSSubset/countTotalSubset,countTotalGeneSubset/countTotalSubset)
|
|
301 }
|
|
302 for (i in c(1:nlev)) {
|
|
303 cum[r,i] <- ySubsetTotal[i]
|
|
304 cumEnrichTotal[r,i] <- ySubsetTotal[i]/yChIPTotal[i]
|
|
305 if (ifControl) {
|
|
306 cumEnrichControl[r,i] <- ySubsetTotal[i]/yControlTotal[i]
|
|
307 }
|
|
308 }
|
|
309 colReg[r]<-myColor[r+3]
|
|
310 }
|
|
311 if (ifControl) {
|
|
312 for (i in c(1:nlev)) {
|
|
313 cum[4,i] <- yControlTotal[i]
|
|
314 }
|
|
315 }
|
|
316
|
|
317 cat ("Proportion of genes with a peak from the ChIP dataset in a given genomic region:\n")
|
|
318 for (r in c(1:length(myLevels))) {
|
|
319 cat (paste(myLevels[r],":\n",sep=""))
|
|
320 cat (paste(c(names),sep='\t'))
|
|
321 cat("\n")
|
|
322 cat (paste(c(cum[r,] ),sep='\t'))
|
|
323 cat("\n")
|
|
324 }
|
|
325 cat("\n")
|
|
326 par(mar=c(5.1, 7.1, 4.1, 2.1))
|
|
327 if (ifControl) {
|
|
328 barx <- barplot(cum,xlab="",beside=TRUE, col=c(colReg, colors()[334]), legend = c(myLevels,"Control"), names.arg=c(names),ylab="Proportion of genes with a peak")
|
|
329 cat ("Proportion of genes with a peak from the Control dataset in a given genomic region:\n")
|
|
330 cat (paste(c(names),sep='\t'))
|
|
331 cat("\n")
|
|
332 cat (paste(c(yControlTotal),sep='\t'))
|
|
333 cat("\n\n")
|
|
334 if (bootstrap>1) {
|
|
335 wiskers <- cum-cum
|
|
336 wiskers[nrow(wiskers),] <- sqrt(colVars)*1.96
|
|
337 error.bar(barx,cum,wiskers)
|
|
338 cat ("Standard deviation for the Control dataset in a given genomic region:\n")
|
|
339 cat (paste(c(names),sep='\t'))
|
|
340 cat("\n")
|
|
341 cat (paste(c(sqrt(colVars)),sep='\t'))
|
|
342 cat("\n\n")
|
|
343 }
|
|
344 } else {
|
|
345 barplot(cum,xlab="",beside=TRUE, col=c(colReg), legend = c(myLevels), names.arg=c(names),ylab="Proportion of genes with a peak")
|
|
346 }
|
|
347 barplot(cumEnrichTotal-1,xlab="",beside=TRUE, col=c(colReg), names.arg=c(names),ylab="Enrichment in comparison\nwith the total gene set", yaxt="n")
|
|
348 minX <- min(cumEnrichTotal-1)
|
|
349 maxX <- max(cumEnrichTotal-1)
|
|
350 x = seq(length=11, from=round(minX*10)/10, by=round((maxX-minX)*10)/100)
|
|
351 axis(2, at=x,labels=x+1, las=2)
|
|
352
|
|
353 cat ("Enrichment of genomic regions, Transcriptional categories vs All Genes:\n")
|
|
354 for (r in c(1:length(myLevels))) {
|
|
355 cat (paste(myLevels[r],":\n",sep=""))
|
|
356 cat (paste(c(names),sep='\t'))
|
|
357 cat("\n")
|
|
358 cat (paste(c(cumEnrichTotal[r,]),sep='\t'))
|
|
359 cat("\n")
|
|
360 }
|
|
361 cat("\n")
|
|
362
|
|
363 if (ifControl) {
|
|
364 barplot(cumEnrichControl-1,xlab="",beside=TRUE, col=c(colReg), names.arg=c(names),ylab="Enrichment in comparison\nwith control", yaxt="n")
|
|
365 minX <- min(cumEnrichControl-1)
|
|
366 maxX <- max(cumEnrichControl-1)
|
|
367 x = seq(length=11, from=round(minX*10)/10, by=round((maxX-minX)*10)/100)
|
|
368 axis(2, at=x,labels=x+1, las=2)
|
|
369 cat ("Enrichment of genomic regions, ChIP peaks vs Control Peaks:\n")
|
|
370 for (r in c(1:length(myLevels))) {
|
|
371 cat (paste(myLevels[r],":\n",sep=""))
|
|
372 cat (paste(c(names),sep='\t'))
|
|
373 cat("\n")
|
|
374 cat (paste(c(cumEnrichControl[r,]),sep='\t'))
|
|
375 cat("\n")
|
|
376 }
|
|
377 cat("\n")
|
|
378 if (bootstrap>1) {
|
|
379 cat ("Two-side P-values for each genomic category:\n")
|
|
380 for (r in c(1:length(myLevels))) {
|
|
381 z <- (cum[r,]-yControlTotal)/sqrt(colVars)
|
|
382 pvalues <- 2*pnorm(-abs(z))
|
|
383 cat (paste(myLevels[r],":\n",sep=""))
|
|
384 cat (paste(c(names),sep='\t'))
|
|
385 cat("\n")
|
|
386 cat (paste(c(pvalues),sep='\t'))
|
|
387 cat("\n")
|
|
388 }
|
|
389 }
|
|
390 }
|
|
391 }
|
|
392 sink() #stop sinking :)
|
|
393 dev.off()
|
|
394
|