Mercurial > repos > jbrayet > maketssdist
changeset 44:dba6d7bc567c draft
Deleted selected files
| author | jbrayet |
|---|---|
| date | Wed, 12 Aug 2015 09:20:24 -0400 |
| parents | 0abf47694700 |
| children | 7d5e9f4ffad5 |
| files | makeTSSdist.R |
| diffstat | 1 files changed, 0 insertions(+), 484 deletions(-) [+] |
line wrap: on
line diff
--- a/makeTSSdist.R Wed Aug 12 09:15:08 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,484 +0,0 @@ -#!/usr/bin/env Rscript - -#usage $0 STEP RIGHT chipPeaks outputFile.png output.txt [controlPeaks] [1 for pdf] -args <- commandArgs(TRUE) -#print (args) -myStep <- type.convert(args[2]) -maxValue <- type.convert(args[3]) - -dataTable <-read.table(file=paste(args[4],".genes.ClosestPeakDist", sep=""), header=TRUE); -chip.genes.ClosestPeakDist<-data.frame(dataTable) -ifReg <- 0 -if (length(unique(chip.genes.ClosestPeakDist$Reg))>1) { - ifReg <- 1 -} -ifControl <- 0 - -#options(bitmapType='cairo') - -ifPDF <- 0 -if (length(args)>=8) { - ifPDF=args[8] -} -if (length(args)==7 & args[7]==1) { - ifPDF=1 -} - -suppressMessages(library(Hmisc)) - -if (length(args)>=7 & args[7]!=1 & args[7]!=0) { - dataTable <-read.table(file=paste(args[7],".genes.ClosestPeakDist", sep=""), header=TRUE); - control.genes.ClosestPeakDist<-data.frame(dataTable) - ifControl <- 1 -} -if (ifReg & ifControl) { - if (ifPDF==1) { - pdf(file = args[5], width = 19, height = 8, pointsize = 20, bg = "white") - } else { - jpeg(filename = args[5], width = 1440 , height = 680, units = "px", pointsize = 20, bg = "white", res = NA) - plot(1:10) - } - op <- par(mfrow = c(2,3)) -} else { - if (ifPDF==1) { - pdf(file = args[5], width = 10, height = 13, pointsize = 20, bg = "white") - } else { - jpeg(filename = args[5], width = 680, height = 880, units = "px", pointsize = 20, bg = "white", res = NA) - plot(1:10) - } - # plot(1:10) - op <- par(mfrow = c(2,1)) -} -myColor <- 1 -myColor[1] <- colors()[131] -myColor[2] <- "darkolivegreen3" -myColor[3] <- "azure4" -myColor[4] <- "royalblue3" -myColor[5] <- colors()[17] - -myColorControl <- 1 - -myColorControl[1] <- colors()[24] -myColorControl[2] <- colors()[278] -myColorControl[3] <- colors()[305] -myColorControl[4] <- colors()[219] -myColorControl[5] <- colors()[343] - - - -#for cumulative: -dist_real_f <- chip.genes.ClosestPeakDist -if (ifControl) { - dist_control_f <- control.genes.ClosestPeakDist -} -step <- myStep -lim <- maxValue -x <- 0 -count <- 1 -countL <-1 -n.types <- 1 -myLevels <- 0 -countTotalCont <- 0 -countTotal <-0 -countLCont <- 0 -cumTotalCont <- 0 -if (ifReg) { - n.types <- length(levels(chip.genes.ClosestPeakDist$Reg)) - myLevels <- levels(chip.genes.ClosestPeakDist$Reg) - cum = matrix( 0, nrow=lim/step +1, ncol=n.types, byrow = TRUE) - for (i in c(1:n.types)) { - t <- which ((dist_real_f$Reg==myLevels[i])) - countL[i] <- length(t) - } - count <-1 - for (i in seq(length=lim/step +1, from=0, by=step)) { - for (t in c(1:n.types)) { - tt <- which ((dist_real_f$Reg==myLevels[t])&(dist_real_f$Dist<=i)&(dist_real_f$Dist>=-i)) - cum[count,t] <- length(tt) - } - x[count] <- i - count <- count + 1 - } - ymax <- max(cum[,1]/countL[1], na.rm=TRUE) - for (i in c(2:n.types)) { - ymax <- max(ymax,max(cum[,i]/countL[i], na.rm=TRUE)) - } - myLocCol <- myColor[2] - - par(mar=c(5.1, 7.1, 4.1, 2.1)) - - plot (x,cum[,1]/countL[1] ,col = myColor[2],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(0, lim),xaxt="n" , ylim=c(0,ymax)) - for (i in c(2:n.types)) { - colorr <- i+1 - myLocCol <- c(myLocCol,myColor[colorr]) - lines (x,cum[,i]/countL[i] ,col = myColor[colorr],type="l", lwd = 2) -# print (myColor[colorr]) - } - - gradi <- 1000 - if (lim>10000) { - gradi <- 10000 - } - if (lim<3000) { - gradi <- 500 - } - axisx <- seq(length=lim/gradi+1, from=0, by=gradi) - axisxlab <- paste(axisx/1000,"", sep = "") - axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) - ymax <- max(cum[,i]/countL[i], na.rm=TRUE) - - minor.tick(nx=5,tick.ratio=0.5) - - legend(lim*0.45, ymax*0.45, myLevels, cex=1, lwd = 2, bty = "n", col = myLocCol, lty = c(1), pt.bg= c(myLocCol) , merge = TRUE) - - title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (cumulative)") - - if (ifControl) { - count <-1 - n.types <- length(levels(control.genes.ClosestPeakDist$Reg)) - myLevels <- levels(control.genes.ClosestPeakDist$Reg) - cumCont = matrix( 0, nrow=lim/step +1, ncol=n.types, byrow = TRUE) - for (i in c(1:n.types)) { - t <- which ((dist_control_f$Reg==myLevels[i])) - countLCont[i] <- length(t) - } - for (i in seq(length=lim/step +1, from=0, by=step)) { - for (t in c(1:n.types)) { - tt <- which ((dist_control_f$Reg==myLevels[t])&(dist_control_f$Dist<=i)&(dist_control_f$Dist>=-i)) - cumCont[count,t] <- length(tt) - } - x[count] <- i - count <- count + 1 - } - ymax <- max(cumCont[,1]/countLCont[1], na.rm=TRUE) - for (i in c(2:n.types)) { - ymax <- max(ymax,max(cumCont[,i]/countLCont[i], na.rm=TRUE)) - } - myLocColCntr <- myColorControl[2] - plot (x,cumCont[,1]/countLCont[1] ,col = myLocColCntr[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(0, lim),xaxt="n" , ylim=c(0,ymax)) - for (i in c(2:n.types)) { - colorr <- i+1 - myLocColCntr <- c(myLocColCntr,myColorControl[colorr]) - lines (x,cumCont[,i]/countLCont[i] ,col = myColorControl[colorr],type="l", lwd = 2) - } - if (lim>10000) { - gradi <- 10000 - } - if (lim<3000) { - gradi <- 500 - } - axisx <- seq(length=lim/gradi+1, from=0, by=gradi) - axisxlab <- paste(axisx/1000, sep = "") - axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) - minor.tick(nx=5,tick.ratio=0.5) - legend(lim*0.45, ymax*0.45, myLevels, cex=1 , lwd = 2, bty = "n", col = myLocColCntr, lty = c(1), pt.bg= c(myLocCol) , merge = TRUE) - title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (cumulative)") - #real_vs_control_cumulative: - count <-1 - countTotal <- length(dist_real_f$Reg) - cumTotal <- 0 - for (i in seq(length=lim/step +1, from=0, by=step)) { - t <- which ((dist_real_f$Dist<=i)&(dist_real_f$Dist>=-i)) - cumTotal[count] <- length(t) - x[count] <- i - count <- count + 1 - } - plot (x,cumTotal/countTotal ,col = myColor[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(0, lim),xaxt="n" ) - gradi <- 1000 - if (lim>10000) { - gradi <- 10000 - } - if (lim<3000) { - gradi <- 500 - } - axisx <- seq(length=lim/gradi+1, from=0, by=gradi) - axisxlab <- paste(axisx/1000, sep = "") - axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) - ymax <- max(cumTotal/countTotal, na.rm=TRUE) - minor.tick(nx=5,tick.ratio=0.5) - countTotalCont <- length(dist_control_f$Reg) - cumTotalCont <- 0 - count <- 1 - for (i in seq(length=lim/step +1, from=0, by=step)) { - t <- which ((dist_control_f$Dist<=i)&(dist_control_f$Dist>=-i)) - cumTotalCont[count] <- length(t) - x[count] <- i - count <- count + 1 - } - lines (x,cumTotalCont/countTotalCont ,col = colors()[328],type="l", lwd = 2) - legend(lim*0.45, ymax*0.45, c("ChIP","Control"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1], colors()[328]), lty = c(1), pt.bg= c(myColor[1], colors()[328]) , merge = TRUE) - title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (cumulative)") - } -} else { - countTotal <- length(dist_real_f$Reg) - cumTotal <- 0 - count <-1 - - gradi <- 1000 - if (lim>10000) { - gradi <- 10000 - } - if (lim<3000) { - gradi <- 500 - } - - for (i in seq(length=lim/step +1, from=0, by=step)) { - t <- which ((dist_real_f$Dist<=i)&(dist_real_f$Dist>=-i)) - cumTotal[count] <- length(t) - x[count] <- i - count <- count + 1 - } - par(mar=c(5.1, 7.1, 4.1, 2.1)) - - plot (x,cumTotal/countTotal ,col = myColor[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(0, lim),xaxt="n" ) - axisx <- seq(length=lim/gradi+1, from=0, by=gradi) - axisxlab <- paste(axisx/1000, sep = "") - axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) -title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (cumulative)") - ymax <- max(cumTotal/countTotal, na.rm=TRUE) - if (ifControl) { - countTotalCont <- length(dist_control_f$Reg) - cumTotalCont <- 0 - count <- 1 - for (i in seq(length=lim/step +1, from=0, by=step)) { - t <- which ((dist_control_f$Dist<=i)&(dist_control_f$Dist>=-i)) - cumTotalCont[count] <- length(t) - x[count] <- i - count <- count + 1 - } - lines (x,cumTotalCont/countTotalCont ,col = colors()[328],type="l", lwd = 2) - legend(lim*0.45, ymax*0.45, c("ChIP","Control"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1], colors()[328]), lty = c(1), pt.bg= c(myColor[1], colors()[328]) , merge = TRUE) - } else { - legend(lim*0.45, ymax*0.45, c("ChIP"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1]), lty = c(1), pt.bg= c(myColor[1]) , merge = TRUE) - } -} - -sink(args[6], append=FALSE, split=FALSE) -if (ifReg) { - if (ifControl) { - cat (paste("Dist_TSS","% genes w/ a peak in ChIP","% genes w/ a peak in control",sep='\t')) - cat("\t") - for (i in c(1:n.types)) { - cat(paste("% ", myLevels[i]," genes w/ a peak in ChIP", sep="")) - cat("\t") - } - - for (i in c(1:n.types)) { - cat(paste("% ", myLevels[i]," genes w/ a peak in Control", sep="")) - cat("\t") - } - cat("\n") - for (i in c(1:length(x))) { - cat(paste(x[i],cumTotal[i]/countTotal,cumTotalCont[i]/countTotalCont,sep="\t")) - cat("\t") - for (t in c(1:n.types)) { - cat(paste(cum[i,t]/countL[t],"\t", sep="")) - } - for (t in c(1:n.types)) { - cat(paste(cumCont[i,t]/countLCont[t],"\t", sep="")) - } - cat("\n") - } - }else { - cat (paste("Dist_TSS","\t",sep='')) - for (i in c(1:n.types)) { - cat(paste("% ", myLevels[i]," genes w/ a peak in ChIP", "\t", sep="")) - } - cat("\n") - for (i in c(1:length(x))) { - cat(paste(x[i],"\t",sep="")) - for (t in c(1:n.types)) { - cat(paste(cum[i,t]/countL[t],"\t", sep="")) - } - cat("\n") - } - } -} else { - if (ifControl) { - cat (paste("Dist_TSS","% genes w/ a peak in ChIP","% genes w/ a peak in control",sep='\t')) - cat("\n") - for (i in c(1:length(x))) { - cat(paste(x[i],cumTotal[i]/countTotal,cumTotalCont[i]/countTotalCont,sep="\t")) - cat("\n") - } - }else { - cat (paste("Dist_TSS","% genes w/ a peak in ChIP",sep='\t')) - cat("\n") - for (i in c(1:length(x))) { - cat(paste(x[i],cumTotal[i]/countTotal,sep="\t")) - cat("\n") - } - - } -} - - -#stop sinking: -sink() - -#around TSS: -lim <- maxValue -step <- myStep -my_breaks <- seq(length=lim/step*2 +1, from=-lim, by=step) -chip.genes <- read.table(file=paste(args[4],".genes", sep=""), header=TRUE) ; -dist_real_f <- chip.genes -if (ifControl) { - control.genes <- read.table(file=paste(args[4],".genes", sep=""), header=TRUE) ; - dist_control_f<-data.frame(control.genes) -} -if (ifReg) { - #n.types <- length(levels(chip.genes.ClosestPeakDist$Reg)) - #myLevels <- levels(dist_real_f$Reg) - - t<- which (dist_real_f$Reg==myLevels[1]) - values_real <-dist_real_f$Dist[t] - hTSSreal = hist(values_real,plot=FALSE,breaks = c(min(values_real),my_breaks,max(values_real)) ) - ymax <- max(hTSSreal$density, na.rm=TRUE) - for (i in c(2:n.types)) { - t<- which (dist_real_f$Reg==myLevels[i]) - values_real <-dist_real_f$Dist[t] - hTSSreal = hist(values_real,plot=FALSE,breaks = c(min(values_real),my_breaks,max(values_real)) ) - ymax <- max(ymax,max(hTSSreal$density, na.rm=TRUE)) - } - - - t<- which (dist_real_f$Reg==myLevels[1]) - values_real <-dist_real_f$Dist[t] - hTSSreal = hist(values_real,plot=FALSE,breaks = c(min(values_real),my_breaks,max(values_real)) ) - plot (hTSSreal$mids,hTSSreal$density,col = myLocCol[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(-lim, lim),ylim = c(0, ymax), xaxt="n" ) - - title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (density)") - - for (i in c(2:n.types)) { - t<- which (dist_real_f$Reg==myLevels[i]) - values_real <-dist_real_f$Dist[t] - hTSSreal = hist(values_real,plot=FALSE,breaks = c(min(values_real),my_breaks,max(values_real)) ) - lines (hTSSreal$mids,hTSSreal$density,col = myLocCol[i],type="l", lwd = 2) - } - legend(lim*0.1, ymax*0.9, myLevels, cex=1 , lwd = 2, bty = "n", col = myLocCol, lty = c(1), pt.bg= c(myLocCol) , merge = TRUE) - - gradi <- 1000 - if (lim>10000) { - gradi <- 10000 - } - if (lim<3000) { - gradi <- 500 - } - - axisx <- seq(length=2*lim/gradi+1, from=-lim, by=gradi) - axisxlab <- paste(axisx/1000, sep = "") - axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) - - - #minor.tick(nx=10,tick.ratio=0.5) - if (ifControl) { - t<- which (dist_control_f$Reg==myLevels[1]) - values_control <-dist_control_f$Dist[t] - hTSScontrol= hist(values_control,plot=FALSE,breaks = c(min(values_control),my_breaks,max(values_control)) ) - ymax <- max(hTSScontrol$density, na.rm=TRUE) - for (i in c(2:n.types)) { - t<- which (dist_control_f$Reg==myLevels[i]) - values_control <-dist_control_f$Dist[t] - hTSScontrol = hist(values_control,plot=FALSE,breaks = c(min(values_control),my_breaks,max(values_control)) ) - ymax <- max(ymax,max(hTSScontrol$density, na.rm=TRUE)) - } - t<- which (dist_control_f$Reg==myLevels[1]) - values_control <-dist_control_f$Dist[t] - hTSScontrol= hist(values_control,plot=FALSE,breaks = c(min(values_control),my_breaks,max(values_control)) ) - plot (hTSScontrol$mids,hTSScontrol$density,col = myLocColCntr[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(-lim, lim),ylim = c(0, ymax),xaxt="n" ) - title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (density)") - for (i in c(2:n.types)) { - t<- which (dist_control_f$Reg==myLevels[i]) - values_control <-dist_control_f$Dist[t] - hTSScontrol = hist(values_control,plot=FALSE,breaks = c(min(values_control),my_breaks,max(values_control)) ) - lines (hTSScontrol$mids,hTSScontrol$density,col = myLocColCntr[i],type="l", lwd = 2) - } - - gradi <- 1000 - if (lim>10000) { - gradi <- 10000 - } - if (lim<3000) { - gradi <- 500 - } - - axisx <- seq(length=2*lim/gradi+1, from=-lim, by=gradi) - axisxlab <- paste(axisx/1000, sep = "") - axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) - - legend(lim*0.1, ymax*0.9, myLevels, cex=1 , lwd = 2, bty = "n", col = myLocColCntr, lty = c(1), pt.bg= c(myLocCol) , merge = TRUE) - - # minor.tick(nx=10,tick.ratio=0.5) - #control vs real - values_real <-dist_real_f$Dist - hTSSreal = hist(values_real, plot=FALSE, breaks = c(min(values_real),my_breaks,max(values_real)) ) - plot (hTSSreal$mids,hTSSreal$density,col = myColor[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(-lim, lim),xaxt="n") - title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (density)") - ymax <- max(hTSSreal$density, na.rm=TRUE) - values_control <-dist_control_f$Dist - hTSScontrol = hist(values_control, plot=FALSE, breaks = c(min(values_control),my_breaks,max(values_control)) ) - lines (hTSScontrol$mids,hTSScontrol$density,col = colors()[328],type="l", lwd = 2) - legend(lim*0.2, ymax*0.9, c("ChIP","Control"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1], colors()[328]), lty = c(1), pt.bg= c(myColor[1], colors()[328]) , merge = TRUE) - - gradi <- 1000 - if (lim>10000) { - gradi <- 10000 - } - if (lim<3000) { - gradi <- 500 - } - - axisx <- seq(length=2*lim/gradi+1, from=-lim, by=gradi) - axisxlab <- paste(axisx/1000, sep = "") - axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) - - - - # minor.tick(nx=10,tick.ratio=0.5) - } -} else { - values_real <-dist_real_f$Dist - hTSSreal = hist(values_real, plot=FALSE, breaks = c(min(values_real),my_breaks,max(values_real)) ) - plot (hTSSreal$mids,hTSSreal$density,col = myColor[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(-lim, lim),xaxt="n") - title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (density)") - ymax <- max(hTSSreal$density, na.rm=TRUE) - if (ifControl) { - values_control <-dist_control_f$Dist - hTSScontrol = hist(values_control, plot=FALSE, breaks = c(min(values_control),my_breaks,max(values_control)) ) - lines (hTSScontrol$mids,hTSScontrol$density,col = colors()[328],type="l", lwd = 2) - legend(lim*0.2, ymax*0.9, c("ChIP","Control"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1], colors()[328]), lty = c(1), pt.bg= c(myColor[1], colors()[328]) , merge = TRUE) - } else { - legend(lim*0.2, ymax*0.9, c("ChIP"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1]), lty = c(1), pt.bg= c(myColor[1]) , merge = TRUE) - } - - gradi <- 1000 - if (lim>10000) { - gradi <- 10000 - } - if (lim<3000) { - gradi <- 500 - } - - axisx <- seq(length=2*lim/gradi+1, from=-lim, by=gradi) - axisxlab <- paste(axisx/1000, sep = "") - axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) - - - # minor.tick(nx=10,tick.ratio=0.5) -} -suppressMessages(dev.off()) -q(); -cat (paste("peak height","# peaks in ChIP","# peaks in Control","#control/chip","\n",sep='\t')) -for (xval in c(minValue:maxValue)) { - for (i in (1:length(chipHist$mids))) { - if (xval==chipHist$mids[i]) { - ychip <- chipHist$counts[i] - } - } - for (i in (1:length(controlHist$mids))) { - if (xval==controlHist$mids[i]) { - ycontrol <- controlHist$counts[i] - } - } - cat (paste(xval,ychip,ycontrol,ycontrol/ychip,"\n",sep='\t')) -}
