Mercurial > repos > artbio > ngsplot
comparison ngs.plot.r @ 0:3ca58369469c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/ngsplot commit b'e9fcc157a7f2f2fa9d6ac9a58d425ff17c975f5c\n'
| author | artbio |
|---|---|
| date | Wed, 06 Dec 2017 19:01:53 -0500 |
| parents | |
| children | de27d4172d19 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3ca58369469c |
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 # | |
| 3 # Program: ngs.plot.r | |
| 4 # Purpose: Plot sequencing coverages at different genomic regions. | |
| 5 # Allow overlaying various coverages with gene lists. | |
| 6 # | |
| 7 # -- by Li Shen, MSSM | |
| 8 # Created: Nov 2011. | |
| 9 # -- by Christophe Antoniewski | |
| 10 # recoded for Galaxy: Dec 2017 | |
| 11 | |
| 12 ngsplot.version <- '2.63_artbio' | |
| 13 # Program environment variable. | |
| 14 progpath <- Sys.getenv('NGSPLOT') | |
| 15 if(progpath == "") { | |
| 16 stop("Set environment variable NGSPLOT before run the program. See README | |
| 17 for details.\n") | |
| 18 } | |
| 19 | |
| 20 source(file.path(progpath, 'lib', 'parse.args.r')) | |
| 21 source(file.path(progpath, 'lib', 'genedb.r')) | |
| 22 source(file.path(progpath, 'lib', 'plotlib.r')) | |
| 23 source(file.path(progpath, 'lib', 'coverage.r')) | |
| 24 | |
| 25 # Deal with command line arguments. | |
| 26 cmd.help <- function(){ | |
| 27 cat("\nVisit https://github.com/shenlab-sinai/ngsplot/wiki/ProgramArguments101 for details\n") | |
| 28 cat(paste("Version:", ngsplot.version, sep=" ")) | |
| 29 cat("\nUsage: ngs.plot.r -G genome -R region -C [cov|config]file\n") | |
| 30 cat(" -O name [Options]\n") | |
| 31 cat("\n## Mandatory parameters:\n") | |
| 32 cat(" -G Genome name. Use ngsplotdb.py list to show available genomes.\n") | |
| 33 cat(" -R Genomic regions to plot: tss, tes, genebody, exon, cgi, enhancer, dhs or bed\n") | |
| 34 cat(" -C Indexed bam file or a configuration file for multiplot\n") | |
| 35 cat(" -O Name for output: multiple files will be generated\n") | |
| 36 cat("## Optional parameters related to configuration file:\n") | |
| 37 cat(" -E Gene list to subset regions OR bed file for custom region\n") | |
| 38 cat(" -T Image title\n") | |
| 39 EchoCoverageArgs() | |
| 40 EchoPlotArgs() | |
| 41 cat("\n") | |
| 42 } | |
| 43 | |
| 44 | |
| 45 ########################################################################### | |
| 46 #################### Deal with program input arguments #################### | |
| 47 args <- commandArgs(T) | |
| 48 # args <- unlist(strsplit('-G mm10 -R tss -C K27M_no_stim_3_GATCAG_L006_R1_001Aligned.out.srt.rmdup.bam -O test -Debug 1', ' ')) | |
| 49 | |
| 50 # Input argument parser. | |
| 51 args.tbl <- parseArgs(args, c('-G', '-C', '-R', '-O')) # see lib/parse.args.r | |
| 52 if(is.null(args.tbl)){ | |
| 53 cmd.help() | |
| 54 stop('Error in parsing command line arguments. Stop.\n') | |
| 55 } | |
| 56 genome <- args.tbl['-G'] | |
| 57 reg2plot <- args.tbl['-R'] | |
| 58 oname <- args.tbl['-O'] | |
| 59 warning(sprintf("%s", args.tbl)) | |
| 60 | |
| 61 cat("Configuring variables...") | |
| 62 # Load tables of database: default.tbl, dbfile.tbl | |
| 63 default.tbl <- read.delim(file.path(progpath, 'database', 'default.tbl')) | |
| 64 dbfile.tbl <- read.delim(file.path(progpath, 'database', 'dbfile.tbl')) | |
| 65 CheckRegionAllowed(reg2plot, default.tbl) | |
| 66 | |
| 67 # Setup variables from arguments. | |
| 68 cov.args <- CoverageVars(args.tbl, reg2plot) | |
| 69 attach(cov.args) # | |
| 70 plot.args <- PlotVars(args.tbl) | |
| 71 attach(plot.args) | |
| 72 | |
| 73 # Configuration: coverage-genelist-title relationships. | |
| 74 ctg.tbl <- ConfigTbl(args.tbl, fraglen) | |
| 75 | |
| 76 # Setup plot-related coordinates and variables. | |
| 77 plotvar.list <- SetupPlotCoord(args.tbl, ctg.tbl, default.tbl, dbfile.tbl, | |
| 78 progpath, genome, reg2plot, inttag, flanksize, | |
| 79 samprate, galaxy) | |
| 80 attach(plotvar.list) | |
| 81 | |
| 82 # Setup data points for plot. | |
| 83 pts.list <- SetPtsSpline(pint, lgint) | |
| 84 pts <- pts.list$pts # data points for avg. profile and standard errors. | |
| 85 m.pts <- pts.list$m.pts # middle data points. For pint, m.pts=1. | |
| 86 f.pts <- pts.list$f.pts # flanking region data points. | |
| 87 | |
| 88 # Setup matrix for avg. profiles. | |
| 89 regcovMat <- CreatePlotMat(pts, ctg.tbl) | |
| 90 # Setup matrix for standard errors. | |
| 91 confiMat <- CreateConfiMat(se, pts, ctg.tbl) | |
| 92 | |
| 93 # Genomic enrichment for all profiles in the config. Use this for heatmaps. | |
| 94 enrichList <- vector('list', nrow(ctg.tbl)) | |
| 95 cat("Done\n") | |
| 96 | |
| 97 # Load required libraries. | |
| 98 cat("Loading R libraries") | |
| 99 if(!suppressMessages(require(ShortRead, warn.conflicts=F))) { | |
| 100 source("http://bioconductor.org/biocLite.R") | |
| 101 biocLite(ShortRead) | |
| 102 if(!suppressMessages(require(ShortRead, warn.conflicts=F))) { | |
| 103 stop('Loading package ShortRead failed!') | |
| 104 } | |
| 105 } | |
| 106 cat('.') | |
| 107 if(!suppressMessages(require(BSgenome, warn.conflicts=F))) { | |
| 108 source("http://bioconductor.org/biocLite.R") | |
| 109 biocLite(BSgenome) | |
| 110 if(!suppressMessages(require(BSgenome, warn.conflicts=F))) { | |
| 111 stop('Loading package BSgenome failed!') | |
| 112 } | |
| 113 } | |
| 114 cat('.') | |
| 115 if(!suppressMessages(require(doMC, warn.conflicts=F))) { | |
| 116 install.packages('doMC') | |
| 117 if(!suppressMessages(require(doMC, warn.conflicts=F))) { | |
| 118 stop('Loading package doMC failed!') | |
| 119 } | |
| 120 } | |
| 121 # Register doMC with CPU number. | |
| 122 if(cores.number == 0){ | |
| 123 registerDoMC() | |
| 124 } else { | |
| 125 registerDoMC(cores.number) | |
| 126 } | |
| 127 cat('.') | |
| 128 if(!suppressMessages(require(caTools, warn.conflicts=F))) { | |
| 129 install.packages('caTools') | |
| 130 if(!suppressMessages(require(caTools, warn.conflicts=F))) { | |
| 131 stop('Loading package caTools failed!') | |
| 132 } | |
| 133 } | |
| 134 cat('.') | |
| 135 if(!suppressMessages(require(utils, warn.conflicts=F))) { | |
| 136 install.packages('utils') | |
| 137 if(!suppressMessages(require(utils, warn.conflicts=F))) { | |
| 138 stop('Loading package utils failed!') | |
| 139 } | |
| 140 } | |
| 141 cat('.') | |
| 142 cat("Done\n") | |
| 143 | |
| 144 ####################################################################### | |
| 145 # Here start to extract coverages for all genomic regions and calculate | |
| 146 # data for plotting. | |
| 147 | |
| 148 cat("Analyze bam files and calculate coverage") | |
| 149 # Extract bam file names from configuration and determine if bam-pair is used. | |
| 150 bfl.res <- bamFileList(ctg.tbl) | |
| 151 bam.pair <- bfl.res$bbp # boolean for bam-pair. | |
| 152 bam.list <- bfl.res$bam.list # bam file list. | |
| 153 CheckHMColorConfig(hm.color, bam.pair) | |
| 154 | |
| 155 # Determine if bowtie is used to generate the bam files. | |
| 156 # Index bam files if not done yet. | |
| 157 v.map.bowtie <- headerIndexBam(bam.list) | |
| 158 | |
| 159 # Retrieve chromosome names for each bam file. | |
| 160 sn.list <- seqnamesBam(bam.list) | |
| 161 | |
| 162 # Calculate library size from bam files for normalization. | |
| 163 v.lib.size <- libSizeBam(bam.list) | |
| 164 | |
| 165 v.low.cutoff <- vector("integer", nrow(ctg.tbl)) # low count cutoffs. | |
| 166 # Process the config file row by row. | |
| 167 # browser() | |
| 168 for(r in 1:nrow(ctg.tbl)) { # r: index of plots/profiles. | |
| 169 | |
| 170 reg <- ctg.tbl$glist[r] # retrieve gene list names. | |
| 171 # Create coordinate chunk indices. | |
| 172 chkidx.list <- chunkIndex(nrow(coord.list[[reg]]), gcs) | |
| 173 | |
| 174 # Do coverage for each bam file. | |
| 175 bam.files <- unlist(strsplit(ctg.tbl$cov[r], ':')) | |
| 176 | |
| 177 # Obtain fraglen for each bam file. | |
| 178 fraglens <- as.integer(unlist(strsplit(ctg.tbl$fraglen[r], ':'))) | |
| 179 | |
| 180 # Obtain bam file basic info. | |
| 181 libsize <- v.lib.size[bam.files[1]] | |
| 182 v.low.cutoff[r] <- low.count / libsize * 1e6 | |
| 183 result.pseudo.rpm <- 1e6 / libsize | |
| 184 sn.inbam <- sn.list[[bam.files[1]]] | |
| 185 # chr.tag <- chrTag(sn.inbam) | |
| 186 chr.tag <- NA # do NOT modify the chromosome names. | |
| 187 is.bowtie <- v.map.bowtie[bam.files[1]] | |
| 188 cat("\nreport reg2plot\n") | |
| 189 cat(reg2plot) | |
| 190 cat("\n") | |
| 191 result.matrix <- covMatrix(debug, chkidx.list, coord.list[[reg]], rnaseq.gb, | |
| 192 exonmodel, libsize, TRUE, chr.tag, pint, | |
| 193 reg2plot, flanksize, flankfactor, m.pts, f.pts, | |
| 194 bufsize, cov.algo, bam.files[1], sn.inbam, | |
| 195 fraglens[1], map.qual, is.bowtie, | |
| 196 strand.spec=strand.spec) | |
| 197 # Rprof(NULL) | |
| 198 if(bam.pair) { # calculate background. | |
| 199 fraglen2 <- ifelse(length(fraglens) > 1, fraglens[2], fraglens[1]) | |
| 200 libsize <- v.lib.size[bam.files[2]] | |
| 201 bkg.pseudo.rpm <- 1e6 / libsize | |
| 202 sn.inbam <- sn.list[[bam.files[2]]] | |
| 203 # chr.tag <- chrTag(sn.inbam) | |
| 204 chr.tag <- NA | |
| 205 is.bowtie <- v.map.bowtie[bam.files[2]] | |
| 206 # if(class(chr.tag) == 'character') { | |
| 207 # stop(sprintf("Read %s error: %s", bam.files[2], chr.tag)) | |
| 208 # } | |
| 209 bkg.matrix <- covMatrix(debug, chkidx.list, coord.list[[reg]], rnaseq.gb, | |
| 210 exonmodel, libsize, TRUE, chr.tag, pint, | |
| 211 reg2plot, flanksize, flankfactor, m.pts, f.pts, | |
| 212 bufsize, cov.algo, bam.files[2], sn.inbam, | |
| 213 fraglen2, map.qual, is.bowtie, | |
| 214 strand.spec=strand.spec) | |
| 215 # browser() | |
| 216 result.matrix <- log2((result.matrix + result.pseudo.rpm) / | |
| 217 (bkg.matrix + bkg.pseudo.rpm)) | |
| 218 } | |
| 219 | |
| 220 # Calculate SEM if needed. Shut off SEM in single gene case. | |
| 221 if(nrow(result.matrix) > 1 && se){ | |
| 222 confiMat[, r] <- apply(result.matrix, 2, function(x) CalcSem(x, robust)) | |
| 223 } | |
| 224 | |
| 225 # Book-keep this matrix for heatmap. | |
| 226 enrichList[[r]] <- result.matrix | |
| 227 | |
| 228 # Return avg. profile. | |
| 229 regcovMat[, r] <- apply(result.matrix, 2, function(x) mean(x, trim=robust, | |
| 230 na.rm=T)) | |
| 231 } | |
| 232 # browser() | |
| 233 cat("Done\n") | |
| 234 | |
| 235 ######################################## | |
| 236 # Add row names to heatmap data. | |
| 237 for(i in 1:length(enrichList)) { | |
| 238 reg <- ctg.tbl$glist[i] # gene list name. | |
| 239 rownames(enrichList[[i]]) <- paste(coord.list[[reg]]$gname, | |
| 240 coord.list[[reg]]$tid, sep=':') | |
| 241 } | |
| 242 # Some basic parameters. | |
| 243 xticks <- genXticks(reg2plot, pint, lgint, pts, flanksize, flankfactor, Labs) | |
| 244 unit.width <- 4 | |
| 245 ng.list <- sapply(enrichList, nrow) # number of genes per heatmap. | |
| 246 | |
| 247 # Create image file and plot data into it. | |
| 248 if(!fi_tag){ | |
| 249 cat("Plotting figures...") | |
| 250 #### Average profile plot. #### | |
| 251 out.plot <- avgname | |
| 252 pdf(out.plot, width=plot.width, height=plot.height, pointsize=font.size) | |
| 253 plotmat(regcovMat, ctg.tbl$title, ctg.tbl$color, bam.pair, xticks, pts, | |
| 254 m.pts, f.pts, pint, shade.alp, confiMat, mw, prof.misc) | |
| 255 out.dev <- dev.off() | |
| 256 | |
| 257 #### Heatmap. #### | |
| 258 # Setup output device. | |
| 259 hd <- SetupHeatmapDevice(reg.list, uniq.reg, ng.list, pts, font.size, | |
| 260 unit.width, rr) | |
| 261 reg.hei <- hd$reg.hei # list of image heights for unique regions. | |
| 262 hm.width <- hd$hm.width # image width. | |
| 263 hm.height <- hd$hm.height # image height. | |
| 264 lay.mat <- hd$lay.mat # matrix for heatmap layout. | |
| 265 heatmap.mar <- hd$heatmap.mar # heatmap margins in inches. | |
| 266 | |
| 267 out.hm <- heatmapname | |
| 268 pdf(out.hm, width=hm.width, height=hm.height, pointsize=font.size) | |
| 269 par(mai=heatmap.mar) | |
| 270 layout(lay.mat, heights=reg.hei) | |
| 271 | |
| 272 # Do heatmap plotting. | |
| 273 go.list <- plotheat(reg.list, uniq.reg, enrichList, v.low.cutoff, go.algo, | |
| 274 go.paras, ctg.tbl$title, bam.pair, xticks, flood.frac, | |
| 275 do.plot=T, hm.color=hm.color, color.distr=color.distr, | |
| 276 color.scale=color.scale) | |
| 277 out.dev <- dev.off() | |
| 278 cat("Done\n") | |
| 279 } else { | |
| 280 go.list <- plotheat(reg.list, uniq.reg, enrichList, v.low.cutoff, go.algo, | |
| 281 go.paras, ctg.tbl$title, bam.pair, xticks, flood.frac, | |
| 282 do.plot=F, hm.color=hm.color, color.distr=color.distr, | |
| 283 color.scale=color.scale) | |
| 284 } | |
| 285 | |
| 286 # Save plotting data. | |
| 287 oname1="data" | |
| 288 cat("Saving results...") | |
| 289 dir.create(oname1, showWarnings=F) | |
| 290 | |
| 291 # Average profiles. | |
| 292 out.prof <- file.path(oname1, 'avgprof.txt') | |
| 293 write.table(regcovMat, file=out.prof, row.names=F, sep="\t", quote=F) | |
| 294 | |
| 295 # Standard errors of mean. | |
| 296 if(!is.null(confiMat)){ | |
| 297 out.confi <- file.path(oname1, 'sem.txt') | |
| 298 write.table(confiMat, file=out.confi, row.names=F, sep="\t", quote=F) | |
| 299 } | |
| 300 | |
| 301 # Heatmap density values. | |
| 302 for(i in 1:length(enrichList)) { | |
| 303 reg <- ctg.tbl$glist[i] # gene list name. | |
| 304 out.heat <- file.path(oname1, paste('hm', i, '.txt', sep='')) | |
| 305 write.table(cbind(coord.list[[reg]][, c('gid', 'gname', 'tid', 'strand')], | |
| 306 enrichList[[i]]), | |
| 307 file=out.heat, row.names=F, sep="\t", quote=F) | |
| 308 } | |
| 309 | |
| 310 # Avg. profile R data. | |
| 311 prof.dat <- file.path(oname1, 'avgprof.RData') | |
| 312 save(plot.width, plot.height, regcovMat, ctg.tbl, bam.pair, xticks, pts, | |
| 313 m.pts, f.pts, pint, shade.alp, confiMat, mw, prof.misc, se, v.lib.size, | |
| 314 font.size, | |
| 315 file=prof.dat) | |
| 316 | |
| 317 # Heatmap R data. | |
| 318 heat.dat <- file.path(oname1, 'heatmap.RData') | |
| 319 save(reg.list, uniq.reg, ng.list, pts, enrichList, v.low.cutoff, go.algo, | |
| 320 ctg.tbl, bam.pair, xticks, flood.frac, hm.color, unit.width, rr, | |
| 321 go.list, color.scale, v.lib.size, font.size, go.paras, low.count, | |
| 322 color.distr, | |
| 323 file=heat.dat) | |
| 324 cat("Done\n") | |
| 325 | |
| 326 # Wrap results up. | |
| 327 cat("Wrapping results up...") | |
| 328 cur.dir <- getwd() | |
| 329 out.dir <- dirname(oname1) | |
| 330 out.zip <- basename(oname1) | |
| 331 setwd(out.dir) | |
| 332 if(!zip(paste(out.zip, '.zip', sep=''), out.zip, extras='-q')) { | |
| 333 if(unlink(oname, recursive=T)) { | |
| 334 warning(sprintf("Unable to delete intermediate result folder: %s", | |
| 335 oname)) | |
| 336 } | |
| 337 } | |
| 338 setwd(cur.dir) | |
| 339 cat("Done\n") | |
| 340 cat("All done. Cheers!\n") | |
| 341 |
