comparison ngs.plot.r @ 3:de27d4172d19 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/ngsplot commit b'60a55d19937d9dc976020e1c396948b41967a7f8\n'
author artbio
date Fri, 08 Dec 2017 17:14:03 -0500
parents 3ca58369469c
children
comparison
equal deleted inserted replaced
2:4b4d858c0aa9 3:de27d4172d19
11 11
12 ngsplot.version <- '2.63_artbio' 12 ngsplot.version <- '2.63_artbio'
13 # Program environment variable. 13 # Program environment variable.
14 progpath <- Sys.getenv('NGSPLOT') 14 progpath <- Sys.getenv('NGSPLOT')
15 if(progpath == "") { 15 if(progpath == "") {
16 stop("Set environment variable NGSPLOT before run the program. See README 16 stop("Set environment variable NGSPLOT before run the program. See README
17 for details.\n") 17 for details.\n")
18 } 18 }
19 19
20 source(file.path(progpath, 'lib', 'parse.args.r')) 20 source(file.path(progpath, 'lib', 'parse.args.r'))
21 source(file.path(progpath, 'lib', 'genedb.r')) 21 source(file.path(progpath, 'lib', 'genedb.r'))
46 #################### Deal with program input arguments #################### 46 #################### Deal with program input arguments ####################
47 args <- commandArgs(T) 47 args <- commandArgs(T)
48 # args <- unlist(strsplit('-G mm10 -R tss -C K27M_no_stim_3_GATCAG_L006_R1_001Aligned.out.srt.rmdup.bam -O test -Debug 1', ' ')) 48 # args <- unlist(strsplit('-G mm10 -R tss -C K27M_no_stim_3_GATCAG_L006_R1_001Aligned.out.srt.rmdup.bam -O test -Debug 1', ' '))
49 49
50 # Input argument parser. 50 # Input argument parser.
51 args.tbl <- parseArgs(args, c('-G', '-C', '-R', '-O')) # see lib/parse.args.r 51 args.tbl <- parseArgs(args, c('-G', '-C', '-R')) # see lib/parse.args.r
52 if(is.null(args.tbl)){ 52 if(is.null(args.tbl)){
53 cmd.help() 53 cmd.help()
54 stop('Error in parsing command line arguments. Stop.\n') 54 stop('Error in parsing command line arguments. Stop.\n')
55 } 55 }
56 genome <- args.tbl['-G'] 56 genome <- args.tbl['-G']
57 reg2plot <- args.tbl['-R'] 57 reg2plot <- args.tbl['-R']
58 oname <- args.tbl['-O']
59 warning(sprintf("%s", args.tbl)) 58 warning(sprintf("%s", args.tbl))
60 59
61 cat("Configuring variables...") 60 cat("Configuring variables...")
62 # Load tables of database: default.tbl, dbfile.tbl 61 # Load tables of database: default.tbl, dbfile.tbl
63 default.tbl <- read.delim(file.path(progpath, 'database', 'default.tbl')) 62 default.tbl <- read.delim(file.path(progpath, 'database', 'default.tbl'))
72 71
73 # Configuration: coverage-genelist-title relationships. 72 # Configuration: coverage-genelist-title relationships.
74 ctg.tbl <- ConfigTbl(args.tbl, fraglen) 73 ctg.tbl <- ConfigTbl(args.tbl, fraglen)
75 74
76 # Setup plot-related coordinates and variables. 75 # Setup plot-related coordinates and variables.
77 plotvar.list <- SetupPlotCoord(args.tbl, ctg.tbl, default.tbl, dbfile.tbl, 76 plotvar.list <- SetupPlotCoord(args.tbl, ctg.tbl, default.tbl, dbfile.tbl,
78 progpath, genome, reg2plot, inttag, flanksize, 77 progpath, genome, reg2plot, inttag, flanksize,
79 samprate, galaxy) 78 samprate, galaxy)
80 attach(plotvar.list) 79 attach(plotvar.list)
81 80
82 # Setup data points for plot. 81 # Setup data points for plot.
83 pts.list <- SetPtsSpline(pint, lgint) 82 pts.list <- SetPtsSpline(pint, lgint)
140 } 139 }
141 cat('.') 140 cat('.')
142 cat("Done\n") 141 cat("Done\n")
143 142
144 ####################################################################### 143 #######################################################################
145 # Here start to extract coverages for all genomic regions and calculate 144 # Here start to extract coverages for all genomic regions and calculate
146 # data for plotting. 145 # data for plotting.
147 146
148 cat("Analyze bam files and calculate coverage") 147 cat("Analyze bam files and calculate coverage")
149 # Extract bam file names from configuration and determine if bam-pair is used. 148 # Extract bam file names from configuration and determine if bam-pair is used.
150 bfl.res <- bamFileList(ctg.tbl) 149 bfl.res <- bamFileList(ctg.tbl)
186 chr.tag <- NA # do NOT modify the chromosome names. 185 chr.tag <- NA # do NOT modify the chromosome names.
187 is.bowtie <- v.map.bowtie[bam.files[1]] 186 is.bowtie <- v.map.bowtie[bam.files[1]]
188 cat("\nreport reg2plot\n") 187 cat("\nreport reg2plot\n")
189 cat(reg2plot) 188 cat(reg2plot)
190 cat("\n") 189 cat("\n")
191 result.matrix <- covMatrix(debug, chkidx.list, coord.list[[reg]], rnaseq.gb, 190 result.matrix <- covMatrix(debug, chkidx.list, coord.list[[reg]], rnaseq.gb,
192 exonmodel, libsize, TRUE, chr.tag, pint, 191 exonmodel, libsize, TRUE, chr.tag, pint,
193 reg2plot, flanksize, flankfactor, m.pts, f.pts, 192 reg2plot, flanksize, flankfactor, m.pts, f.pts,
194 bufsize, cov.algo, bam.files[1], sn.inbam, 193 bufsize, cov.algo, bam.files[1], sn.inbam,
195 fraglens[1], map.qual, is.bowtie, 194 fraglens[1], map.qual, is.bowtie,
196 strand.spec=strand.spec) 195 strand.spec=strand.spec)
197 # Rprof(NULL) 196 # Rprof(NULL)
198 if(bam.pair) { # calculate background. 197 if(bam.pair) { # calculate background.
199 fraglen2 <- ifelse(length(fraglens) > 1, fraglens[2], fraglens[1]) 198 fraglen2 <- ifelse(length(fraglens) > 1, fraglens[2], fraglens[1])
200 libsize <- v.lib.size[bam.files[2]] 199 libsize <- v.lib.size[bam.files[2]]
204 chr.tag <- NA 203 chr.tag <- NA
205 is.bowtie <- v.map.bowtie[bam.files[2]] 204 is.bowtie <- v.map.bowtie[bam.files[2]]
206 # if(class(chr.tag) == 'character') { 205 # if(class(chr.tag) == 'character') {
207 # stop(sprintf("Read %s error: %s", bam.files[2], chr.tag)) 206 # stop(sprintf("Read %s error: %s", bam.files[2], chr.tag))
208 # } 207 # }
209 bkg.matrix <- covMatrix(debug, chkidx.list, coord.list[[reg]], rnaseq.gb, 208 bkg.matrix <- covMatrix(debug, chkidx.list, coord.list[[reg]], rnaseq.gb,
210 exonmodel, libsize, TRUE, chr.tag, pint, 209 exonmodel, libsize, TRUE, chr.tag, pint,
211 reg2plot, flanksize, flankfactor, m.pts, f.pts, 210 reg2plot, flanksize, flankfactor, m.pts, f.pts,
212 bufsize, cov.algo, bam.files[2], sn.inbam, 211 bufsize, cov.algo, bam.files[2], sn.inbam,
213 fraglen2, map.qual, is.bowtie, 212 fraglen2, map.qual, is.bowtie,
214 strand.spec=strand.spec) 213 strand.spec=strand.spec)
215 # browser() 214 # browser()
216 result.matrix <- log2((result.matrix + result.pseudo.rpm) / 215 result.matrix <- log2((result.matrix + result.pseudo.rpm) /
217 (bkg.matrix + bkg.pseudo.rpm)) 216 (bkg.matrix + bkg.pseudo.rpm))
218 } 217 }
219 218
220 # Calculate SEM if needed. Shut off SEM in single gene case. 219 # Calculate SEM if needed. Shut off SEM in single gene case.
221 if(nrow(result.matrix) > 1 && se){ 220 if(nrow(result.matrix) > 1 && se){
224 223
225 # Book-keep this matrix for heatmap. 224 # Book-keep this matrix for heatmap.
226 enrichList[[r]] <- result.matrix 225 enrichList[[r]] <- result.matrix
227 226
228 # Return avg. profile. 227 # Return avg. profile.
229 regcovMat[, r] <- apply(result.matrix, 2, function(x) mean(x, trim=robust, 228 regcovMat[, r] <- apply(result.matrix, 2, function(x) mean(x, trim=robust,
230 na.rm=T)) 229 na.rm=T))
231 } 230 }
232 # browser() 231 # browser()
233 cat("Done\n") 232 cat("Done\n")
234 233
235 ######################################## 234 ########################################
236 # Add row names to heatmap data. 235 # Add row names to heatmap data.
237 for(i in 1:length(enrichList)) { 236 for(i in 1:length(enrichList)) {
238 reg <- ctg.tbl$glist[i] # gene list name. 237 reg <- ctg.tbl$glist[i] # gene list name.
239 rownames(enrichList[[i]]) <- paste(coord.list[[reg]]$gname, 238 rownames(enrichList[[i]]) <- paste(coord.list[[reg]]$gname,
240 coord.list[[reg]]$tid, sep=':') 239 coord.list[[reg]]$tid, sep=':')
241 } 240 }
242 # Some basic parameters. 241 # Some basic parameters.
243 xticks <- genXticks(reg2plot, pint, lgint, pts, flanksize, flankfactor, Labs) 242 xticks <- genXticks(reg2plot, pint, lgint, pts, flanksize, flankfactor, Labs)
244 unit.width <- 4 243 unit.width <- 4
248 if(!fi_tag){ 247 if(!fi_tag){
249 cat("Plotting figures...") 248 cat("Plotting figures...")
250 #### Average profile plot. #### 249 #### Average profile plot. ####
251 out.plot <- avgname 250 out.plot <- avgname
252 pdf(out.plot, width=plot.width, height=plot.height, pointsize=font.size) 251 pdf(out.plot, width=plot.width, height=plot.height, pointsize=font.size)
253 plotmat(regcovMat, ctg.tbl$title, ctg.tbl$color, bam.pair, xticks, pts, 252 plotmat(regcovMat, ctg.tbl$title, ctg.tbl$color, bam.pair, xticks, pts,
254 m.pts, f.pts, pint, shade.alp, confiMat, mw, prof.misc) 253 m.pts, f.pts, pint, shade.alp, confiMat, mw, prof.misc)
255 out.dev <- dev.off() 254 out.dev <- dev.off()
256 255
257 #### Heatmap. #### 256 #### Heatmap. ####
258 # Setup output device. 257 # Setup output device.
259 hd <- SetupHeatmapDevice(reg.list, uniq.reg, ng.list, pts, font.size, 258 hd <- SetupHeatmapDevice(reg.list, uniq.reg, ng.list, pts, font.size,
260 unit.width, rr) 259 unit.width, rr)
261 reg.hei <- hd$reg.hei # list of image heights for unique regions. 260 reg.hei <- hd$reg.hei # list of image heights for unique regions.
262 hm.width <- hd$hm.width # image width. 261 hm.width <- hd$hm.width # image width.
263 hm.height <- hd$hm.height # image height. 262 hm.height <- hd$hm.height # image height.
264 lay.mat <- hd$lay.mat # matrix for heatmap layout. 263 lay.mat <- hd$lay.mat # matrix for heatmap layout.
268 pdf(out.hm, width=hm.width, height=hm.height, pointsize=font.size) 267 pdf(out.hm, width=hm.width, height=hm.height, pointsize=font.size)
269 par(mai=heatmap.mar) 268 par(mai=heatmap.mar)
270 layout(lay.mat, heights=reg.hei) 269 layout(lay.mat, heights=reg.hei)
271 270
272 # Do heatmap plotting. 271 # Do heatmap plotting.
273 go.list <- plotheat(reg.list, uniq.reg, enrichList, v.low.cutoff, go.algo, 272 go.list <- plotheat(reg.list, uniq.reg, enrichList, v.low.cutoff, go.algo,
274 go.paras, ctg.tbl$title, bam.pair, xticks, flood.frac, 273 go.paras, ctg.tbl$title, bam.pair, xticks, flood.frac,
275 do.plot=T, hm.color=hm.color, color.distr=color.distr, 274 do.plot=T, hm.color=hm.color, color.distr=color.distr,
276 color.scale=color.scale) 275 color.scale=color.scale)
277 out.dev <- dev.off() 276 out.dev <- dev.off()
278 cat("Done\n") 277 cat("Done\n")
279 } else { 278 } else {
280 go.list <- plotheat(reg.list, uniq.reg, enrichList, v.low.cutoff, go.algo, 279 go.list <- plotheat(reg.list, uniq.reg, enrichList, v.low.cutoff, go.algo,
281 go.paras, ctg.tbl$title, bam.pair, xticks, flood.frac, 280 go.paras, ctg.tbl$title, bam.pair, xticks, flood.frac,
282 do.plot=F, hm.color=hm.color, color.distr=color.distr, 281 do.plot=F, hm.color=hm.color, color.distr=color.distr,
283 color.scale=color.scale) 282 color.scale=color.scale)
284 } 283 }
285 284
286 # Save plotting data. 285 # Save plotting data.
287 oname1="data" 286 oname1="data"
300 299
301 # Heatmap density values. 300 # Heatmap density values.
302 for(i in 1:length(enrichList)) { 301 for(i in 1:length(enrichList)) {
303 reg <- ctg.tbl$glist[i] # gene list name. 302 reg <- ctg.tbl$glist[i] # gene list name.
304 out.heat <- file.path(oname1, paste('hm', i, '.txt', sep='')) 303 out.heat <- file.path(oname1, paste('hm', i, '.txt', sep=''))
305 write.table(cbind(coord.list[[reg]][, c('gid', 'gname', 'tid', 'strand')], 304 write.table(cbind(coord.list[[reg]][, c('gid', 'gname', 'tid', 'strand')],
306 enrichList[[i]]), 305 enrichList[[i]]),
307 file=out.heat, row.names=F, sep="\t", quote=F) 306 file=out.heat, row.names=F, sep="\t", quote=F)
308 } 307 }
309 308
310 # Avg. profile R data. 309 # Avg. profile R data.
311 prof.dat <- file.path(oname1, 'avgprof.RData') 310 prof.dat <- file.path(oname1, 'avgprof.RData')
312 save(plot.width, plot.height, regcovMat, ctg.tbl, bam.pair, xticks, pts, 311 save(plot.width, plot.height, regcovMat, ctg.tbl, bam.pair, xticks, pts,
313 m.pts, f.pts, pint, shade.alp, confiMat, mw, prof.misc, se, v.lib.size, 312 m.pts, f.pts, pint, shade.alp, confiMat, mw, prof.misc, se, v.lib.size,
314 font.size, 313 font.size,
315 file=prof.dat) 314 file=prof.dat)
316 315
317 # Heatmap R data. 316 # Heatmap R data.
318 heat.dat <- file.path(oname1, 'heatmap.RData') 317 heat.dat <- file.path(oname1, 'heatmap.RData')
319 save(reg.list, uniq.reg, ng.list, pts, enrichList, v.low.cutoff, go.algo, 318 save(reg.list, uniq.reg, ng.list, pts, enrichList, v.low.cutoff, go.algo,
320 ctg.tbl, bam.pair, xticks, flood.frac, hm.color, unit.width, rr, 319 ctg.tbl, bam.pair, xticks, flood.frac, hm.color, unit.width, rr,
321 go.list, color.scale, v.lib.size, font.size, go.paras, low.count, 320 go.list, color.scale, v.lib.size, font.size, go.paras, low.count,
322 color.distr, 321 color.distr,
323 file=heat.dat) 322 file=heat.dat)
324 cat("Done\n") 323 cat("Done\n")
325 324
326 # Wrap results up. 325 # Wrap results up.
327 cat("Wrapping results up...") 326 cat("Wrapping results up...")
328 cur.dir <- getwd() 327 cur.dir <- getwd()
328 cat("cur.dir")
329 cat(cur.dir)
329 out.dir <- dirname(oname1) 330 out.dir <- dirname(oname1)
331 cat("out.dir")
332 cat(out.dir)
330 out.zip <- basename(oname1) 333 out.zip <- basename(oname1)
331 setwd(out.dir) 334 cat("out.zip")
332 if(!zip(paste(out.zip, '.zip', sep=''), out.zip, extras='-q')) { 335 cat(out.zip)
333 if(unlink(oname, recursive=T)) { 336 #setwd(out.dir)
334 warning(sprintf("Unable to delete intermediate result folder: %s", 337 #cat("Reaching this point without failure")
335 oname)) 338 #zip(paste(out.zip, '.zip', sep=''), out.zip, extras='-q')
336 } 339 zip("data.zip", "data", extras='-q')
337 } 340 #if(!zip(paste(out.zip, '.zip', sep=''), out.zip, extras='-q')) {
338 setwd(cur.dir) 341 # if(unlink(oname, recursive=T)) {
342 # warning(sprintf("Unable to delete intermediate result folder: %s",
343 # oname))
344 # }
345 #}
346 #setwd(cur.dir)
339 cat("Done\n") 347 cat("Done\n")
340 cat("All done. Cheers!\n") 348 cat("All done. Cheers!\n")
341