Mercurial > repos > artbio > ngsplot
comparison ngs.plot.r @ 3:de27d4172d19 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/ngsplot commit b'60a55d19937d9dc976020e1c396948b41967a7f8\n'
author | artbio |
---|---|
date | Fri, 08 Dec 2017 17:14:03 -0500 |
parents | 3ca58369469c |
children |
comparison
equal
deleted
inserted
replaced
2:4b4d858c0aa9 | 3:de27d4172d19 |
---|---|
11 | 11 |
12 ngsplot.version <- '2.63_artbio' | 12 ngsplot.version <- '2.63_artbio' |
13 # Program environment variable. | 13 # Program environment variable. |
14 progpath <- Sys.getenv('NGSPLOT') | 14 progpath <- Sys.getenv('NGSPLOT') |
15 if(progpath == "") { | 15 if(progpath == "") { |
16 stop("Set environment variable NGSPLOT before run the program. See README | 16 stop("Set environment variable NGSPLOT before run the program. See README |
17 for details.\n") | 17 for details.\n") |
18 } | 18 } |
19 | 19 |
20 source(file.path(progpath, 'lib', 'parse.args.r')) | 20 source(file.path(progpath, 'lib', 'parse.args.r')) |
21 source(file.path(progpath, 'lib', 'genedb.r')) | 21 source(file.path(progpath, 'lib', 'genedb.r')) |
46 #################### Deal with program input arguments #################### | 46 #################### Deal with program input arguments #################### |
47 args <- commandArgs(T) | 47 args <- commandArgs(T) |
48 # args <- unlist(strsplit('-G mm10 -R tss -C K27M_no_stim_3_GATCAG_L006_R1_001Aligned.out.srt.rmdup.bam -O test -Debug 1', ' ')) | 48 # args <- unlist(strsplit('-G mm10 -R tss -C K27M_no_stim_3_GATCAG_L006_R1_001Aligned.out.srt.rmdup.bam -O test -Debug 1', ' ')) |
49 | 49 |
50 # Input argument parser. | 50 # Input argument parser. |
51 args.tbl <- parseArgs(args, c('-G', '-C', '-R', '-O')) # see lib/parse.args.r | 51 args.tbl <- parseArgs(args, c('-G', '-C', '-R')) # see lib/parse.args.r |
52 if(is.null(args.tbl)){ | 52 if(is.null(args.tbl)){ |
53 cmd.help() | 53 cmd.help() |
54 stop('Error in parsing command line arguments. Stop.\n') | 54 stop('Error in parsing command line arguments. Stop.\n') |
55 } | 55 } |
56 genome <- args.tbl['-G'] | 56 genome <- args.tbl['-G'] |
57 reg2plot <- args.tbl['-R'] | 57 reg2plot <- args.tbl['-R'] |
58 oname <- args.tbl['-O'] | |
59 warning(sprintf("%s", args.tbl)) | 58 warning(sprintf("%s", args.tbl)) |
60 | 59 |
61 cat("Configuring variables...") | 60 cat("Configuring variables...") |
62 # Load tables of database: default.tbl, dbfile.tbl | 61 # Load tables of database: default.tbl, dbfile.tbl |
63 default.tbl <- read.delim(file.path(progpath, 'database', 'default.tbl')) | 62 default.tbl <- read.delim(file.path(progpath, 'database', 'default.tbl')) |
72 | 71 |
73 # Configuration: coverage-genelist-title relationships. | 72 # Configuration: coverage-genelist-title relationships. |
74 ctg.tbl <- ConfigTbl(args.tbl, fraglen) | 73 ctg.tbl <- ConfigTbl(args.tbl, fraglen) |
75 | 74 |
76 # Setup plot-related coordinates and variables. | 75 # Setup plot-related coordinates and variables. |
77 plotvar.list <- SetupPlotCoord(args.tbl, ctg.tbl, default.tbl, dbfile.tbl, | 76 plotvar.list <- SetupPlotCoord(args.tbl, ctg.tbl, default.tbl, dbfile.tbl, |
78 progpath, genome, reg2plot, inttag, flanksize, | 77 progpath, genome, reg2plot, inttag, flanksize, |
79 samprate, galaxy) | 78 samprate, galaxy) |
80 attach(plotvar.list) | 79 attach(plotvar.list) |
81 | 80 |
82 # Setup data points for plot. | 81 # Setup data points for plot. |
83 pts.list <- SetPtsSpline(pint, lgint) | 82 pts.list <- SetPtsSpline(pint, lgint) |
140 } | 139 } |
141 cat('.') | 140 cat('.') |
142 cat("Done\n") | 141 cat("Done\n") |
143 | 142 |
144 ####################################################################### | 143 ####################################################################### |
145 # Here start to extract coverages for all genomic regions and calculate | 144 # Here start to extract coverages for all genomic regions and calculate |
146 # data for plotting. | 145 # data for plotting. |
147 | 146 |
148 cat("Analyze bam files and calculate coverage") | 147 cat("Analyze bam files and calculate coverage") |
149 # Extract bam file names from configuration and determine if bam-pair is used. | 148 # Extract bam file names from configuration and determine if bam-pair is used. |
150 bfl.res <- bamFileList(ctg.tbl) | 149 bfl.res <- bamFileList(ctg.tbl) |
186 chr.tag <- NA # do NOT modify the chromosome names. | 185 chr.tag <- NA # do NOT modify the chromosome names. |
187 is.bowtie <- v.map.bowtie[bam.files[1]] | 186 is.bowtie <- v.map.bowtie[bam.files[1]] |
188 cat("\nreport reg2plot\n") | 187 cat("\nreport reg2plot\n") |
189 cat(reg2plot) | 188 cat(reg2plot) |
190 cat("\n") | 189 cat("\n") |
191 result.matrix <- covMatrix(debug, chkidx.list, coord.list[[reg]], rnaseq.gb, | 190 result.matrix <- covMatrix(debug, chkidx.list, coord.list[[reg]], rnaseq.gb, |
192 exonmodel, libsize, TRUE, chr.tag, pint, | 191 exonmodel, libsize, TRUE, chr.tag, pint, |
193 reg2plot, flanksize, flankfactor, m.pts, f.pts, | 192 reg2plot, flanksize, flankfactor, m.pts, f.pts, |
194 bufsize, cov.algo, bam.files[1], sn.inbam, | 193 bufsize, cov.algo, bam.files[1], sn.inbam, |
195 fraglens[1], map.qual, is.bowtie, | 194 fraglens[1], map.qual, is.bowtie, |
196 strand.spec=strand.spec) | 195 strand.spec=strand.spec) |
197 # Rprof(NULL) | 196 # Rprof(NULL) |
198 if(bam.pair) { # calculate background. | 197 if(bam.pair) { # calculate background. |
199 fraglen2 <- ifelse(length(fraglens) > 1, fraglens[2], fraglens[1]) | 198 fraglen2 <- ifelse(length(fraglens) > 1, fraglens[2], fraglens[1]) |
200 libsize <- v.lib.size[bam.files[2]] | 199 libsize <- v.lib.size[bam.files[2]] |
204 chr.tag <- NA | 203 chr.tag <- NA |
205 is.bowtie <- v.map.bowtie[bam.files[2]] | 204 is.bowtie <- v.map.bowtie[bam.files[2]] |
206 # if(class(chr.tag) == 'character') { | 205 # if(class(chr.tag) == 'character') { |
207 # stop(sprintf("Read %s error: %s", bam.files[2], chr.tag)) | 206 # stop(sprintf("Read %s error: %s", bam.files[2], chr.tag)) |
208 # } | 207 # } |
209 bkg.matrix <- covMatrix(debug, chkidx.list, coord.list[[reg]], rnaseq.gb, | 208 bkg.matrix <- covMatrix(debug, chkidx.list, coord.list[[reg]], rnaseq.gb, |
210 exonmodel, libsize, TRUE, chr.tag, pint, | 209 exonmodel, libsize, TRUE, chr.tag, pint, |
211 reg2plot, flanksize, flankfactor, m.pts, f.pts, | 210 reg2plot, flanksize, flankfactor, m.pts, f.pts, |
212 bufsize, cov.algo, bam.files[2], sn.inbam, | 211 bufsize, cov.algo, bam.files[2], sn.inbam, |
213 fraglen2, map.qual, is.bowtie, | 212 fraglen2, map.qual, is.bowtie, |
214 strand.spec=strand.spec) | 213 strand.spec=strand.spec) |
215 # browser() | 214 # browser() |
216 result.matrix <- log2((result.matrix + result.pseudo.rpm) / | 215 result.matrix <- log2((result.matrix + result.pseudo.rpm) / |
217 (bkg.matrix + bkg.pseudo.rpm)) | 216 (bkg.matrix + bkg.pseudo.rpm)) |
218 } | 217 } |
219 | 218 |
220 # Calculate SEM if needed. Shut off SEM in single gene case. | 219 # Calculate SEM if needed. Shut off SEM in single gene case. |
221 if(nrow(result.matrix) > 1 && se){ | 220 if(nrow(result.matrix) > 1 && se){ |
224 | 223 |
225 # Book-keep this matrix for heatmap. | 224 # Book-keep this matrix for heatmap. |
226 enrichList[[r]] <- result.matrix | 225 enrichList[[r]] <- result.matrix |
227 | 226 |
228 # Return avg. profile. | 227 # Return avg. profile. |
229 regcovMat[, r] <- apply(result.matrix, 2, function(x) mean(x, trim=robust, | 228 regcovMat[, r] <- apply(result.matrix, 2, function(x) mean(x, trim=robust, |
230 na.rm=T)) | 229 na.rm=T)) |
231 } | 230 } |
232 # browser() | 231 # browser() |
233 cat("Done\n") | 232 cat("Done\n") |
234 | 233 |
235 ######################################## | 234 ######################################## |
236 # Add row names to heatmap data. | 235 # Add row names to heatmap data. |
237 for(i in 1:length(enrichList)) { | 236 for(i in 1:length(enrichList)) { |
238 reg <- ctg.tbl$glist[i] # gene list name. | 237 reg <- ctg.tbl$glist[i] # gene list name. |
239 rownames(enrichList[[i]]) <- paste(coord.list[[reg]]$gname, | 238 rownames(enrichList[[i]]) <- paste(coord.list[[reg]]$gname, |
240 coord.list[[reg]]$tid, sep=':') | 239 coord.list[[reg]]$tid, sep=':') |
241 } | 240 } |
242 # Some basic parameters. | 241 # Some basic parameters. |
243 xticks <- genXticks(reg2plot, pint, lgint, pts, flanksize, flankfactor, Labs) | 242 xticks <- genXticks(reg2plot, pint, lgint, pts, flanksize, flankfactor, Labs) |
244 unit.width <- 4 | 243 unit.width <- 4 |
248 if(!fi_tag){ | 247 if(!fi_tag){ |
249 cat("Plotting figures...") | 248 cat("Plotting figures...") |
250 #### Average profile plot. #### | 249 #### Average profile plot. #### |
251 out.plot <- avgname | 250 out.plot <- avgname |
252 pdf(out.plot, width=plot.width, height=plot.height, pointsize=font.size) | 251 pdf(out.plot, width=plot.width, height=plot.height, pointsize=font.size) |
253 plotmat(regcovMat, ctg.tbl$title, ctg.tbl$color, bam.pair, xticks, pts, | 252 plotmat(regcovMat, ctg.tbl$title, ctg.tbl$color, bam.pair, xticks, pts, |
254 m.pts, f.pts, pint, shade.alp, confiMat, mw, prof.misc) | 253 m.pts, f.pts, pint, shade.alp, confiMat, mw, prof.misc) |
255 out.dev <- dev.off() | 254 out.dev <- dev.off() |
256 | 255 |
257 #### Heatmap. #### | 256 #### Heatmap. #### |
258 # Setup output device. | 257 # Setup output device. |
259 hd <- SetupHeatmapDevice(reg.list, uniq.reg, ng.list, pts, font.size, | 258 hd <- SetupHeatmapDevice(reg.list, uniq.reg, ng.list, pts, font.size, |
260 unit.width, rr) | 259 unit.width, rr) |
261 reg.hei <- hd$reg.hei # list of image heights for unique regions. | 260 reg.hei <- hd$reg.hei # list of image heights for unique regions. |
262 hm.width <- hd$hm.width # image width. | 261 hm.width <- hd$hm.width # image width. |
263 hm.height <- hd$hm.height # image height. | 262 hm.height <- hd$hm.height # image height. |
264 lay.mat <- hd$lay.mat # matrix for heatmap layout. | 263 lay.mat <- hd$lay.mat # matrix for heatmap layout. |
268 pdf(out.hm, width=hm.width, height=hm.height, pointsize=font.size) | 267 pdf(out.hm, width=hm.width, height=hm.height, pointsize=font.size) |
269 par(mai=heatmap.mar) | 268 par(mai=heatmap.mar) |
270 layout(lay.mat, heights=reg.hei) | 269 layout(lay.mat, heights=reg.hei) |
271 | 270 |
272 # Do heatmap plotting. | 271 # Do heatmap plotting. |
273 go.list <- plotheat(reg.list, uniq.reg, enrichList, v.low.cutoff, go.algo, | 272 go.list <- plotheat(reg.list, uniq.reg, enrichList, v.low.cutoff, go.algo, |
274 go.paras, ctg.tbl$title, bam.pair, xticks, flood.frac, | 273 go.paras, ctg.tbl$title, bam.pair, xticks, flood.frac, |
275 do.plot=T, hm.color=hm.color, color.distr=color.distr, | 274 do.plot=T, hm.color=hm.color, color.distr=color.distr, |
276 color.scale=color.scale) | 275 color.scale=color.scale) |
277 out.dev <- dev.off() | 276 out.dev <- dev.off() |
278 cat("Done\n") | 277 cat("Done\n") |
279 } else { | 278 } else { |
280 go.list <- plotheat(reg.list, uniq.reg, enrichList, v.low.cutoff, go.algo, | 279 go.list <- plotheat(reg.list, uniq.reg, enrichList, v.low.cutoff, go.algo, |
281 go.paras, ctg.tbl$title, bam.pair, xticks, flood.frac, | 280 go.paras, ctg.tbl$title, bam.pair, xticks, flood.frac, |
282 do.plot=F, hm.color=hm.color, color.distr=color.distr, | 281 do.plot=F, hm.color=hm.color, color.distr=color.distr, |
283 color.scale=color.scale) | 282 color.scale=color.scale) |
284 } | 283 } |
285 | 284 |
286 # Save plotting data. | 285 # Save plotting data. |
287 oname1="data" | 286 oname1="data" |
300 | 299 |
301 # Heatmap density values. | 300 # Heatmap density values. |
302 for(i in 1:length(enrichList)) { | 301 for(i in 1:length(enrichList)) { |
303 reg <- ctg.tbl$glist[i] # gene list name. | 302 reg <- ctg.tbl$glist[i] # gene list name. |
304 out.heat <- file.path(oname1, paste('hm', i, '.txt', sep='')) | 303 out.heat <- file.path(oname1, paste('hm', i, '.txt', sep='')) |
305 write.table(cbind(coord.list[[reg]][, c('gid', 'gname', 'tid', 'strand')], | 304 write.table(cbind(coord.list[[reg]][, c('gid', 'gname', 'tid', 'strand')], |
306 enrichList[[i]]), | 305 enrichList[[i]]), |
307 file=out.heat, row.names=F, sep="\t", quote=F) | 306 file=out.heat, row.names=F, sep="\t", quote=F) |
308 } | 307 } |
309 | 308 |
310 # Avg. profile R data. | 309 # Avg. profile R data. |
311 prof.dat <- file.path(oname1, 'avgprof.RData') | 310 prof.dat <- file.path(oname1, 'avgprof.RData') |
312 save(plot.width, plot.height, regcovMat, ctg.tbl, bam.pair, xticks, pts, | 311 save(plot.width, plot.height, regcovMat, ctg.tbl, bam.pair, xticks, pts, |
313 m.pts, f.pts, pint, shade.alp, confiMat, mw, prof.misc, se, v.lib.size, | 312 m.pts, f.pts, pint, shade.alp, confiMat, mw, prof.misc, se, v.lib.size, |
314 font.size, | 313 font.size, |
315 file=prof.dat) | 314 file=prof.dat) |
316 | 315 |
317 # Heatmap R data. | 316 # Heatmap R data. |
318 heat.dat <- file.path(oname1, 'heatmap.RData') | 317 heat.dat <- file.path(oname1, 'heatmap.RData') |
319 save(reg.list, uniq.reg, ng.list, pts, enrichList, v.low.cutoff, go.algo, | 318 save(reg.list, uniq.reg, ng.list, pts, enrichList, v.low.cutoff, go.algo, |
320 ctg.tbl, bam.pair, xticks, flood.frac, hm.color, unit.width, rr, | 319 ctg.tbl, bam.pair, xticks, flood.frac, hm.color, unit.width, rr, |
321 go.list, color.scale, v.lib.size, font.size, go.paras, low.count, | 320 go.list, color.scale, v.lib.size, font.size, go.paras, low.count, |
322 color.distr, | 321 color.distr, |
323 file=heat.dat) | 322 file=heat.dat) |
324 cat("Done\n") | 323 cat("Done\n") |
325 | 324 |
326 # Wrap results up. | 325 # Wrap results up. |
327 cat("Wrapping results up...") | 326 cat("Wrapping results up...") |
328 cur.dir <- getwd() | 327 cur.dir <- getwd() |
328 cat("cur.dir") | |
329 cat(cur.dir) | |
329 out.dir <- dirname(oname1) | 330 out.dir <- dirname(oname1) |
331 cat("out.dir") | |
332 cat(out.dir) | |
330 out.zip <- basename(oname1) | 333 out.zip <- basename(oname1) |
331 setwd(out.dir) | 334 cat("out.zip") |
332 if(!zip(paste(out.zip, '.zip', sep=''), out.zip, extras='-q')) { | 335 cat(out.zip) |
333 if(unlink(oname, recursive=T)) { | 336 #setwd(out.dir) |
334 warning(sprintf("Unable to delete intermediate result folder: %s", | 337 #cat("Reaching this point without failure") |
335 oname)) | 338 #zip(paste(out.zip, '.zip', sep=''), out.zip, extras='-q') |
336 } | 339 zip("data.zip", "data", extras='-q') |
337 } | 340 #if(!zip(paste(out.zip, '.zip', sep=''), out.zip, extras='-q')) { |
338 setwd(cur.dir) | 341 # if(unlink(oname, recursive=T)) { |
342 # warning(sprintf("Unable to delete intermediate result folder: %s", | |
343 # oname)) | |
344 # } | |
345 #} | |
346 #setwd(cur.dir) | |
339 cat("Done\n") | 347 cat("Done\n") |
340 cat("All done. Cheers!\n") | 348 cat("All done. Cheers!\n") |
341 |