Mercurial > repos > artbio > ngsplot
diff lib/coverage.r @ 0:3ca58369469c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/ngsplot commit b'e9fcc157a7f2f2fa9d6ac9a58d425ff17c975f5c\n'
author | artbio |
---|---|
date | Wed, 06 Dec 2017 19:01:53 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/coverage.r Wed Dec 06 19:01:53 2017 -0500 @@ -0,0 +1,701 @@ +# Function to check if the range exceeds coverage vector boundaries. +checkBound <- function(start, end, range, chrlen){ + if(end + range > chrlen || start - range < 1) + return(FALSE) # out of boundary. + else + return(TRUE) +} + +Bin <- function(v, n) { +# Function to bin a coverage vector. +# Args: +# v: coverage vector. +# n: number of bins. +# Return: vector of binned values. + + bin.bound <- seq(1, length(v), length.out=n + 1) + sapply(1:n, function(i) { + mean(v[bin.bound[i]:bin.bound[i + 1]]) + }) +} + +SplineRev3Sec <- function(cov.list, v.fls, pts.list, v.strand, algo="spline") { +# For 3 sections of continuous coverage, spline each according to specified +# data points and return concatenated, interpolated curves. +# Args: +# cov.list: a list of coverage vectors. Each vector represents a gene. +# v.fls: vector of flanking region size. +# pts.list: a list of three integers for data points. +# v.strand: factor vector of strands. +# algo: algorithm used to normalize coverage vectors (spline, bin). +# Return: matrix of interpolated coverage: each row represents a gene; each +# column represents a data point. Coverage from exons are concatenated. +# Notes: the names of cov.list and pts.list must be the same for index purpose. +# Suggested: use 'l', 'm', and 'r' for the names. + + # Create an empty coveage matrix first. + tot.pts <- sum(unlist(pts.list)) - 2 + cov.mat <- matrix(nrow=length(cov.list), ncol=tot.pts) + + for(i in 1:length(cov.list)) { + left.cov <- head(cov.list[[i]], n=v.fls[i]) + right.cov <- tail(cov.list[[i]], n=v.fls[i]) + mid.cov <- window(cov.list[[i]], start=v.fls[i] + 1, + width=length(cov.list[[i]]) - 2*v.fls[i]) + if(algo == "spline") { + left.cov <- spline(1:length(left.cov), left.cov, n=pts.list$l)$y + right.cov <- spline(1:length(right.cov), right.cov, n=pts.list$r)$y + mid.cov <- spline(1:length(mid.cov), mid.cov, n=pts.list$m)$y + } else if(algo == "bin") { + left.cov <- Bin(left.cov, n=pts.list$l) + right.cov <- Bin(right.cov, n=pts.list$r) + mid.cov <- Bin(mid.cov, n=pts.list$m) + } else { + # pass. + } + # Fuse the two points at the boundary and concatenate. + f1 <- (tail(left.cov, n=1) + head(mid.cov, n=1)) / 2 + f2 <- (tail(mid.cov, n=1) + head(right.cov, n=1)) / 2 + con.cov <- c(left.cov[-length(left.cov)], f1, + mid.cov[-c(1, length(mid.cov))], + f2, right.cov[-1]) + # browser() + if(v.strand[i] == '+') { + cov.mat[i, ] <- con.cov + } else { + cov.mat[i, ] <- rev(con.cov) + } + } + + cov.mat +} + +extrCov3Sec <- function(v.chrom, v.start, v.end, v.fls, v.strand, m.pts, f.pts, + bufsize, algo, ...) { +# Extract and interpolate coverage vectors from genomic regions with 3 sections. +# Args: +# v.chrom: factor vector of chromosome names. +# v.start: integer vector of region start. +# v.end: integer vector of region end. +# v.fls: integer vector of flanking region size in bps. +# v.strand: factor vector of gene strands. +# m.pts: data points for middle interval. +# f.pts: data points for flanking region. +# bufsize: integer; buffers are added to both ends of each region. +# algo: algorithm used to normalize coverage vectors. +# Return: matrix of interpolated coverage: each row represents a gene; each +# column represents a data point. + + interflank.gr <- GRanges(seqnames=v.chrom, ranges=IRanges( + start=v.start - v.fls - bufsize, + end=v.end + v.fls + bufsize)) + interflank.cov <- covBamExons(interflank.gr, v.strand, ...) + + # Trim buffers from coverage vectors. + interflank.cov <- lapply(interflank.cov, function(v) { + window(v, start=bufsize + 1, width=length(v) - 2 * bufsize) + }) + + # Interpolate and reverse coverage vectors. + SplineRev3Sec(interflank.cov, v.fls, list(l=f.pts, m=m.pts, r=f.pts), + v.strand, algo) +} + +sub3CovList <- function(all.cov, v.left, v.right) { +# For a list of coverage vectors, separate them into three lists. +# Args: +# all.cov: list of coverage vectors. +# v.left: integer vector of left flanking size. +# v.right: integer vector of right flanking size. +# Return: list of lists of coverage vectors. The outer list is named as: +# "l", "m", "r". + + left.cov <- foreach(i=icount(length(all.cov))) %do% { + head(all.cov[[i]], n=v.left[i]) + } + mid.cov <- foreach(i=icount(length(all.cov))) %do% { + len <- length(all.cov[[i]]) + all.cov[[i]][ (v.left[i] + 1) : (len - v.right[i]) ] + } + right.cov <- foreach(i=icount(length(all.cov))) %do% { + tail(all.cov[[i]], n=v.right[i]) + } + list(l=left.cov, m=mid.cov, r=right.cov) +} + +extrCovExons <- function(v.chrom, exonranges.list, v.fls, v.strand, + m.pts, f.pts, bufsize, algo, ...) { +# Extract coverage vectors for transcripts with exon models. +# Args: +# v.chrom: factor vector of chromosome names. +# exonranges.list: list of IRanges objects for exon coordinates. +# v.fls: integer vector of flanking region size in bps. +# v.strand: factor vector of gene strands. +# m.pts: data points for middle interval. +# f.pts: data points for flanking region. +# bufsize: integer; buffers are added to both ends of each region. +# algo: algorithm used to normalize coverage vectors. +# Return: matrix of interpolated coverage: each row represents a gene; each +# column represents a data point. Coverage from exons are concatenated. + + # Construct ranges including exon and flanking regions. + exonflank.list <- vector('list', length=length(exonranges.list)) + for(i in 1:length(exonranges.list)) { + r.mod <- exonranges.list[[i]] # IRanges object to be modified. + n <- length(r.mod) + # Add flanking and buffer regions. + start(r.mod)[1] <- start(r.mod)[1] - v.fls[i] - bufsize + end(r.mod)[n] <- end(r.mod)[n] + v.fls[i] + bufsize + exonflank.list[[i]] <- GRanges(seqnames=v.chrom[i], ranges=r.mod) + } + + exonflank.cov <- covBamExons(exonflank.list, v.strand, ...) + + # Trim buffers from coverage vectors. + exonflank.cov <- lapply(exonflank.cov, function(v) { + window(v, start=bufsize + 1, width=length(v) - 2 * bufsize) + }) + + SplineRev3Sec(exonflank.cov, v.fls, list(l=f.pts, m=m.pts, r=f.pts), + v.strand, algo) +} + + +extrCovMidp <- function(v.chrom, v.midp, flanksize, v.strand, pts, bufsize, + algo, ...) { +# Extract coverage vectors with a middle point and symmetric flanking regions. +# Args: +# v.chrom: factor vector of chromosome names. +# v.midp: integer vector of middle points. +# flanksize: integer of flanking region size in bps. +# v.strand: factor vector of gene strands. +# pts: data points to spline into. +# bufsize: integer; buffers are added to both ends of each region. +# algo: algorithm used to normalize coverage vectors. +# Return: matrix of interpolated coverage: each row represents a gene; each +# column represents a data point. + + granges <- GRanges(seqnames=v.chrom, ranges=IRanges( + start=v.midp - flanksize - bufsize, + end=v.midp + flanksize + bufsize)) + cov.list <- covBamExons(granges, v.strand, ...) + + # Trim buffers from coverage vectors. + cov.list <- lapply(cov.list, function(v) { + window(v, start=bufsize + 1, width=length(v) - 2 * bufsize) + }) + + # Interpolate and reverse coverage vectors and assemble into a matrix. + cov.mat <- matrix(nrow=length(cov.list), ncol=pts) + for(i in 1:length(cov.list)) { + if(is.null(cov.list[[i]])) { + cov.mat[i, ] <- vector('integer', length=pts) + } else { + if(algo == "spline") { + cov.mat[i, ] <- spline(1:length(cov.list[[i]]), cov.list[[i]], + n=pts)$y + } else if(algo == "bin") { + cov.mat[i, ] <- Bin(cov.list[[i]], n=pts) + } else { + # pass. + } + if(v.strand[i] == '-') { + cov.mat[i, ] <- rev(cov.mat[i, ]) + } + } + } + cov.mat +} + +scanBamRevOrder <- function(org.gr, sbp) { +# ScanBamParam re-arranges the input genomic ranges. Use range info to +# construct a string vector to find the order to reverse it. + org.grnames <- with(org.gr, paste(seqnames, start, end, sep=':')) + sbw.gr <- as.data.frame(bamWhich(sbp)) # scan-bam-ed + if('space' %in% names(sbw.gr)) { + sbw.grnames <- with(sbw.gr, paste(space, start, end, sep=':')) + } else if('group_name' %in% names(sbw.gr)) { + sbw.grnames <- with(sbw.gr, paste(group_name, start, end, sep=':')) + } else { + stop("Cannot locate chromosome names in extracted short reads. Report +this problem using issue tracking or discussion forum.\n") + } + + match(org.grnames, sbw.grnames) +} + +genZeroList <- function(llen, v.vlen) { +# Generate a list of specific length with each element being an Rle vector of +# zeros with specific length. +# Args: +# llen: list length +# v.vlen: vector of vector lengths. + + llen <- as.integer(llen) + stopifnot(llen > 0) + res <- vector('list', length=llen) + for(i in 1:llen) { + res[[i]] <- Rle(0, v.vlen[i]) + } + res +} + +covBamExons <- function(granges.dat, v.strand, bam.file, sn.inbam, fraglen, + map.qual=20, bowtie=F, + strand.spec=c('both', 'same', 'opposite')) { +# Extract coverage vectors from bam file for a list of transcripts of multiple +# exons. +# Args: +# granges.dat: a GRanges object representing a set of genomic ranges or a +# list of GRanges objects each representing a set of exonic +# ranges. +# v.strand: vector of strand info. +# bam.file: character string refers to the path of a bam file. +# sn.inbam: vector of chromosome names in the bam file. +# fraglen: fragment length. +# map.qual: mapping quality to filter reads. +# bowtie: boolean to indicate whether the aligner was Bowtie-like or not. +# strand.spec: string desc. for strand-specific coverage calculation. +# Return: list of coverage vectors, each vector represents a transcript. + + strand.spec <- match.arg(strand.spec) + + if(class(granges.dat) == 'list') { + # Construct a GRanges object representing DNA sequences. + v.seqnames <- sapply(granges.dat, + function(x) as.character(seqnames(x)[1])) + v.start <- sapply(granges.dat, function(x) start(ranges(x))[1]) + v.end <- sapply(granges.dat, function(x) tail(end(ranges(x)), n=1)) + granges.dna <- GRanges(seqnames=v.seqnames, + ranges=IRanges(start=v.start, end=v.end)) + # Obtain mRNA(including flanking) length for each gene. + repr.lens <- sapply(granges.dat, function(g) { + sum(end(g) - start(g) + 1) + }) + } else { + v.seqnames <- as.character(seqnames(granges.dat)) + v.start <- start(granges.dat) + v.end <- end(granges.dat) + granges.dna <- granges.dat + repr.lens <- v.end - v.start + 1 + granges.dat <- vector('list', length(granges.dna)) # set null tags. + } + + # Filter transcripts whose chromosomes do not match bam file. + inbam.mask <- as.character(seqnames(granges.dna)) %in% sn.inbam + if(!any(inbam.mask)) { # none matches. + return(genZeroList(length(granges.dna), repr.lens)) + } + + # scanBamWhat: the info that need to be extracted from a bam file. + sbw <- c('pos', 'qwidth', 'mapq', 'strand', 'rname', + 'mrnm', 'mpos', 'isize') + sbp <- ScanBamParam(what=sbw, which=granges.dna[inbam.mask], + flag=scanBamFlag(isUnmappedQuery=F, + isNotPassingQualityControls=F, + isDuplicate=F)) + + # Scan bam file to retrieve short reads. + sr.in.ranges <- tryCatch(scanBam(bam.file, param=sbp), error=function(e) e) + if(class(sr.in.ranges)[1] == 'simpleError') { + # This is not supposed to happen after those unmatched seqnames are + # removed. I keep it for safty. + return(genZeroList(length(granges.dna), repr.lens)) + } + + # Restore the original order. + sr.in.ranges <- sr.in.ranges[scanBamRevOrder( + as.data.frame(granges.dna[inbam.mask]), sbp)] + + CalcReadsCov <- function(srg, start, end, gr.rna, repr.len, strand) { + # Calculate short read coverage for each gene/region. + # Args: + # srg: extracted short reads in gene. + # start: start position of the DNA sequence. + # end: end position of the DNA sequence. + # gr.rna: GRanges object (multiple ranges) representing exon sequences. + # This can be NULL indicating the input ranges are DNAs. + # repr.len: DNA or mRNA sequence length. + # strand: transcript strand (+/-). + # Returns: a coverage vector for the gene. + + # browser() + # Special handling for bowtie mapping. + if(bowtie) { + srg <- within(srg, mapq[is.na(mapq)] <- 254) # within! + } + # Filter short reads by mapping quality. + all.mask <- srg$mapq >= map.qual + + # Subset by strand info. + if(strand.spec != 'both') { + if(strand.spec == 'same') { + s.mask <- srg$strand == as.character(strand) + } else { + s.mask <- srg$strand != as.character(strand) + } + all.mask <- all.mask & s.mask + } + + # If paired, filter reads that are not properly paired. + paired <- all(with(srg, is.na(isize) | isize != 0)) + if(paired) { + p.mask <- with(srg, rname == mrnm & xor(strand == '+', isize < 0)) + all.mask <- all.mask & p.mask + } + + # Apply all the filters on short reads. + srg <- lapply(srg, `[`, which(all.mask)) + + # Calculate coverage. + if(length(srg[[1]]) > 0) { + if(paired) { + cov.pos <- with(srg, ifelse(isize < 0, mpos, pos)) + cov.wd <- abs(srg$isize) + } else { + # Adjust negative read positions for physical coverage. + cov.pos <- with(srg, ifelse(strand == '-', + pos - fraglen + qwidth, pos)) + cov.wd <- fraglen + } + # Shift reads by subtracting start positions. + cov.pos <- cov.pos - start + 1 + # Calculate physical coverage on the whole genebody. + covg <- coverage(IRanges(start=cov.pos, width=cov.wd), + width=end - start + 1, method='sort') + + if(!is.null(gr.rna)) { # RNA-seq. + # Shift exonic ranges by subtracting start positions. + # BE careful with negative start positions! Need to adjust end + # positions first(or the GRanges lib will emit errors if + # start > end). + # Negative start positions happen when flanking region exceeds + # the chromosomal start. + if(start > 0) { + start(gr.rna) <- start(gr.rna) - start + 1 + end(gr.rna) <- end(gr.rna) - start + 1 + } else { + end(gr.rna) <- end(gr.rna) - start + 1 + start(gr.rna) <- start(gr.rna) - start + 1 + } + # Concatenate all exon coverages. + covg[ranges(gr.rna)] + } else { # ChIP-seq. + covg + } + } else { + Rle(0, repr.len) + } + } + + covg.allgenes <- mapply(CalcReadsCov, srg=sr.in.ranges, + start=v.start, end=v.end, gr.rna=granges.dat, + repr.len=repr.lens, strand=v.strand, SIMPLIFY=F) +} + +bamFileList <- function(ctg.tbl) { +# Determine the bam files involved in the configuration and whether it is a +# bam file pair setup. +# Args: +# ctg.tbl: coverage-genelist-title table. + + cov.uniq <- unique(ctg.tbl$cov) + cov.list <- strsplit(cov.uniq, ':') + v.nbam <- sapply(cov.list, length) + v.bbp <- v.nbam == 2 + if(all(v.bbp)) { + bbp <- T + } else if(all(!v.bbp)) { + bbp <- F + } else { + stop("No mix of bam and bam-pair allowed in configuration.\n") + } + + list(bbp=bbp, bam.list=unique(unlist(cov.list))) +} + +estiMapqStyle <- function(bam.file){ +# Estimate the mapping quality style. Return TRUE if it is SAM standard. +# Sample 1000 mapped reads from bam file, and if the mapq of reads +# over half are NA, then return FALSE, because it is quite possible that +# the aligner using coding style as bowtie, 255 as highest score. +# Args: +# bam.file: bam file to be sampled. + + sbw <- c('pos', 'qwidth', 'mapq', 'strand') + sbp <- ScanBamParam(what=sbw, flag=scanBamFlag( + isUnmappedQuery=F, isDuplicate=F)) + samp <- BamSampler(bam.file, yieldSize=500) + samp.reads <- scanBam(samp, param=sbp)[[1]] + samp.len <- length(samp.reads[["mapq"]]) + mapq.255 <- sum(is.na(samp.reads[["mapq"]])) + if(mapq.255/samp.len >= 0.5){ + return(FALSE) + }else{ + return(TRUE) + } +} + +headerIndexBam <- function(bam.list) { +# Read bam header to determine mapping method. +# Index bam files if not done yet. +# Args: +# ctg.tbl: coverage-genelist-title table. + + v.map.bowtie <- vector('logical', length=length(bam.list)) + for(i in 1:length(bam.list)) { + bam.file <- bam.list[i] + + # Index bam file. + if(!file.exists(paste(bam.file, ".bai", sep=""))) { + indexBam(bam.file) + } + + # Derive mapping program. + header <- scanBamHeader(bam.file) + map.prog <- try(strsplit(header[[1]]$text$'@PG'[[1]], ':')[[1]][2], + silent=T) + if(class(map.prog) != "try-error") { + map.style <- grepl('tophat|bowtie|bedtools|star', map.prog, + ignore.case=T) + if(map.style){ + v.map.bowtie[i] <- TRUE + next + } + map.style <- grepl('bwa|casava|gem', map.prog, ignore.case=T) + if(map.style) { + v.map.bowtie[i] <- FALSE + next + } +# if(estiMapqStyle(bam.file)){ +# warning(sprintf("Aligner for: %s cannot be determined. Style of +# standard SAM mapping score will be used.", bam.file)) +# v.map.bowtie[i] <- FALSE +# }else{ +# warning(sprintf("Aligner for: %s cannot be determined. Style of +# Bowtie-like SAM mapping score will be used. Would you mind to tell us what +# aligner you are using?", bam.file)) +# v.map.bowtie[i] <- TRUE +# } + } +# else { +# cat("\n") +# if(estiMapqStyle(bam.file)){ +# warning(sprintf("Aligner for: %s cannot be determined. Style of +# standard SAM mapping score will be used.", bam.file)) +# v.map.bowtie[i] <- FALSE +# }else{ +# warning(sprintf("Aligner for: %s cannot be determined. Style of +# Bowtie-like SAM mapping score will be used.", bam.file)) +# v.map.bowtie[i] <- TRUE +# } +# } + warning(sprintf("Aligner for: %s cannot be determined. Style of +standard SAM mapping score will be used. Would you mind submitting an issue +report to us on Github? This will benefit people using the same aligner.", + bam.file)) + v.map.bowtie[i] <- FALSE + } + names(v.map.bowtie) <- bam.list + + v.map.bowtie +} + +libSizeBam <- function(bam.list) { +# Obtain library sizes by counting qualified bam records. +# Args: +# ctg.tbl: coverage-genelist-title table. + + # Count only reads that are mapped, primary, passed quality control and + # un-duplicated. + sbp <- ScanBamParam(flag=scanBamFlag(isUnmappedQuery=F, isSecondaryAlignment=F, + isNotPassingQualityControls=F, isDuplicate=F)) + v.lib.size <- vector('integer', length=length(bam.list)) + for(i in 1:length(bam.list)) { + bfn <- bam.list[i] # bam file name. + cfn <- paste(basename(bfn), '.cnt', sep='') # count file name. + if(file.exists(cfn)) { + v.lib.size[i] <- as.integer(readLines(cfn, n=1)) + } else { + cnt.bam <- countBam(bfn, param=sbp) + v.lib.size[i] <- cnt.bam$records + writeLines(as.character(v.lib.size[i]), cfn) + } + names(v.lib.size)[i] <- bfn + } + + v.lib.size +} + +seqnamesBam <- function(bam.list) { +# Obtain chromosome names for each bam file. This list must be used to filter +# genomic regions before scanBam or it terminates immaturely. +# ctg.tbl: coverage-genelist-title table. + + # browser() + sn.list <- lapply(scanBamHeader(bam.list), function(h) { + names(h$targets) + }) + names(sn.list) <- bam.list + + sn.list +} + +chrTag <- function(sn.inbam) { +# Check whether the chromosome name format in the bam file contains 'chr' or not. +# Args: +# sn.inbam: seqnames in the bam file. + + n.chr <- length(grep('^chr', sn.inbam)) + if(n.chr == 0) { + chr.tag <- F + } else if(n.chr == length(sn.inbam)) { + chr.tag <- T + } else { + return("Inconsistent chromosome names in bam file. Check bam header.") + } + + chr.tag +} + +chunkIndex <- function(tot.gene, gcs) { +# Create chunk indices according to total number of genes and chunk size. +# Args: +# tot.gene: total number of genes. +# gcs: gene chunk size. + + nchk <- ceiling(tot.gene / gcs) # number of chunks. + chkidx.list <- vector('list', length=nchk) # chunk indices list. + chk.start <- 1 # chunk start. + i.chk <- idiv(tot.gene, chunkSize=gcs) + for(i in 1:nchk) { + chk.size <- nextElem(i.chk) + chkidx.list[[i]] <- c(chk.start, chk.start + chk.size - 1) + chk.start <- chk.start + chk.size + } + + chkidx.list +} + +covMatrix <- function(debug, chkidx.list, coord, rnaseq.gb, exonmodel, libsize, + spit.dot=T, ...) { +# Function to generate a coverage matrix for all genes. +# Args: +# debug: boolean tag for debugging. +# chkidx.list: list of (start, end) indices for each chunk. +# coord: dataframe of gene coordinates. +# rnaseq.gb: boolean for RNA-seq genebody plot. +# exonmodel: exon model data object. +# libsize: total read count for this bam file. +# spit.dot: boolean to control sptting '.' to consoles. +# Return: normalized coverage matrix for all genes, each row represents a gene. + + + if(!debug) { + # Extract coverage and combine into a matrix. + result.matrix <- foreach(chk=chkidx.list, .combine='rbind', + .multicombine=T) %dopar% { + if(spit.dot) { + cat(".") + } + i <- chk[1]:chk[2] # chunk: start -> end + # If RNA-seq, retrieve exon ranges. + if(rnaseq.gb) { + exonranges.list <- unlist(exonmodel[coord[i, ]$tid]) + } else { + exonranges.list <- NULL + } + doCov(coord[i, ], exonranges.list, ...) + } + + # Floor negative values which are caused by spline. + result.matrix[result.matrix < 0] <- 0 + result.matrix / libsize * 1e6 # normalize to RPM. + + } else { + for(c in 1:length(chkidx.list)) { + chk <- chkidx.list[[c]] + i <- chk[1]:chk[2] # chunk: start -> end + cat(".") + # If RNA-seq, retrieve exon ranges. + if(rnaseq.gb) { + exonranges.list <- unlist(exonmodel[coord[i, ]$tid]) + } else { + exonranges.list <- NULL + } + cov <- doCov(coord[i, ], exonranges.list, ...) + if(c == 1) { + result.matrix <- matrix(0, nrow=nrow(coord), ncol=ncol(cov)) + } + result.matrix[i, ] <- cov + } + # Floor negative values which are caused by spline. + # browser() + result.matrix[result.matrix < 0] <- 0 + result.matrix / libsize * 1e6 # normalize to RPM. + + } +} + + +doCov <- function(coord.mat, exonranges.list, chr.tag, pint, reg2plot, + flanksize, flankfactor, m.pts, f.pts, ...) { +# Extract coverage from bam file into a matrix. According to the parameter +# values, call corresponding functions. +# Args: +# coord.mat: matrix of genomic coordinates to extract coverage. +# exonranges.list: list of IRanges objects, each represents a group of exons. +# pint: boolean of point interval. +# reg2plot: string of region to plot. +# flanksize: flanking region size. +# flankfactor: flanking region factor. +# m.pts: data points for middle interval. +# f.pts: data points for flanking region. +# Return: matrix of interpolated coverage: each row represents a gene; each +# column represents a data point. Coverage from exons are concatenated. + + v.chrom <- coord.mat$chrom + # if(!chr.tag) { + # v.chrom <- sub('chr', '', v.chrom) + # } + v.chrom <- as.factor(v.chrom) + v.strand <- as.factor(coord.mat$strand) + + # Figure out interval region sizes and calculate flanking region sizes. + if(!pint) { # interval regions. + if(!is.null(exonranges.list)) { # RNA-seq + if(flankfactor > 0) { + v.fls <- sapply(exonranges.list, function(t) { + sum(end(t) - start(t) + 1) * flankfactor + }) + } else { + v.fls <- rep(flanksize, length=nrow(coord.mat)) + } + extrCovExons(v.chrom, exonranges.list, v.fls, v.strand, + m.pts, f.pts, ...) + } else { # ChIP-seq with intervals. + v.start <- coord.mat$start + v.end <- coord.mat$end + if(flankfactor > 0) { + v.fls <- round((v.end - v.start + 1) * flankfactor) + } else { + v.fls <- rep(flanksize, length=nrow(coord.mat)) + } + extrCov3Sec(v.chrom, v.start, v.end, v.fls, v.strand, + m.pts, f.pts, ...) + } + } else { # point center with flanking regions. + v.midp <- vector('integer', length=nrow(coord.mat)) + for(r in 1:nrow(coord.mat)) { + if(reg2plot == 'tss' && v.strand[r] == '+' || + reg2plot == 'tes' && v.strand[r] == '-') { + # the left site is center. + v.midp[r] <- coord.mat$start[r] + } else { # this also includes BED supplied point center. + v.midp[r] <- coord.mat$end[r] + } + } + # browser() + extrCovMidp(v.chrom, v.midp, flanksize, v.strand, m.pts + f.pts*2, ...) + } +}