Mercurial > repos > iuc > dropletutils
diff scripts/dropletutils.Rscript @ 6:a79cf05ca77d draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit ed0625fe59342d14a08745996e3e32c6f922a738"
| author | iuc | 
|---|---|
| date | Thu, 10 Dec 2020 13:49:40 +0000 | 
| parents | d5e4b59d5331 | 
| children | da5ecdcfc9ee | 
line wrap: on
 line diff
--- a/scripts/dropletutils.Rscript Wed Jan 29 20:07:15 2020 +0000 +++ b/scripts/dropletutils.Rscript Thu Dec 10 13:49:40 2020 +0000 @@ -1,6 +1,6 @@ ## Load in data -args = commandArgs(trailingOnly = T) -if (length(args) != 1){ +args <- commandArgs(trailingOnly = T) +if (length(args) != 1) { stop("Please provide the config file") } @@ -11,57 +11,60 @@ source(args[1]) ## Helper functions -setSparse <- function(obj){ +set_sparse <- function(obj) { return(as(obj, "dgCMatrix")) } -writeTSV <- function(fileout, obj){ - write.table(as.matrix(obj), file=fileout, col.names=NA, sep='\t', quote=FALSE) +write_tsv <- function(fileout, obj) { + write.table(as.matrix(obj), file = fileout, + col.names = NA, sep = "\t", quote = FALSE) } -determineGeneIDs <- function(object){ - if (!is.null(rowData(object)$Symbol)){ +determine_geneids <- function(object) { + if (!is.null(rowData(object)$Symbol)) { return(rowData(object)$Symbol) } return(rownames(object)) } -getCounts <- function(object){ - return(Matrix(counts(object), sparse=TRUE)) +get_counts <- function(object) { + return(Matrix(counts(object), sparse = TRUE)) } -writeOut <- function(object, fileout, typeout){ - if (typeout == "tsv"){ - writeTSV(fileout, getCounts(object)) +write_out <- function(object, fileout, typeout) { + if (typeout == "tsv") { + write_tsv(fileout, get_counts(object)) } - else if (typeout == "h5"){ - write10xCounts(fileout, getCounts(object), - type="HDF5", - gene.symbol=determineGeneIDs(object), - overwrite=TRUE) + else if (typeout == "h5") { + write10xCounts(fileout, get_counts(object), + type = "HDF5", + gene.symbol = determine_geneids(object), + overwrite = TRUE) } - else if (typeout == "directory"){ - write10xCounts(fileout, getCounts(object), - type="sparse", - gene.symbol=determineGeneIDs(object), - overwrite=TRUE) + else if (typeout == "directory") { + write10xCounts(fileout, get_counts(object), + type = "sparse", + gene.symbol = determine_geneids(object), + overwrite = TRUE) } } -read10xFiles <- function(filein, typein){ +read_10x_files <- function(filein, typein) { sce <- NULL - if (typein == "tsv"){ + if (typein == "tsv") { ## Exploding memory problems occured here ## - solution is to use the readSparseCounts function from scater - sce <- SingleCellExperiment(assays = list(counts = readSparseCounts(filein))) - } - else if (typein == "h5"){ - sce <- read10xCounts(filein, col.names=T, type="HDF5") # use barcodes.tsv as column names + sce <- SingleCellExperiment(assays = + list(counts = readSparseCounts(filein))) } - else if (typein == "directory"){ - sce <- read10xCounts(filein, col.names=T, type="sparse") + else if (typein == "h5") { + # use barcodes.tsv as column names + sce <- read10xCounts(filein, col.names = T, type = "HDF5") } - counts(sce) <- setSparse(counts(sce)) + else if (typein == "directory") { + sce <- read10xCounts(filein, col.names = T, type = "sparse") + } + counts(sce) <- set_sparse(counts(sce)) return(sce) } @@ -69,97 +72,113 @@ ## Methods -doEmptyDrops <- function(files, eparams, in.type="directory", out.type="h5", fdr_threshold = 0.01){ - sce <- read10xFiles(files$infile, in.type) +do_empty_drops <- function(files, eparams, intype = "directory", + outtype = "h5", fdr_threshold = 0.01) { + sce <- read_10x_files(files$infile, intype) + + eparams$... <- NULL ## hack to remove other parameters from being + eparams$m <- Matrix(counts(sce), sparse = TRUE) + + ## Determine sensible lowerbound + m_stats <- summary(colSums(counts(sce))) + print("Cell Library Size Distribution:") + print(m_stats) - eparams$... <- NULL ## hack - eparams$m = Matrix(counts(sce), sparse=TRUE) + if (m_stats["Min."] > eparams$lower) { + print(paste0("CAUTION: Min. Lib. Size (", m_stats["Min."] + , ") < requested lowerbound (", eparams$lower, ")")) + message(paste0("Setting lowerbound to Mean: ", m_stats["Mean"])) + eparams$lower <- m_stats["Mean"] + } - e.out <- do.call(emptyDrops, c(eparams)) + e_out <- do.call(emptyDrops, c(eparams)) - bar.names <- colnames(sce) - if (length(bar.names) != nrow(e.out)){ + bar_names <- colnames(sce) + if (length(bar_names) != nrow(e_out)) { stop("Length of barcodes and output metrics don't match.") } - e.out <- cbind(bar.names, e.out) - e.out$is.Cell <- e.out$FDR <= fdr_threshold - e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited + e_out <- cbind(bar_names, e_out) + e_out$is_cell <- e_out$FDR <= fdr_threshold + e_out$is_cellandlimited <- e_out$is_cell & e_out$Limited ## Write to Plot - e.out$is.Cell[is.na(e.out$is.Cell)] <- FALSE - xlim.dat <- e.out[complete.cases(e.out),]$Total + e_out$is_cell[is.na(e_out$is_cell)] <- FALSE + xlim_dat <- e_out[complete.cases(e_out), ]$Total ## Write to table - writeTSV(files$table, e.out[complete.cases(e.out),]) + write_tsv(files$table, e_out[complete.cases(e_out), ]) png(files$plot) - plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"), - xlab="Total UMI count", ylab="-Log Probability", - xlim=c(min(xlim.dat),max(xlim.dat))) + plot(e_out$Total, -e_out$LogProb, col = ifelse(e_out$is_cell, + "red", "black"), + xlab = "Total UMI count", ylab = "-Log Probability", + xlim = c(min(xlim_dat), max(xlim_dat))) dev.off() ## Filtered called <- NULL - if (fdr_threshold != 0){ - called <- e.out$is.CellAndLimited + if (fdr_threshold != 0) { + called <- e_out$is_cellandlimited } else { - called <- e.out$is.Cell + called <- e_out$is_cell } called[is.na(called)] <- FALSE # replace NA's with FALSE - sce.filtered <- sce[,called] + sce_filtered <- sce[, called] - writeOut(sce.filtered, files$out, out.type) + write_out(sce_filtered, files$out, outtype) - message(paste("Cells:", sum(na.omit(e.out$is.Cell)))) - message(paste("Cells and Limited:", sum(na.omit(e.out$is.CellAndLimited)))) + message(paste("Cells:", sum(na.omit(e_out$is_cell)))) + message(paste("Cells and Limited:", sum(na.omit(e_out$is_cellandlimited)))) } -doDefaultDrops <- function(files, dparams, in.type="directory", out.type="h5"){ - sce <- read10xFiles(files$infile, in.type) +do_default_drops <- function(files, dparams, + intype = "directory", outtype = "h5") { + sce <- read_10x_files(files$infile, intype) - dparams$m = counts(sce) + dparams$m <- counts(sce) called <- do.call(defaultDrops, c(dparams)) # Filtered - sce.filtered <- sce[,called] + sce_filtered <- sce[, called] - writeOut(sce.filtered, files$out, out.type) + write_out(sce_filtered, files$out, outtype) message(paste("Cells:", sum(called))) } - -doBarcodeRankings <- function(files, bparams, in.type="directory"){ - sce <- read10xFiles(files$infile, in.type) +do_barcode_rankings <- function(files, bparams, intype = "directory") { + sce <- read_10x_files(files$infile, intype) bparams$... <- NULL ## hack - bparams$m = counts(sce) + bparams$m <- counts(sce) - br.out <- do.call(barcodeRanks, c(bparams)) + brout <- do.call(barcodeRanks, c(bparams)) png(files$plot) - plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes") - o <- order(br.out$rank) - lines(br.out$rank[o], br.out$fitted[o], col="red") + plot(brout$rank, brout$total, log = "xy", + xlab = "(log) Rank", ylab = "(log) Total Number of Barcodes") + o <- order(brout$rank) + lines(brout$rank[o], brout$fitted[o], col = "red") - abline(h=br.out$knee, col="dodgerblue", lty=2) - abline(h=br.out$inflection, col="forestgreen", lty=2) - legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection")) + abline(h = brout$knee, col = "dodgerblue", lty = 2) + abline(h = brout$inflection, col = "forestgreen", lty = 2) + legend("bottomleft", lty = 2, col = c("dodgerblue", "forestgreen"), + legend = c("knee", "inflection")) dev.off() - print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection)) + print(paste("knee =", brout$knee, ", inflection = ", brout$inflection)) } ## Main set.seed(seed.val) if (do.method == "barcodeRankings") { - doBarcodeRankings(files, bparams, in.type) + do_barcode_rankings(files, bparams, intype) } else if (do.method == "defaultDrops") { - doDefaultDrops(files, dparams, in.type, out.type) + do_default_drops(files, dparams, intype, outtype) } else if (do.method == "emptyDrops") { - doEmptyDrops(files, eparams, in.type, out.type, empty.fdr_threshold) + do_empty_drops(files, eparams, intype, outtype, empty_fdr_threshold) }
