edger: edger.R comparison

comparison edger.R @ 14:c5fa04118f83 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit 4b17306763bc8eb92c8967c7b4b57abcc514e670

author	iuc
date	Wed, 04 Sep 2024 15:49:53 +0000
parents	838b481dc6f9
children

comparison

equal deleted inserted replaced

-:838b481dc6f9
+:c5fa04118f83
 #       outPath", "o", 1, "character"       -Path to folder to write all output to
 #       filesPath", "j", 2, "character"     -JSON list object if multiple files input
 #       matrixPath", "m", 2, "character"    -Path to count matrix
 #       factFile", "f", 2, "character"      -Path to factor information file
 #       factInput", "i", 2, "character"     -String containing factors if manually input
+#       formula", "F", 2, "character".      -String containing a formula to override default use of factInput
 #       annoPath", "a", 2, "character"      -Path to input containing gene annotations
 #       contrastData", "C", 1, "character"  -String containing contrasts of interest
 #       cpmReq", "c", 2, "double"           -Float specifying cpm requirement
 #       cntReq", "z", 2, "integer"          -Integer specifying minimum total count requirement
 #       sampleReq", "s", 2, "integer"       -Integer specifying cpm requirement
 # Record starting time
 time_start <- as.character(Sys.time())
 # setup R error handling to go to stderr
 options(show.error.messages = FALSE, error = function() {
 cat(geterrmessage(), file = stderr())
 q("no", 1, FALSE)
 })
 # we need that to not crash galaxy with an UTF8 error on German LC settings.
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 ### Function Delcaration
 ################################################################################
 # Function to sanitise contrast equations so there are no whitespaces
 # surrounding the arithmetic operators, leading or trailing whitespace
 sanitise_equation <- function(equation) {
 equation <- gsub(" *[+] *", "+", equation)
 equation <- gsub(" *[-] *", "-", equation)
 equation <- gsub(" *[/] *", "/", equation)
 equation <- gsub(" *[*] *", "*", equation)
 equation <- gsub("^\\s+|\\s+$", "", equation)
 return(equation)
 }
 # Function to sanitise group information
 sanitise_groups <- function(string) {
 string <- gsub(" *[,] *", ",", string)
 string <- gsub("^\\s+|\\s+$", "", string)
 return(string)
 }
 # Function to change periods to whitespace in a string
 unmake_names <- function(string) {
 string <- gsub(".", " ", string, fixed = TRUE)
 return(string)
+}
+# Sanitise file base names coming from factors or contrasts
+sanitise_basename <- function(string) {
+string <- gsub("[/^]", "_", string)
+return(string)
 }
 # Generate output folder and paths
 make_out <- function(filename) {
 return(paste0(out_path, "/", filename))
 }
 # Generating design information
 paste_listname <- function(string) {
 return(paste0("factors$", string))
 }
 # Create cata function: default path set, default seperator empty and appending
 # true by default (Ripped straight from the cat function with altered argument
 # defaults)
 cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL,
 append = TRUE) {
 if (is.character(file)) {
 if (file == "") {
 file <- stdout()
 } else if (substring(file, 1L, 1L) == "|") {
 file <- pipe(substring(file, 2L), "w")
 on.exit(close(file))
 } else {
 file <- file(file, ifelse(append, "a", "w"))
 on.exit(close(file))
 }
 }
 .Internal(cat(list(...), file, sep, fill, labels, append))
 }
 # Function to write code for html head and title
 html_head <- function(title) {
 cata("<head>\n")
 cata("<title>", title, "</title>\n")
 cata("</head>\n")
 }
 # Function to write code for html links
 html_link <- function(address, label = address) {
 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
 }
 # Function to write code for html images
 html_image <- function(source, label = source, height = 600, width = 600) {
 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
 cata("\" width=\"", width, "\"/>\n")
 }
 # Function to write code for html list items
 list_item <- function(...) {
 cata("<li>", ..., "</li>\n")
 }
 table_item <- function(...) {
 cata("<td>", ..., "</td>\n")
 }
 table_head_item <- function(...) {
 cata("<th>", ..., "</th>\n")
 }
 ################################################################################
 ### Input Processing
 ################################################################################
 # Collect arguments from command line
 args <- commandArgs(trailingOnly = TRUE)
 # Get options, using the spec as defined by the enclosed list.
 # Read the options from the default: commandArgs(TRUE).
-spec <- matrix(c(
+spec <- matrix(
-"htmlPath", "R", 1, "character",
+c(
-"outPath", "o", 1, "character",
+"htmlPath", "R", 1, "character",
-"filesPath", "j", 2, "character",
+"outPath", "o", 1, "character",
-"matrixPath", "m", 2, "character",
+"filesPath", "j", 2, "character",
-"factFile", "f", 2, "character",
+"matrixPath", "m", 2, "character",
-"factInput", "i", 2, "character",
+"factFile", "f", 2, "character",
-"annoPath", "a", 2, "character",
+"formula", "F", 2, "character",
-"contrastData", "C", 1, "character",
+"factInput", "i", 2, "character",
-"cpmReq", "c", 1, "double",
+"annoPath", "a", 2, "character",
-"totReq", "y", 0, "logical",
+"contrastData", "C", 1, "character",
-"cntReq", "z", 1, "integer",
+"cpmReq", "c", 1, "double",
-"sampleReq", "s", 1, "integer",
+"totReq", "y", 0, "logical",
-"normCounts", "x", 0, "logical",
+"cntReq", "z", 1, "integer",
-"rdaOpt", "r", 0, "logical",
+"sampleReq", "s", 1, "integer",
-"lfcReq", "l", 1, "double",
+"normCounts", "x", 0, "logical",
-"pValReq", "p", 1, "double",
+"rdaOpt", "r", 0, "logical",
-"pAdjOpt", "d", 1, "character",
+"lfcReq", "l", 1, "double",
-"normOpt", "n", 1, "character",
+"pValReq", "p", 1, "double",
-"robOpt", "b", 0, "logical",
+"pAdjOpt", "d", 1, "character",
-"lrtOpt", "t", 0, "logical"
+"normOpt", "n", 1, "character",
-),
+"robOpt", "b", 0, "logical",
-byrow = TRUE, ncol = 4
+"lrtOpt", "t", 0, "logical"
+),
+byrow = TRUE, ncol = 4
 )
 opt <- getopt(spec)
 if (is.null(opt$matrixPath) && is.null(opt$filesPath)) {
 cat("A counts matrix (or a set of counts files) is required.\n")
 q(status = 1)
 }
 if (is.null(opt$cpmReq)) {
 filt_cpm <- FALSE
 } else {
 filt_cpm <- TRUE
 }
 if (is.null(opt$cntReq) || is.null(opt$sampleReq)) {
 filt_smpcount <- FALSE
 } else {
 filt_smpcount <- TRUE
 }
 if (is.null(opt$totReq)) {
 filt_totcount <- FALSE
 } else {
 filt_totcount <- TRUE
 }
 if (is.null(opt$lrtOpt)) {
 want_lrt <- FALSE
 } else {
 want_lrt <- TRUE
 }
 if (is.null(opt$rdaOpt)) {
 want_rda <- FALSE
 } else {
 want_rda <- TRUE
 }
 if (is.null(opt$annoPath)) {
 have_anno <- FALSE
 } else {
 have_anno <- TRUE
 }
 if (is.null(opt$normCounts)) {
 want_norm <- FALSE
 } else {
 want_norm <- TRUE
 }
 if (is.null(opt$robOpt)) {
 want_robust <- FALSE
 } else {
 want_robust <- TRUE
 }
 if (!is.null(opt$filesPath)) {
 # Process the separate count files (adapted from DESeq2 wrapper)
 library("rjson")
 parser <- newJSONParser()
 parser$addData(opt$filesPath)
 factor_list <- parser$getObject()
 factors <- sapply(factor_list, function(x) x[[1]])
 filenames_in <- unname(unlist(factor_list[[1]][[2]]))
 sampletable <- data.frame(
 sample = basename(filenames_in),
 filename = filenames_in,
 row.names = filenames_in,
 stringsAsFactors = FALSE
 )
 for (factor in factor_list) {
-factorname  <- factor[[1]]
+factorname <- factor[[1]]
 sampletable[[factorname]] <- character(nrow(sampletable))
 lvls <- sapply(factor[[2]], function(x) names(x))
 for (i in seq_along(factor[[2]])) {
 files <- factor[[2]][[i]][[1]]
 sampletable[files, factorname] <- lvls[i]
 }
 sampletable[[factorname]] <- factor(sampletable[[factorname]], levels = lvls)
 }
 rownames(sampletable) <- sampletable$sample
 rem <- c("sample", "filename")
 factors <- sampletable[, !(names(sampletable) %in% rem), drop = FALSE]
 # read in count files and create single table
 countfiles <- lapply(sampletable$filename, function(x) {
 read.delim(x, row.names = 1)
 })
 counts <- do.call("cbind", countfiles)
 } else {
 # Process the single count matrix
 counts <- read.table(opt$matrixPath, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = FALSE)
 row.names(counts) <- counts[, 1]
 counts <- counts[, -1]
 countsrows <- nrow(counts)
 # Process factors
 if (is.null(opt$factInput)) {
 factordata <- read.table(opt$factFile, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = TRUE)
 # check samples names match
 if (!any(factordata[, 1] %in% colnames(counts))) {
 stop("Sample IDs in factors file and count matrix don't match")
 }
 # order samples as in counts matrix
 factordata <- factordata[match(colnames(counts), factordata[, 1]), ]
 factors <- data.frame(sapply(factordata[, -1, drop = FALSE], make.names))
 } else {
 factors <- unlist(strsplit(opt$factInput, "|", fixed = TRUE))
 factordata <- list()
 for (fact in factors) {
 newfact <- unlist(strsplit(fact, split = "::"))
 factordata <- rbind(factordata, newfact)
 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
 # Set the row names to be the name of the factor and delete first row
 row.names(factordata) <- factordata[, 1]
 factordata <- factordata[, -1]
 factordata <- sapply(factordata, sanitise_groups)
 factordata <- sapply(factordata, strsplit, split = ",")
 factordata <- sapply(factordata, make.names)
 # Transform factor data into data frame of R factor objects
 factors <- data.frame(factordata)
 }
 }
 # if annotation file provided
 if (have_anno) {
 geneanno <- read.table(opt$annoPath, header = TRUE, sep = "\t", quote = "", strip.white = TRUE, stringsAsFactors = FALSE)
 }
 # Create output directory
 out_path <- opt$outPath
 dir.create(out_path, showWarnings = FALSE)
-# Split up contrasts separated by comma into a vector then sanitise
+# Check if contrastData is a file or not
-contrast_data <- unlist(strsplit(opt$contrastData, split = ","))
+if (file.exists(opt$contrastData)) {
+contrast_data <- unlist(read.table(opt$contrastData, sep = "\t", header = TRUE)[[1]])
+} else {
+# Split up contrasts separated by comma into a vector then sanitise
+contrast_data <- unlist(strsplit(opt$contrastData, split = ","))
+}
 contrast_data <- sanitise_equation(contrast_data)
 contrast_data <- gsub(" ", ".", contrast_data, fixed = TRUE)
 bcv_pdf <- make_out("bcvplot.pdf")
 bcv_png <- make_out("bcvplot.png")
 ql_pdf <- make_out("qlplot.pdf")
 ql_png <- make_out("qlplot.png")
 mds_pdf <- character() # Initialise character vector
 mds_png <- character()
 for (i in seq_len(ncol(factors))) {
-mds_pdf[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".pdf"))
+mds_pdf[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf"))
-mds_png[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".png"))
+mds_png[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png"))
 }
 md_pdf <- character()
 md_png <- character()
 top_out <- character()
 for (i in seq_along(contrast_data)) {
-md_pdf[i] <- make_out(paste0("mdplot_", contrast_data[i], ".pdf"))
+md_pdf[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf"))
-md_png[i] <- make_out(paste0("mdplot_", contrast_data[i], ".png"))
+md_png[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png"))
-top_out[i] <- make_out(paste0("edgeR_", contrast_data[i], ".tsv"))
+top_out[i] <- make_out(paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv"))
 } # Save output paths for each contrast as vectors
 norm_out <- make_out("edgeR_normcounts.tsv")
 rda_out <- make_out("edgeR_analysis.RData")
 session_out <- make_out("session_info.txt")
 # Extract counts and annotation data
 data <- list()
 data$counts <- counts
 if (have_anno) {
 # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
 annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ]
 data$genes <- annoord
 } else {
 data$genes <- data.frame(GeneID = row.names(counts))
 }
 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
 # samples. Default is no filtering
 prefilter_count <- nrow(data$counts)
 if (filt_cpm || filt_smpcount || filt_totcount) {
 if (filt_totcount) {
 keep <- rowSums(data$counts) >= opt$cntReq
 } else if (filt_smpcount) {
 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
 } else if (filt_cpm) {
 keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
 }
 data$counts <- data$counts[keep, ]
 data$genes <- data$genes[keep, , drop = FALSE]
 }
 postfilter_count <- nrow(data$counts)
 filtered_count <- prefilter_count - postfilter_count
 colnames(data) <- samplenames
 data$samples <- factors
 data$genes <- genes
+if (!is.null(opt$formula)) {
-formula <- "~0"
+formula <- opt$formula
-for (i in seq_along(factor_list)) {
+# sanitisation can be getting rid of the "~"
-formula <- paste(formula, factor_list[i], sep = "+")
+if (!startsWith(formula, "~")) {
+formula <- paste0("~", formula)
+}
+} else {
+formula <- "~0"
+for (i in seq_along(factor_list)) {
+formula <- paste(formula, factor_list[i], sep = "+")
+}
 }
 formula <- formula(formula)
 design <- model.matrix(formula, factors)
 for (i in seq_along(factor_list)) {
 colnames(design) <- gsub(factor_list[i], "", colnames(design), fixed = TRUE)
 }
 # Calculating normalising factor, estimating dispersion
 data <- calcNormFactors(data, method = opt$normOpt)
 if (want_robust) {
 data <- estimateDisp(data, design = design, robust = TRUE)
 } else {
 data <- estimateDisp(data, design = design)
 }
 # Generate contrasts information
 contrasts <- makeContrasts(contrasts = contrast_data, levels = design)
 labels <- names(counts)
 # MDS plot
 png(mds_png, width = 600, height = 600)
 plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1]))
-img_name <- paste0("MDS Plot_", names(factors)[1], ".png")
+img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[1]), ".png")
-img_addr <- paste0("mdsplot_", names(factors)[1], ".png")
+img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[1]), ".png")
 image_data[1, ] <- c(img_name, img_addr)
 invisible(dev.off())
 pdf(mds_pdf)
 plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1]))
-link_name <- paste0("MDS Plot_", names(factors)[1], ".pdf")
+link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[1]), ".pdf")
-link_addr <- paste0("mdsplot_", names(factors)[1], ".pdf")
+link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[1]), ".pdf")
 link_data[1, ] <- c(link_name, link_addr)
 invisible(dev.off())
 # If additional factors create additional MDS plots coloured by factor
 if (ncol(factors) > 1) {
 for (i in 2:ncol(factors)) {
 png(mds_png[i], width = 600, height = 600)
 plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
-img_name <- paste0("MDS Plot_", names(factors)[i], ".png")
+img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".png")
-img_addr <- paste0("mdsplot_", names(factors)[i], ".png")
+img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png")
 image_data <- rbind(image_data, c(img_name, img_addr))
 invisible(dev.off())
 pdf(mds_pdf[i])
 plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
-link_name <- paste0("MDS Plot_", names(factors)[i], ".pdf")
+link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".pdf")
-link_addr <- paste0("mdsplot_", names(factors)[i], ".pdf")
+link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf")
 link_data <- rbind(link_data, c(link_name, link_addr))
 invisible(dev.off())
 }
 }
 # BCV Plot
 png(bcv_png, width = 600, height = 600)
 plotBCV(data, main = "BCV Plot")
 link_data <- rbind(link_data, c(link_name, link_addr))
 invisible(dev.off())
 # Generate fit
 if (want_lrt) {
 fit <- glmFit(data, design)
 } else {
 if (want_robust) {
 fit <- glmQLFit(data, design, robust = TRUE)
 } else {
 fit <- glmQLFit(data, design)
 }
 # Plot QL dispersions
 png(ql_png, width = 600, height = 600)
 plotQLDisp(fit, main = "QL Plot")
 img_name <- "QL Plot"
 img_addr <- "qlplot.png"
 image_data <- rbind(image_data, c(img_name, img_addr))
 invisible(dev.off())
 pdf(ql_pdf)
 plotQLDisp(fit, main = "QL Plot")
 link_name <- "QL Plot.pdf"
 link_addr <- "qlplot.pdf"
 link_data <- rbind(link_data, c(link_name, link_addr))
 invisible(dev.off())
 }
 # Save normalised counts (log2cpm)
 if (want_norm) {
 normalised_counts <- cpm(data, normalized.lib.sizes = TRUE, log = TRUE)
 normalised_counts <- data.frame(data$genes, normalised_counts)
 write.table(normalised_counts, file = norm_out, row.names = FALSE, sep = "\t", quote = FALSE)
 link_data <- rbind(link_data, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
 }
 for (i in seq_along(contrast_data)) {
 if (want_lrt) {
 res <- glmLRT(fit, contrast = contrasts[, i])
 } else {
 res <- glmQLFTest(fit, contrast = contrasts[, i])
 }
 status <- decideTestsDGE(res,
 adjust.method = opt$pAdjOpt, p.value = opt$pValReq,
 lfc = opt$lfcReq
 )
 sum_status <- summary(status)
 # Collect counts for differential expression
 up_count[i] <- sum_status["Up", ]
 down_count[i] <- sum_status["Down", ]
 flat_count[i] <- sum_status["NotSig", ]
 # Write top expressions table
 top <- topTags(res, adjust.method = opt$pAdjOpt, n = Inf, sort.by = "PValue")
 write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE)
-link_name <- paste0("edgeR_", contrast_data[i], ".tsv")
+link_name <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
-link_addr <- paste0("edgeR_", contrast_data[i], ".tsv")
+link_addr <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
 link_data <- rbind(link_data, c(link_name, link_addr))
 # Plot MD (log ratios vs mean difference) using limma package
 pdf(md_pdf[i])
 limma::plotMD(res,
 status = status,
 main = paste("MD Plot:", unmake_names(contrast_data[i])),
 hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
 xlab = "Average Expression", ylab = "logFC"
 )
 abline(h = 0, col = "grey", lty = 2)
-link_name <- paste0("MD Plot_", contrast_data[i], ".pdf")
+link_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".pdf")
-link_addr <- paste0("mdplot_", contrast_data[i], ".pdf")
+link_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf")
 link_data <- rbind(link_data, c(link_name, link_addr))
 invisible(dev.off())
 png(md_png[i], height = 600, width = 600)
 limma::plotMD(res,
 status = status,
 main = paste("MD Plot:", unmake_names(contrast_data[i])),
 hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
 xlab = "Average Expression", ylab = "logFC"
 )
 abline(h = 0, col = "grey", lty = 2)
-img_name <- paste0("MD Plot_", contrast_data[i], ".png")
+img_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".png")
-img_addr <- paste0("mdplot_", contrast_data[i], ".png")
+img_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png")
 image_data <- rbind(image_data, c(img_name, img_addr))
 invisible(dev.off())
 }
 sig_diff <- data.frame(Up = up_count, Flat = flat_count, Down = down_count)
 row.names(sig_diff) <- contrast_data
 # Save relevant items as rda object
 if (want_rda) {
 if (want_norm) {
 save(counts, data, status, normalised_counts, labels, factors, fit, res, top, contrasts, design,
 file = rda_out, ascii = TRUE
 )
 } else {
 save(counts, data, status, labels, factors, fit, res, top, contrasts, design,
 file = rda_out, ascii = TRUE
 )
 }
 link_data <- rbind(link_data, c("edgeR_analysis.RData", "edgeR_analysis.RData"))
 }
 # Record session info
 writeLines(capture.output(sessionInfo()), session_out)
 link_data <- rbind(link_data, c("Session Info", "session_info.txt"))
 cata("Links to PDF copies of plots are in 'Plots' section below.<br />\n")
 html_image(image_data$Link[1], image_data$Label[1])
 for (i in 2:nrow(image_data)) {
 html_image(image_data$Link[i], image_data$Label[i])
 }
 cata("<h4>Differential Expression Counts:</h4>\n")
 cata("<table border=\"1\" cellpadding=\"4\">\n")
 cata("<tr>\n")
 table_item()
 for (i in colnames(sig_diff)) {
 table_head_item(i)
 }
 cata("</tr>\n")
 for (i in seq_len(nrow(sig_diff))) {
 cata("<tr>\n")
 table_head_item(unmake_names(row.names(sig_diff)[i]))
 for (j in seq_len(ncol(sig_diff))) {
 table_item(as.character(sig_diff[i, j]))
 }
 cata("</tr>\n")
 }
 cata("</table>")
 cata("<h4>Plots:</h4>\n")
 for (i in seq_len(nrow(link_data))) {
 if (grepl(".pdf", link_data$Link[i])) {
 html_link(link_data$Link[i], link_data$Label[i])
 }
 }
 cata("<h4>Tables:</h4>\n")
 for (i in seq_len(nrow(link_data))) {
 if (grepl(".tsv", link_data$Link[i])) {
 html_link(link_data$Link[i], link_data$Label[i])
 }
 }
 if (want_rda) {
 cata("<h4>R Data Objects:</h4>\n")
 for (i in seq_len(nrow(link_data))) {
 if (grepl(".RData", link_data$Link[i])) {
 html_link(link_data$Link[i], link_data$Label[i])
 }
 }
 }
 cata("<p>Alt-click links to download file.</p>\n")
 cata("<p>Click floppy disc icon associated history item to download ")
 cata("all files.</p>\n")
 cata("<h4>Additional Information</h4>\n")
 cata("<ul>\n")
 if (filt_cpm || filt_smpcount || filt_totcount) {
 if (filt_cpm) {
 temp_str <- paste(
 "Genes without more than", opt$cpmReq,
 "CPM in at least", opt$sampleReq, "samples are insignificant",
 "and filtered out."
+)
+} else if (filt_smpcount) {
+temp_str <- paste(
+"Genes without more than", opt$cntReq,
+"counts in at least", opt$sampleReq, "samples are insignificant",
+"and filtered out."
+)
+} else if (filt_totcount) {
+temp_str <- paste(
+"Genes without more than", opt$cntReq,
+"counts, after summing counts for all samples, are insignificant",
+"and filtered out."
+)
+}
+list_item(temp_str)
+filter_prop <- round(filtered_count / prefilter_count * 100, digits = 2)
+temp_str <- paste0(
+filtered_count, " of ", prefilter_count, " (", filter_prop,
+"%) genes were filtered out for low expression."
 )
-} else if (filt_smpcount) {
+list_item(temp_str)
-temp_str <- paste(
-"Genes without more than", opt$cntReq,
-"counts in at least", opt$sampleReq, "samples are insignificant",
-"and filtered out."
-)
-} else if (filt_totcount) {
-temp_str <- paste(
-"Genes without more than", opt$cntReq,
-"counts, after summing counts for all samples, are insignificant",
-"and filtered out."
-)
-}
-list_item(temp_str)
-filter_prop <- round(filtered_count / prefilter_count * 100, digits = 2)
-temp_str <- paste0(
-filtered_count, " of ", prefilter_count, " (", filter_prop,
-"%) genes were filtered out for low expression."
-)
-list_item(temp_str)
 }
 list_item(opt$normOpt, " was the method used to normalise library sizes.")
 if (want_lrt) {
 list_item("The edgeR likelihood ratio test was used.")
 } else {
 if (want_robust) {
 list_item("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")
 } else {
 list_item("The edgeR quasi-likelihood test was used.")
 }
 }
 if (opt$pAdjOpt != "none") {
 if (opt$pAdjOpt == "BH" || opt$pAdjOpt == "BY") {
+temp_str <- paste0(
+"MD-Plot highlighted genes are significant at FDR ",
+"of ", opt$pValReq, " and exhibit log2-fold-change of at ",
+"least ", opt$lfcReq, "."
+)
+list_item(temp_str)
+} else if (opt$pAdjOpt == "holm") {
+temp_str <- paste0(
+"MD-Plot highlighted genes are significant at adjusted ",
+"p-value of ", opt$pValReq, "  by the Holm(1979) ",
+"method, and exhibit log2-fold-change of at least ",
+opt$lfcReq, "."
+)
+list_item(temp_str)
+}
+} else {
 temp_str <- paste0(
-"MD-Plot highlighted genes are significant at FDR ",
+"MD-Plot highlighted genes are significant at p-value ",
 "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
 "least ", opt$lfcReq, "."
 )
 list_item(temp_str)
-} else if (opt$pAdjOpt == "holm") {
-temp_str <- paste0(
-"MD-Plot highlighted genes are significant at adjusted ",
-"p-value of ", opt$pValReq, "  by the Holm(1979) ",
-"method, and exhibit log2-fold-change of at least ",
-opt$lfcReq, "."
-)
-list_item(temp_str)
-}
-} else {
-temp_str <- paste0(
-"MD-Plot highlighted genes are significant at p-value ",
-"of ", opt$pValReq, " and exhibit log2-fold-change of at ",
-"least ", opt$lfcReq, "."
-)
-list_item(temp_str)
 }
 cata("</ul>\n")
 cata("<h4>Summary of experimental data:</h4>\n")
 cata("<tr>\n")
 table_head_item("SampleID")
 table_head_item(names(factors)[1], " (Primary Factor)")
 if (ncol(factors) > 1) {
 for (i in names(factors)[2:length(names(factors))]) {
 table_head_item(i)
 }
 cata("</tr>\n")
 }
 for (i in seq_len(nrow((factors)))) {
 cata("<tr>\n")
 table_head_item(row.names(factors)[i])
 for (j in seq_len(ncol(factors))) {
 table_item(as.character(unmake_names(factors[i, j])))
 }
 cata("</tr>\n")
 }
 cata("</table>")
 for (i in seq_len(nrow(link_data))) {
 if (grepl("session_info", link_data$Link[i])) {
 html_link(link_data$Link[i], link_data$Label[i])
 }
 }
 cata("<table border=\"0\">\n")
 cata("<tr>\n")
 table_item("Task started at:")

Mercurial > repos > iuc > edger

comparison edger.R @ 14:c5fa04118f83 draft default tip