diff univariate_wrapper.R @ 0:ab2ee3414e4e draft

planemo upload for repository https://github.com/workflow4metabolomics/univariate.git commit 98e8f4464b2f7321acb010e26e2a1c82fe37096e
author ethevenot
date Tue, 24 Oct 2017 08:57:25 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/univariate_wrapper.R	Tue Oct 24 08:57:25 2017 -0400
@@ -0,0 +1,190 @@
+#!/usr/bin/env Rscript
+
+library(batch) ## parseCommandArgs
+
+# Constants
+argv <- commandArgs(trailingOnly = FALSE)
+script.path <- sub("--file=","",argv[grep("--file=",argv)])
+prog.name <- basename(script.path)
+
+# Print help
+if (length(grep('-h', argv)) >0) {
+	cat("Usage:", prog.name,
+	    "dataMatrix_in myDataMatrix.tsv",
+	    "sampleMetadata_in mySampleData.tsv",
+	    "variableMetadata_in myVariableMetadata.tsv",
+	    "facC qual",
+	    "tesC kruskal",
+	    "adjC fdr",
+	    "thrN 0.05",
+	    "variableMetadata_out myVariableMetadata_out.tsv",
+	    "figure figure.pdf",
+	    "information information.txt",
+		"\n")
+	quit(status = 0)
+}
+
+source_local <- function(fname){
+    argv <- commandArgs(trailingOnly = FALSE)
+    base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
+    source(paste(base_dir, fname, sep="/"))
+}
+
+source_local("univariate_script.R")
+
+argVc <- unlist(parseCommandArgs(evaluate=FALSE))
+
+##------------------------------
+## Initializing
+##------------------------------
+
+## options
+##--------
+
+strAsFacL <- options()$stringsAsFactors
+options(stringsAsFactors = FALSE)
+
+## packages
+##---------
+
+library(PMCMR)
+
+## constants
+##----------
+
+modNamC <- "Univariate" ## module name
+
+topEnvC <- environment()
+flagC <- "\n"
+
+## functions
+##----------
+
+flgF <- function(tesC,
+                 envC = topEnvC,
+                 txtC = NA) { ## management of warning and error messages
+
+    tesL <- eval(parse(text = tesC), envir = envC)
+
+    if(!tesL) {
+
+        sink(NULL)
+        stpTxtC <- ifelse(is.na(txtC),
+                          paste0(tesC, " is FALSE"),
+                          txtC)
+
+        stop(stpTxtC,
+             call. = FALSE)
+
+    }
+
+} ## flgF
+
+## log file
+##---------
+
+sink(argVc["information"])
+
+cat("\nStart of the '", modNamC, "' Galaxy module call: ",
+    format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
+
+## loading
+##--------
+
+datMN <- t(as.matrix(read.table(argVc["dataMatrix_in"],
+                                check.names = FALSE,
+                                header = TRUE,
+                                row.names = 1,
+                                sep = "\t")))
+
+samDF <- read.table(argVc["sampleMetadata_in"],
+                    check.names = FALSE,
+                    header = TRUE,
+                    row.names = 1,
+                    sep = "\t")
+
+varDF <- read.table(argVc["variableMetadata_in"],
+                    check.names = FALSE,
+                    header = TRUE,
+                    row.names = 1,
+                    sep = "\t")
+
+tesC <- argVc["tesC"]
+
+## checking
+##---------
+
+flgF("identical(rownames(datMN), rownames(samDF))", txtC = "Column names of the dataMatrix are not identical to the row names of the sampleMetadata; check your data with the 'Check Format' module in the 'Quality Control' section")
+flgF("identical(colnames(datMN), rownames(varDF))", txtC = "Row names of the dataMatrix are not identical to the row names of the variableMetadata; check your data with the 'Check Format' module in the 'Quality Control' section")
+
+flgF("argVc['facC'] %in% colnames(samDF)", txtC = paste0("Required factor of interest '", argVc['facC'], "' could not be found in the column names of the sampleMetadata"))
+flgF("mode(samDF[, argVc['facC']]) %in% c('character', 'numeric')", txtC = paste0("The '", argVc['facC'], "' column of the sampleMetadata should contain either number only, or character only"))
+
+flgF("!(tesC %in% c('ttest', 'wilcoxon')) || (mode(samDF[, argVc['facC']]) == 'character' && length(unique(samDF[, argVc['facC']])) == 2)", txtC = paste0("For 'ttest' and 'wilcoxon', the chosen factor column ('", argVc['facC'], "') of the sampleMetadata should contain characters with only two different classes"))
+flgF("!(tesC %in% c('anova', 'kruskal')) || (mode(samDF[, argVc['facC']]) == 'character' && length(unique(samDF[, argVc['facC']])) > 2)", txtC = paste0("For 'anova' and 'kruskal', the chosen factor column ('", argVc['facC'], "') of the sampleMetadata should contain characters with at least three different classes"))
+flgF("!(tesC %in% c('pearson', 'spearman')) || mode(samDF[, argVc['facC']]) == 'numeric'", txtC = paste0("For 'pearson' and 'spearman', the chosen factor column ('", argVc['facC'], "') of the sampleMetadata should contain numbers only"))
+
+flgF("argVc['adjC'] %in% c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none')")
+
+flgF("0 <= as.numeric(argVc['thrN']) && as.numeric(argVc['thrN']) <= 1",
+     txtC = "(corrected) p-value threshold must be between 0 and 1")
+
+
+##------------------------------
+## Computation
+##------------------------------
+
+
+varDF <- univariateF(datMN = datMN,
+                     samDF = samDF,
+                     varDF = varDF,
+                     facC = argVc["facC"],
+                     tesC = tesC,
+                     adjC = argVc["adjC"],
+                     thrN = as.numeric(argVc["thrN"]),
+                     pdfC = argVc["figure"])
+
+
+##------------------------------
+## Ending
+##------------------------------
+
+
+## saving
+##--------
+
+varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
+                          varDF)
+
+write.table(varDF,
+            file = argVc["variableMetadata_out"],
+            quote = FALSE,
+            row.names = FALSE,
+            sep = "\t")
+
+## closing
+##--------
+
+cat("\nEnd of '", modNamC, "' Galaxy module call: ",
+    as.character(Sys.time()), "\n", sep = "")
+
+cat("\n\n\n============================================================================")
+cat("\nAdditional information about the call:\n")
+cat("\n1) Parameters:\n")
+print(cbind(value = argVc))
+
+cat("\n2) Session Info:\n")
+sessioninfo <- sessionInfo()
+cat(sessioninfo$R.version$version.string,"\n")
+cat("Main packages:\n")
+for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
+cat("Other loaded packages:\n")
+for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
+
+cat("============================================================================\n")
+
+sink()
+
+options(stringsAsFactors = strAsFacL)
+
+rm(list = ls())