comparison biosigner_wrapper.R @ 0:d0bcc990abcf draft default tip

planemo upload for repository https://github.com/workflow4metabolomics/biosigner.git commit 23d58cfd97411ad5d272971896914ce99e30b0ab
author ethevenot
date Tue, 24 Oct 2017 09:03:31 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d0bcc990abcf
1 #!/usr/bin/Rscript --vanilla --slave --no-site-file
2
3
4 library(batch) ## parseCommandArgs
5
6 argVc <- unlist(parseCommandArgs(evaluate=FALSE))
7
8
9 ##------------------------------
10 ## Initializing
11 ##------------------------------
12
13 ## options
14 ##--------
15
16 strAsFacL <- options()$stringsAsFactors
17 options(stringsAsFactors = FALSE)
18
19 ## libraries
20 ##----------
21
22 suppressMessages(library(biosigner))
23
24 if(packageVersion("biosigner") < "1.0.0")
25 stop("Please use 'biosigner' versions of 1.0.0 and above")
26 if(packageVersion("ropls") < "1.4.0")
27 stop("Please use 'ropls' versions of 1.4.0 and above")
28
29 ## constants
30 ##----------
31
32 modNamC <- "Biosigner" ## module name
33
34 topEnvC <- environment()
35 flgC <- "\n"
36
37 ## functions
38 ##----------
39
40 flgF <- function(tesC,
41 envC = topEnvC,
42 txtC = NA) { ## management of warning and error messages
43
44 tesL <- eval(parse(text = tesC), envir = envC)
45
46 if(!tesL) {
47
48 sink(NULL)
49 stpTxtC <- ifelse(is.na(txtC),
50 paste0(tesC, " is FALSE"),
51 txtC)
52
53 stop(stpTxtC,
54 call. = FALSE)
55
56 }
57
58 } ## flgF
59
60
61 ## log file
62 ##---------
63
64 sink(argVc["information"])
65
66 cat("\nStart of the '", modNamC, "' Galaxy module call: ",
67 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
68
69
70 ## arguments
71 ##----------
72
73 xMN <- t(as.matrix(read.table(argVc["dataMatrix_in"],
74 check.names = FALSE,
75 header = TRUE,
76 row.names = 1,
77 sep = "\t")))
78
79 samDF <- read.table(argVc["sampleMetadata_in"],
80 check.names = FALSE,
81 header = TRUE,
82 row.names = 1,
83 sep = "\t")
84 flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
85
86 varDF <- read.table(argVc["variableMetadata_in"],
87 check.names = FALSE,
88 header = TRUE,
89 row.names = 1,
90 sep = "\t")
91 flgF("identical(colnames(xMN), rownames(varDF))", txtC = "Variable names (or number) in the data matrix (first column) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
92
93 flgF("argVc['respC'] %in% colnames(samDF)",
94 txtC = paste0("Class argument (", argVc['respC'], ") must be either none or one of the column names (first row) of your sample metadata"))
95 respVc <- samDF[, argVc["respC"]]
96 flgF("mode(respVc) == 'character'",
97 txtC = paste0("'", argVc['respC'], "' column of sampleMetadata does not contain only characters"))
98 respFc <- factor(respVc)
99 flgF("length(levels(respFc)) == 2",
100 txtC = paste0("'", argVc['respC'], "' column of sampleMetadata does not contain only 2 types of characters (e.g., 'case' and 'control')"))
101 tierMaxC <- ifelse("tierC" %in% names(argVc), argVc["tierC"], "S")
102 pvalN <- ifelse("pvalN" %in% names(argVc), as.numeric(argVc["pvalN"]), 0.05)
103
104
105 ##------------------------------
106 ## Computation and plot
107 ##------------------------------
108
109
110 sink()
111
112 optWrnN <- options()$warn
113 options(warn = -1)
114
115 if("seedI" %in% names(argVc) && argVc["seedI"] != "0")
116 set.seed(as.integer(argVc["seedI"]))
117
118 bsnLs <- biosign(x = xMN,
119 y = respFc,
120 methodVc = ifelse("methodC" %in% names(argVc), argVc["methodC"], "all"),
121 bootI = ifelse("bootI" %in% names(argVc), as.numeric(argVc["bootI"]), 50),
122 pvalN = pvalN,
123 printL = FALSE,
124 plotL = FALSE,
125 .sinkC = argVc["information"])
126
127 if("seedI" %in% names(argVc) && argVc["seedI"] != "0")
128 set.seed(NULL)
129
130 tierMC <- bsnLs@tierMC
131
132 if(!is.null(tierMC)) {
133 plot(bsnLs,
134 tierMaxC = tierMaxC,
135 file.pdfC = "figure_tier.pdf",
136 .sinkC = argVc["information"])
137 file.rename("figure_tier.pdf", argVc["figure_tier"])
138 plot(bsnLs,
139 tierMaxC = tierMaxC,
140 typeC = "boxplot",
141 file.pdfC = "figure_boxplot.pdf",
142 .sinkC = argVc["information"])
143 file.rename("figure_boxplot.pdf", argVc["figure_boxplot"])
144 } else {
145 pdf(argVc["figure_tier"])
146 plot(1, bty = "n", type = "n",
147 xaxt = "n", yaxt = "n", xlab = "", ylab = "")
148 text(mean(par("usr")[1:2]), mean(par("usr")[3:4]),
149 labels = "No significant variable to display")
150 dev.off()
151 pdf(argVc["figure_boxplot"])
152 plot(1, bty = "n", type = "n",
153 xaxt = "n", yaxt = "n", xlab = "", ylab = "")
154 text(mean(par("usr")[1:2]), mean(par("usr")[3:4]),
155 labels = "No significant variable to display")
156 dev.off()
157 }
158
159
160 options(warn = optWrnN)
161
162
163 ##------------------------------
164 ## Print
165 ##------------------------------
166
167 sink(argVc["information"], append = TRUE)
168
169 tierFullVc <- c("S", LETTERS[1:5])
170 tierVc <- tierFullVc[1:which(tierFullVc == tierMaxC)]
171
172 if(sum(tierMC %in% tierVc)) {
173 cat("\nSignificant features from '", paste(tierVc, collapse = "', '"), "' tiers:\n", sep = "")
174 print(tierMC[apply(tierMC, 1, function(rowVc) sum(rowVc %in% tierVc) > 0), ,
175 drop = FALSE])
176 cat("\nAccuracy:\n")
177 print(round(getAccuracyMN(bsnLs), 3))
178 } else
179 cat("\nNo significant variable found for any classifier\n")
180
181
182 ##------------------------------
183 ## Ending
184 ##------------------------------
185
186 ## Saving
187 ##-------
188
189 if(!is.null(tierMC)) {
190 tierDF <- data.frame(tier = sapply(rownames(varDF),
191 function(varC) {
192 varTirVc <- tierMC[varC, ]
193 varTirVc <- names(varTirVc)[varTirVc %in% tierVc]
194 paste(varTirVc, collapse = "|")
195 }),
196 stringsAsFactors = FALSE)
197 colnames(tierDF) <- paste(argVc["respC"],
198 colnames(tierDF),
199 paste(tierVc, collapse = ""),
200 sep = "_")
201 varDF <- cbind.data.frame(varDF, tierDF)
202 }
203
204 ## variableMetadata
205
206 varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
207 varDF)
208 write.table(varDF,
209 file = argVc["variableMetadata_out"],
210 quote = FALSE,
211 row.names = FALSE,
212 sep = "\t")
213
214
215 ## Closing
216 ##--------
217
218 cat("\nEnd of '", modNamC, "' Galaxy module call: ",
219 as.character(Sys.time()), "\n", sep = "")
220
221 cat("\n\n\n============================================================================")
222
223 cat("\nAdditional information about the call:\n")
224
225 cat("\n1) Parameters:\n")
226 print(cbind(value = argVc))
227
228 cat("\n2) Session Info:\n")
229 sessioninfo <- sessionInfo()
230 cat(sessioninfo$R.version$version.string,"\n")
231 cat("Main packages:\n")
232 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
233 cat("Other loaded packages:\n")
234 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
235
236 cat("============================================================================\n")
237
238 sink()
239
240 options(stringsAsFactors = strAsFacL)
241
242 rm(list = ls())