Mercurial > repos > ethevenot > profia
diff profia_wrapper.R @ 3:5215be2bdc9e draft
planemo upload for repository https://github.com/workflow4metabolomics/profia.git commit 19ed25c048232776369a392ddb8c1860471acd29
| author | ethevenot |
|---|---|
| date | Tue, 09 Jan 2018 12:44:23 -0500 |
| parents | e29c563df582 |
| children |
line wrap: on
line diff
--- a/profia_wrapper.R Thu Jun 29 09:21:32 2017 -0400 +++ b/profia_wrapper.R Tue Jan 09 12:44:23 2018 -0500 @@ -1,262 +1,266 @@ -#!/usr/bin/env Rscript - -library(batch) ## parseCommandArgs - -argVc <- unlist(parseCommandArgs(evaluate=FALSE)) - -##------------------------------ -## Initializing -##------------------------------ - - -## libraries -##---------- - -suppressMessages(library(proFIA)) - - -## constants -##---------- - -modNamC <- "proFIA" ## module name - - -## log file -##--------- - -sink(argVc["information"]) - -cat("\nStart of the '", modNamC, "' Galaxy module call: ", - format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="") - - -## arguments -##---------- - - -if("zipfile" %in% names(argVc)) { - - zipfile <- argVc["zipfile"] - - ## We unzip automatically the raw files from the zip file - - if(exists("zipfile") && (zipfile!="")) { - if(!file.exists(zipfile)){ - error_message=paste("Cannot access the Zip file:", zipfile) - print(error_message) - stop(error_message) - } - - ## unzip - - suppressWarnings(unzip(zipfile, unzip="unzip")) - - ## get the directory name - - filesInZip=unzip(zipfile, list=T); - directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); - directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] - directory = "." - if (length(directories) == 1) directory = directories - - cat("files_root_directory\t",directory,"\n") - - } - -} else if ("library" %in% names(argVc)) { - - directory <- argVc["library"] - - if(!file.exists(directory)) { - - error_message=paste("Cannot access the directory:", directory,". Please check that the directory really exists.") - print(error_message) - stop(error_message) - - } - -} else { - - error_message <- "No zipfile nor input library available" - print(error_message) - stop(error_message) - -} - -##------------------------------ -## Computations -##------------------------------ - - -optWrnN <- options()$warn -options(warn = -1) - -stpI <- 1 - -cat("\n", stpI, ") Peak detection step ('proFIAset'):\n", sep = "") - -if("sizeMinN" %in% names(argVc) && argVc["sizeMinN"] != "none") { - if("scanMaxI" %in% names(argVc) && argVc["scanMaxI"] != "none") { - fiaset <- proFIAset(directory, - ppm = as.numeric(argVc["ppmN"]), - dmz = as.numeric(argVc["dmzN"]), - bandCoverage = as.numeric(ifelse("bandCoverageN" %in% names(argVc), argVc["bandCoverageN"], "0.3")), - sizeMin = as.numeric(argVc["sizeMinN"]), - scanmin = as.numeric(ifelse("scanMinI" %in% names(argVc), argVc["scanMinI"], "1")), - scanmax = as.numeric(argVc["scanMaxI"]), - parallel = FALSE) - } else { - fiaset <- proFIAset(directory, - ppm = as.numeric(argVc["ppmN"]), - dmz = as.numeric(argVc["dmzN"]), - bandCoverage = as.numeric(ifelse("bandCoverageN" %in% names(argVc), argVc["bandCoverageN"], "0.3")), - sizeMin = as.numeric(argVc["sizeMinN"]), - scanmin = as.numeric(ifelse("scanMinI" %in% names(argVc), argVc["scanMinI"], "1")), - parallel = FALSE) - } -} else { - if("scanMaxI" %in% names(argVc) && argVc["scanMaxI"] != "none") { - fiaset <- proFIAset(directory, - ppm = as.numeric(argVc["ppmN"]), - dmz = as.numeric(argVc["dmzN"]), - bandCoverage = as.numeric(ifelse("bandCoverageN" %in% names(argVc), argVc["bandCoverageN"], "0.3")), - scanmin = as.numeric(ifelse("scanMinI" %in% names(argVc), argVc["scanMinI"], "1")), - scanmax = as.numeric(argVc["scanMaxI"]), - parallel = FALSE) - } else { - fiaset <- proFIAset(directory, - ppm = as.numeric(argVc["ppmN"]), - dmz = as.numeric(argVc["dmzN"]), - bandCoverage = as.numeric(ifelse("bandCoverageN" %in% names(argVc), argVc["bandCoverageN"], "0.3")), - scanmin = as.numeric(ifelse("scanMinI" %in% names(argVc), argVc["scanMinI"], "1")), - parallel = FALSE) - } -} - -stpI <- stpI + 1 - -cat("\n", stpI, ") Peak alignment ('group.FIA'):\n", sep = "") - -fiaset <- group.FIA(fiaset, - ppmGroup = as.numeric(argVc["ppmGroupN"]), - dmz = as.numeric(argVc["dmzGroupN"]), - fracGroup = as.numeric(argVc["fracGroupN"])) - -stpI <- stpI + 1 - -cat("\n", stpI, ") Creating the peak table ('makeDataMatrix'):\n", sep = "") - -fiaset <- makeDataMatrix(fiaset, - maxo = FALSE) - -stpI <- stpI + 1 - -kI <- as.integer(argVc["kI"]) - - -###TODO add the two method for imputation. -if(kI > 0) { - - cat("\n", stpI, ") Imputing missing values ('imputeMissingValues.WKNN_TN'):\n", sep = "") - - fiaset <- imputeMissingValues.WKNN_TN(fiaset, - k = kI) - - stpI <- stpI + 1 -} - -options(warn = optWrnN) - - -##------------------------------ -## Ending -##------------------------------ - - -## Plotting -##--------- - -cat("\n", stpI, ") Plotting ('plot'):\n", sep = "") - -pdf(argVc["figure"]) - -plot(fiaset) - -dev.off() - -stpI <- stpI + 1 - -## Printing -##--------- - -cat("\n", stpI, ") Printing ('show'):\n", sep = "") - -fiaset - -stpI <- stpI + 1 - -## Exporting -##---------- - -cat("\n", stpI, ") Exporting ('exportDataMatrix', 'exportSampleMetadata', 'exportVariableMetadata'):\n", sep = "") - -datMN <- exportDataMatrix(fiaset) -samDF <- exportSampleMetadata(fiaset) -varDF <- exportVariableMetadata(fiaset) - -if(nrow(datMN) == nrow(samDF) && ncol(datMN) == nrow(varDF)) { - datDF <- as.data.frame(t(datMN)) -} else { - datDF <- as.data.frame(datMN) -} -rownames(varDF) <- rownames(datDF) - -datDF <- cbind.data.frame(dataMatrix = rownames(datDF), - datDF) -write.table(datDF, - file = argVc["dataMatrix_out"], - quote = FALSE, - row.names = FALSE, - sep = "\t") - -samDF <- cbind.data.frame(sampleMetadata = samDF[, "sampleID"], - class = samDF[, "class"]) -write.table(samDF, - file = argVc["sampleMetadata_out"], - quote = FALSE, - row.names = FALSE, - sep = "\t") - -varDF <- cbind.data.frame(variableMetadata = rownames(varDF), - varDF) -write.table(varDF, - file = argVc["variableMetadata_out"], - quote = FALSE, - row.names = FALSE, - sep = "\t") - - -## Closing -##-------- - -cat("\nEnd of '", modNamC, "' Galaxy module call: ", - as.character(Sys.time()), "\n", sep = "") - -cat("\n\n\n============================================================================") -cat("\nAdditional information about the call:\n") -cat("\n1) Parameters:\n") -print(cbind(value = argVc)) - -cat("\n2) Session Info:\n") -sessioninfo <- sessionInfo() -cat(sessioninfo$R.version$version.string,"\n") -cat("Main packages:\n") -for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") -cat("Other loaded packages:\n") -for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") - -cat("============================================================================\n") - -sink() - -rm(list = ls()) +#!/usr/bin/env Rscript + +library(batch) ## parseCommandArgs + +argVc <- unlist(parseCommandArgs(evaluate=FALSE)) + +##------------------------------ +## Initializing +##------------------------------ + + +## libraries +##---------- + +suppressMessages(library(proFIA)) + + +## constants +##---------- + +modNamC <- "proFIA" ## module name + + +## log file +##--------- + +sink(argVc["information"]) + +cat("\nStart of the '", modNamC, "' Galaxy module call: ", + format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="") + + +## arguments +##---------- + + +if("zipfile" %in% names(argVc)) { + + zipfile <- argVc["zipfile"] + + ## We unzip automatically the raw files from the zip file + + if(exists("zipfile") && (zipfile!="")) { + if(!file.exists(zipfile)){ + error_message=paste("Cannot access the Zip file:", zipfile) + print(error_message) + stop(error_message) + } + + ## unzip + + suppressWarnings(unzip(zipfile, unzip="unzip")) + + ## get the directory name + + filesInZip=unzip(zipfile, list=T); + directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); + directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] + directory = "." + if (length(directories) == 1) directory = directories + + cat("files_root_directory\t",directory,"\n") + + } + +} else if ("library" %in% names(argVc)) { + + directory <- argVc["library"] + + if(!file.exists(directory)) { + + error_message=paste("Cannot access the directory:", directory,". Please check that the directory really exists.") + print(error_message) + stop(error_message) + + } + +} else { + + error_message <- "No zipfile nor input library available" + print(error_message) + stop(error_message) + +} + +##------------------------------ +## Computations +##------------------------------ + + +optWrnN <- options()$warn +options(warn = -1) + +stpI <- 1 + +cat("\n", stpI, ") Peak detection step ('proFIAset'):\n", sep = "") + +if("sizeMinN" %in% names(argVc) && argVc["sizeMinN"] != "none") { + if("scanMaxI" %in% names(argVc) && argVc["scanMaxI"] != "none") { + fiaset <- proFIAset(directory, + ppm = as.numeric(argVc["ppmN"]), + dmz = as.numeric(argVc["dmzN"]), + bandCoverage = as.numeric(ifelse("bandCoverageN" %in% names(argVc), argVc["bandCoverageN"], "0.3")), + sizeMin = as.numeric(argVc["sizeMinN"]), + scanmin = as.numeric(ifelse("scanMinI" %in% names(argVc), argVc["scanMinI"], "1")), + scanmax = as.numeric(argVc["scanMaxI"]), + parallel = TRUE) + } else { + fiaset <- proFIAset(directory, + ppm = as.numeric(argVc["ppmN"]), + dmz = as.numeric(argVc["dmzN"]), + bandCoverage = as.numeric(ifelse("bandCoverageN" %in% names(argVc), argVc["bandCoverageN"], "0.3")), + sizeMin = as.numeric(argVc["sizeMinN"]), + scanmin = as.numeric(ifelse("scanMinI" %in% names(argVc), argVc["scanMinI"], "1")), + parallel = TRUE) + } +} else { + if("scanMaxI" %in% names(argVc) && argVc["scanMaxI"] != "none") { + fiaset <- proFIAset(directory, + ppm = as.numeric(argVc["ppmN"]), + dmz = as.numeric(argVc["dmzN"]), + bandCoverage = as.numeric(ifelse("bandCoverageN" %in% names(argVc), argVc["bandCoverageN"], "0.3")), + scanmin = as.numeric(ifelse("scanMinI" %in% names(argVc), argVc["scanMinI"], "1")), + scanmax = as.numeric(argVc["scanMaxI"]), + parallel = TRUE) + } else { + fiaset <- proFIAset(directory, + ppm = as.numeric(argVc["ppmN"]), + dmz = as.numeric(argVc["dmzN"]), + bandCoverage = as.numeric(ifelse("bandCoverageN" %in% names(argVc), argVc["bandCoverageN"], "0.3")), + scanmin = as.numeric(ifelse("scanMinI" %in% names(argVc), argVc["scanMinI"], "1")), + parallel = TRUE) + } +} + +stpI <- stpI + 1 + +cat("\n", stpI, ") Peak alignment ('group.FIA'):\n", sep = "") + +fiaset <- group.FIA(fiaset, + ppmGroup = as.numeric(argVc["ppmGroupN"]), + dmz = as.numeric(argVc["dmzGroupN"]), + fracGroup = as.numeric(argVc["fracGroupN"])) + +stpI <- stpI + 1 + +cat("\n", stpI, ") Creating the peak table ('makeDataMatrix'):\n", sep = "") + +fiaset <- makeDataMatrix(fiaset, + maxo = FALSE) + +stpI <- stpI + 1 + + +if(argVc["imputeC"] == "randomForest") { + + cat("\n", stpI, ") Imputing missing values ('impute.randomForest'):\n", sep = "") + + set.seed(123) + + fiaset <- impute.randomForest(fiaset, + parallel = FALSE) + + set.seed(NULL) + + stpI <- stpI + 1 +} +###TODO Fix the error message to set 'parallel' to TRUE: You must register a 'foreach' parallel backend to run 'missForest' in parallel. Set 'parallelize' to 'no' to compute serially. +###TODO add the impute.KNN_TN method + + +options(warn = optWrnN) + + +##------------------------------ +## Ending +##------------------------------ + + +## Plotting +##--------- + +cat("\n", stpI, ") Plotting ('plot'):\n", sep = "") + +pdf(argVc["figure"]) + +plot(fiaset) + +dev.off() + +stpI <- stpI + 1 + +## Printing +##--------- + +cat("\n", stpI, ") Printing ('show'):\n", sep = "") + +fiaset + +stpI <- stpI + 1 + +## Exporting +##---------- + +cat("\n", stpI, ") Exporting ('exportDataMatrix', 'exportSampleMetadata', 'exportVariableMetadata'):\n", sep = "") + +datMN <- exportDataMatrix(fiaset) +samDF <- exportSampleMetadata(fiaset) +varDF <- exportVariableMetadata(fiaset) + +if(nrow(datMN) == nrow(samDF) && ncol(datMN) == nrow(varDF)) { + datDF <- as.data.frame(t(datMN)) +} else { + datDF <- as.data.frame(datMN) +} +rownames(varDF) <- rownames(datDF) + +datDF <- cbind.data.frame(dataMatrix = rownames(datDF), + datDF) +write.table(datDF, + file = argVc["dataMatrix_out"], + quote = FALSE, + row.names = FALSE, + sep = "\t") + +samDF <- cbind.data.frame(sampleMetadata = samDF[, "sampleID"], + class = samDF[, "class"]) +write.table(samDF, + file = argVc["sampleMetadata_out"], + quote = FALSE, + row.names = FALSE, + sep = "\t") + +varDF <- cbind.data.frame(variableMetadata = rownames(varDF), + varDF) +write.table(varDF, + file = argVc["variableMetadata_out"], + quote = FALSE, + row.names = FALSE, + sep = "\t") + + +## Closing +##-------- + +cat("\nEnd of '", modNamC, "' Galaxy module call: ", + as.character(Sys.time()), "\n", sep = "") + +cat("\n\n\n============================================================================") +cat("\nAdditional information about the call:\n") +cat("\n1) Parameters:\n") +print(cbind(value = argVc)) + +cat("\n2) Session Info:\n") +sessioninfo <- sessionInfo() +cat(sessioninfo$R.version$version.string,"\n") +cat("Main packages:\n") +for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") +cat("Other loaded packages:\n") +for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") + +cat("============================================================================\n") + +sink() + +rm(list = ls())
