Mercurial > repos > lecorguille > xcms_merge
changeset 16:40791ab6c06b draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit e131bacd37bfaf2c4132fd214c81db9b8a9df513
author | lecorguille |
---|---|
date | Mon, 17 Sep 2018 08:44:25 -0400 |
parents | 2c7d3db37974 |
children | 64a3067ba81e |
files | lib-xcms3.x.x.r lib.r macros.xml macros_xcms.xml xcms_merge.r xcms_merge.xml |
diffstat | 6 files changed, 177 insertions(+), 161 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib-xcms3.x.x.r Mon Sep 17 08:44:25 2018 -0400 @@ -0,0 +1,152 @@ + + +#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 +# https://github.com/sneumann/xcms/issues/250 +groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) { + mzfmt <- paste("%.", mzdec, "f", sep = "") + rtfmt <- paste("%.", rtdec, "f", sep = "") + + gnames <- paste("M", sprintf(mzfmt, featureDefinitions(xdata)[,"mzmed"]), "T", + sprintf(rtfmt, featureDefinitions(xdata)[,"rtmed"]), sep = "") + + if (any(dup <- duplicated(gnames))) + for (dupname in unique(gnames[dup])) { + dupidx <- which(gnames == dupname) + gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_") + } + + return (gnames) +} + +#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 +# https://github.com/sneumann/xcms/issues/247 +.concatenate_XCMSnExp <- function(...) { + x <- list(...) + if (length(x) == 0) + return(NULL) + if (length(x) == 1) + return(x[[1]]) + ## Check that all are XCMSnExp objects. + if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp"))))) + stop("All passed objects should be 'XCMSnExp' objects") + new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp") + ## If any of the XCMSnExp has alignment results or detected features drop + ## them! + x <- lapply(x, function(z) { + if (hasAdjustedRtime(z)) { + z <- dropAdjustedRtime(z) + warning("Adjusted retention times found, had to drop them.") + } + if (hasFeatures(z)) { + z <- dropFeatureDefinitions(z) + warning("Feature definitions found, had to drop them.") + } + z + }) + ## Combine peaks + fls <- lapply(x, fileNames) + startidx <- cumsum(lengths(fls)) + pks <- lapply(x, chromPeaks) + procH <- lapply(x, processHistory) + for (i in 2:length(fls)) { + pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1] + procH[[i]] <- lapply(procH[[i]], function(z) { + z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1]) + z + }) + } + pks <- do.call(rbind, pks) + new_x@.processHistory <- unlist(procH) + chromPeaks(new_x) <- pks + if (validObject(new_x)) + new_x +} + +#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 +# https://github.com/sneumann/xcms/issues/247 +.concatenate_OnDiskMSnExp <- function(...) { + x <- list(...) + if (length(x) == 0) + return(NULL) + if (length(x) == 1) + return(x[[1]]) + ## Check that all are XCMSnExp objects. + if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp"))))) + stop("All passed objects should be 'OnDiskMSnExp' objects") + ## Check processingQueue + procQ <- lapply(x, function(z) z@spectraProcessingQueue) + new_procQ <- procQ[[1]] + is_ok <- unlist(lapply(procQ, function(z) + !is.character(all.equal(new_procQ, z)) + )) + if (any(!is_ok)) { + warning("Processing queues from the submitted objects differ! ", + "Dropping the processing queue.") + new_procQ <- list() + } + ## processingData + fls <- lapply(x, function(z) z@processingData@files) + startidx <- cumsum(lengths(fls)) + ## featureData + featd <- lapply(x, fData) + ## Have to update the file index and the spectrum names. + for (i in 2:length(featd)) { + featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1] + rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames( + fileIds = featd[[i]]$fileIdx, + spectrumIds = featd[[i]]$spIdx, + nSpectra = nrow(featd[[i]]), + nFiles = length(unlist(fls)) + ) + } + featd <- do.call(rbind, featd) + featd$spectrum <- 1:nrow(featd) + ## experimentData + expdata <- lapply(x, function(z) { + ed <- z@experimentData + data.frame(instrumentManufacturer = ed@instrumentManufacturer, + instrumentModel = ed@instrumentModel, + ionSource = ed@ionSource, + analyser = ed@analyser, + detectorType = ed@detectorType, + stringsAsFactors = FALSE) + }) + expdata <- do.call(rbind, expdata) + expdata <- new("MIAPE", + instrumentManufacturer = expdata$instrumentManufacturer, + instrumentModel = expdata$instrumentModel, + ionSource = expdata$ionSource, + analyser = expdata$analyser, + detectorType = expdata$detectorType) + + ## protocolData + protodata <- lapply(x, function(z) z@protocolData) + if (any(unlist(lapply(protodata, nrow)) > 0)) + warning("Found non-empty protocol data, but merging protocol data is", + " currently not supported. Skipped.") + ## phenoData + pdata <- do.call(rbind, lapply(x, pData)) + res <- new( + "OnDiskMSnExp", + phenoData = new("NAnnotatedDataFrame", data = pdata), + featureData = new("AnnotatedDataFrame", featd), + processingData = new("MSnProcess", + processing = paste0("Concatenated [", date(), "]"), + files = unlist(fls), smoothed = NA), + experimentData = expdata, + spectraProcessingQueue = new_procQ) + if (validObject(res)) + res +} + +#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 +# https://github.com/sneumann/xcms/issues/247 +c.XCMSnExp <- function(...) { + .concatenate_XCMSnExp(...) +} + +#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 +# https://github.com/sneumann/xcms/issues/247 +c.MSnbase <- function(...) { + .concatenate_OnDiskMSnExp(...) +}
--- a/lib.r Wed Sep 05 05:57:56 2018 -0400 +++ b/lib.r Mon Sep 17 08:44:25 2018 -0400 @@ -134,6 +134,15 @@ } #@author G. Le Corguille +# This function convert the remain NA to 0 in the dataMatrix +naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { + if (naTOzero){ + dataMatrix[is.na(dataMatrix)] <- 0 + } + return (dataMatrix) +} + +#@author G. Le Corguille # Draw the plotChromPeakDensity 3 per page in a pdf file getPlotChromPeakDensity <- function(xdata, mzdigit=4) { pdf(file="plotChromPeakDensity.pdf", width=16, height=12) @@ -177,7 +186,7 @@ #@author G. Le Corguille # value: intensity values to be used into, maxo or intb -getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, variableMetadataOutput, dataMatrixOutput) { +getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput) { dataMatrix <- featureValues(xdata, method="medret", value=intval) colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix)) dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix) @@ -187,6 +196,7 @@ variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) + dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) @@ -498,155 +508,3 @@ return (xset) } } - - -#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 -# https://github.com/sneumann/xcms/issues/250 -groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) { - mzfmt <- paste("%.", mzdec, "f", sep = "") - rtfmt <- paste("%.", rtdec, "f", sep = "") - - gnames <- paste("M", sprintf(mzfmt, featureDefinitions(xdata)[,"mzmed"]), "T", - sprintf(rtfmt, featureDefinitions(xdata)[,"rtmed"]), sep = "") - - if (any(dup <- duplicated(gnames))) - for (dupname in unique(gnames[dup])) { - dupidx <- which(gnames == dupname) - gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_") - } - - return (gnames) -} - -#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 -# https://github.com/sneumann/xcms/issues/247 -.concatenate_XCMSnExp <- function(...) { - x <- list(...) - if (length(x) == 0) - return(NULL) - if (length(x) == 1) - return(x[[1]]) - ## Check that all are XCMSnExp objects. - if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp"))))) - stop("All passed objects should be 'XCMSnExp' objects") - new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp") - ## If any of the XCMSnExp has alignment results or detected features drop - ## them! - x <- lapply(x, function(z) { - if (hasAdjustedRtime(z)) { - z <- dropAdjustedRtime(z) - warning("Adjusted retention times found, had to drop them.") - } - if (hasFeatures(z)) { - z <- dropFeatureDefinitions(z) - warning("Feature definitions found, had to drop them.") - } - z - }) - ## Combine peaks - fls <- lapply(x, fileNames) - startidx <- cumsum(lengths(fls)) - pks <- lapply(x, chromPeaks) - procH <- lapply(x, processHistory) - for (i in 2:length(fls)) { - pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1] - procH[[i]] <- lapply(procH[[i]], function(z) { - z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1]) - z - }) - } - pks <- do.call(rbind, pks) - new_x@.processHistory <- unlist(procH) - chromPeaks(new_x) <- pks - if (validObject(new_x)) - new_x -} - -#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 -# https://github.com/sneumann/xcms/issues/247 -.concatenate_OnDiskMSnExp <- function(...) { - x <- list(...) - if (length(x) == 0) - return(NULL) - if (length(x) == 1) - return(x[[1]]) - ## Check that all are XCMSnExp objects. - if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp"))))) - stop("All passed objects should be 'OnDiskMSnExp' objects") - ## Check processingQueue - procQ <- lapply(x, function(z) z@spectraProcessingQueue) - new_procQ <- procQ[[1]] - is_ok <- unlist(lapply(procQ, function(z) - !is.character(all.equal(new_procQ, z)) - )) - if (any(!is_ok)) { - warning("Processing queues from the submitted objects differ! ", - "Dropping the processing queue.") - new_procQ <- list() - } - ## processingData - fls <- lapply(x, function(z) z@processingData@files) - startidx <- cumsum(lengths(fls)) - ## featureData - featd <- lapply(x, fData) - ## Have to update the file index and the spectrum names. - for (i in 2:length(featd)) { - featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1] - rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames( - fileIds = featd[[i]]$fileIdx, - spectrumIds = featd[[i]]$spIdx, - nSpectra = nrow(featd[[i]]), - nFiles = length(unlist(fls)) - ) - } - featd <- do.call(rbind, featd) - featd$spectrum <- 1:nrow(featd) - ## experimentData - expdata <- lapply(x, function(z) { - ed <- z@experimentData - data.frame(instrumentManufacturer = ed@instrumentManufacturer, - instrumentModel = ed@instrumentModel, - ionSource = ed@ionSource, - analyser = ed@analyser, - detectorType = ed@detectorType, - stringsAsFactors = FALSE) - }) - expdata <- do.call(rbind, expdata) - expdata <- new("MIAPE", - instrumentManufacturer = expdata$instrumentManufacturer, - instrumentModel = expdata$instrumentModel, - ionSource = expdata$ionSource, - analyser = expdata$analyser, - detectorType = expdata$detectorType) - - ## protocolData - protodata <- lapply(x, function(z) z@protocolData) - if (any(unlist(lapply(protodata, nrow)) > 0)) - warning("Found non-empty protocol data, but merging protocol data is", - " currently not supported. Skipped.") - ## phenoData - pdata <- do.call(rbind, lapply(x, pData)) - res <- new( - "OnDiskMSnExp", - phenoData = new("NAnnotatedDataFrame", data = pdata), - featureData = new("AnnotatedDataFrame", featd), - processingData = new("MSnProcess", - processing = paste0("Concatenated [", date(), "]"), - files = unlist(fls), smoothed = NA), - experimentData = expdata, - spectraProcessingQueue = new_procQ) - if (validObject(res)) - res -} - -#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 -# https://github.com/sneumann/xcms/issues/247 -c.XCMSnExp <- function(...) { - .concatenate_XCMSnExp(...) -} - -#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 -# https://github.com/sneumann/xcms/issues/247 -c.MSnbase <- function(...) { - .concatenate_OnDiskMSnExp(...) -}
--- a/macros.xml Wed Sep 05 05:57:56 2018 -0400 +++ b/macros.xml Mon Sep 17 08:44:25 2018 -0400 @@ -31,7 +31,7 @@ <token name="@INPUT_IMAGE_LABEL@">RData file</token> - <token name="@INPUT_IMAGE_HELP@">It contain a xcms3::XCMSnExp object (named xdata)</token> + <token name="@INPUT_IMAGE_HELP@">It contains a xcms3::XCMSnExp object (named xdata)</token> <!-- MISC -->
--- a/macros_xcms.xml Wed Sep 05 05:57:56 2018 -0400 +++ b/macros_xcms.xml Mon Sep 17 08:44:25 2018 -0400 @@ -29,7 +29,7 @@ <xml name="input_file_load"> <section name="file_load_section" title="Resubmit your raw dataset or your zip file"> <conditional name="file_load_conditional"> - <param name="file_load_select" type="select" label="Resubmit your dataset or your zip file" help="Use only if you get a message which say that your original dataset or zip file have been deleted on the server." > + <param name="file_load_select" type="select" label="Resubmit your dataset or your zip file" help="Use only if you get a message saying that your original dataset or zip file have been deleted on the server." > <option value="no" >no need</option> <option value="yes" >yes</option> </param> @@ -85,6 +85,7 @@ numDigitsMZ $peaklist.numDigitsMZ numDigitsRT $peaklist.numDigitsRT intval $peaklist.intval + naTOzero $peaklist.naTOzero #end if </token> @@ -100,6 +101,7 @@ <option value="maxo">maxo</option> <option value="intb">intb</option> </param> + <param name="naTOzero" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Replace the remain NA by 0 in the dataMatrix" help="Rather mandatory for some downstream statistical steps"/> </when> <when value="false" /> </conditional> @@ -225,7 +227,7 @@ <token name="@HELP_XCMS_MANUAL@"> -For details and explanations for all the parameters and the workflow of xcms_ package, see its manual_ and this example_ +For details and explanations concerning all the parameters and workflow of xcms_ package, see its manual_ and this example_ .. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html .. _manual: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf
--- a/xcms_merge.r Wed Sep 05 05:57:56 2018 -0400 +++ b/xcms_merge.r Mon Sep 17 08:44:25 2018 -0400 @@ -3,6 +3,7 @@ #Import the different functions source_local <- function(fname){ argv <- commandArgs(trailingOnly=FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } source_local("lib.r") +source_local("lib-xcms3.x.x.r") pkgs <- c("xcms","batch") loadAndDisplayPackages(pkgs)
--- a/xcms_merge.xml Wed Sep 05 05:57:56 2018 -0400 +++ b/xcms_merge.xml Mon Sep 17 08:44:25 2018 -0400 @@ -1,5 +1,5 @@ <tool id="xcms_merge" name="xcms findChromPeaks Merger" version="@WRAPPER_VERSION@.0"> - <description>Merge xcms findChromPeaks RData in one to be used by group</description> + <description>Merge xcms findChromPeaks RData into a unique file to be used by group</description> <macros> <import>macros.xml</import> @@ -90,10 +90,10 @@ Description ----------- -This tool will allow you to run one xcms findChromPeaks process per sample in parallel and then to merge all RData images in one. +This tool allows you to run one xcms findChromPeaks process per sample in parallel and then to merge all RData images into one. The result is then suitable for xcms groupChromPeaks. -You can provide a sampleMetadata table to attribute phenotypic value to your samples. +You can provide a sampleMetadata table to attribute phenotypic values to your samples. ----------------- @@ -145,7 +145,9 @@ Example of a sampleMetadata: ---------------------------- ------------ +=========================== ============ +Samples class +=========================== ============ HU_neg_028 bio --------------------------- ------------ HU_neg_034 bio @@ -155,7 +157,8 @@ Blanc06 blank --------------------------- ------------ Blanc09 blank ---------------------------- ------------ +=========================== ============ + ------------ Output files