changeset 38:67ee46ce9781 draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit e131bacd37bfaf2c4132fd214c81db9b8a9df513
author lecorguille
date Mon, 17 Sep 2018 08:46:01 -0400
parents 35a20d7c9f33
children b0d4fe748c9d
files abims_xcms_retcor.xml lib-xcms3.x.x.r lib.r macros.xml macros_xcms.xml xcms_retcor.r
diffstat 6 files changed, 170 insertions(+), 157 deletions(-) [+]
line wrap: on
line diff
--- a/abims_xcms_retcor.xml	Wed Sep 05 05:59:21 2018 -0400
+++ b/abims_xcms_retcor.xml	Mon Sep 17 08:46:01 2018 -0400
@@ -314,7 +314,7 @@
 
 - IMPROVEMENT: the advanced options are now in sections. It will allow you to access to all the parameters and to know their default values.
 
-- CHANGE: removing of the TIC and BPC plots. You can new use the dedicated tool "xcms plot chromatogram"
+- CHANGE: removing of the TIC and BPC plots. You can now use the dedicated tool "xcms plot chromatogram"
 
 
 **Version 2.1.1 - 29/11/2017**
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib-xcms3.x.x.r	Mon Sep 17 08:46:01 2018 -0400
@@ -0,0 +1,152 @@
+
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/xcms/issues/250
+groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) {
+    mzfmt <- paste("%.", mzdec, "f", sep = "")
+    rtfmt <- paste("%.", rtdec, "f", sep = "")
+
+    gnames <- paste("M", sprintf(mzfmt, featureDefinitions(xdata)[,"mzmed"]), "T",
+                    sprintf(rtfmt, featureDefinitions(xdata)[,"rtmed"]), sep = "")
+
+    if (any(dup <- duplicated(gnames)))
+        for (dupname in unique(gnames[dup])) {
+            dupidx <- which(gnames == dupname)
+            gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_")
+        }
+
+    return (gnames)
+}
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/xcms/issues/247
+.concatenate_XCMSnExp <- function(...) {
+    x <- list(...)
+    if (length(x) == 0)
+        return(NULL)
+    if (length(x) == 1)
+        return(x[[1]])
+    ## Check that all are XCMSnExp objects.
+    if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp")))))
+        stop("All passed objects should be 'XCMSnExp' objects")
+    new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp")
+    ## If any of the XCMSnExp has alignment results or detected features drop
+    ## them!
+    x <- lapply(x, function(z) {
+        if (hasAdjustedRtime(z)) {
+            z <- dropAdjustedRtime(z)
+            warning("Adjusted retention times found, had to drop them.")
+        }
+        if (hasFeatures(z)) {
+            z <- dropFeatureDefinitions(z)
+            warning("Feature definitions found, had to drop them.")
+        }
+        z
+    })
+    ## Combine peaks
+    fls <- lapply(x, fileNames)
+    startidx <- cumsum(lengths(fls))
+    pks <- lapply(x, chromPeaks)
+    procH <- lapply(x, processHistory)
+    for (i in 2:length(fls)) {
+        pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1]
+        procH[[i]] <- lapply(procH[[i]], function(z) {
+            z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1])
+            z
+            })
+    }
+    pks <- do.call(rbind, pks)
+    new_x@.processHistory <- unlist(procH)
+    chromPeaks(new_x) <- pks
+    if (validObject(new_x))
+        new_x
+}
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/xcms/issues/247
+.concatenate_OnDiskMSnExp <- function(...) {
+    x <- list(...)
+    if (length(x) == 0)
+        return(NULL)
+    if (length(x) == 1)
+        return(x[[1]])
+    ## Check that all are XCMSnExp objects.
+    if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp")))))
+        stop("All passed objects should be 'OnDiskMSnExp' objects")
+    ## Check processingQueue
+    procQ <- lapply(x, function(z) z@spectraProcessingQueue)
+    new_procQ <- procQ[[1]]
+    is_ok <- unlist(lapply(procQ, function(z)
+        !is.character(all.equal(new_procQ, z))
+        ))
+    if (any(!is_ok)) {
+        warning("Processing queues from the submitted objects differ! ",
+                "Dropping the processing queue.")
+        new_procQ <- list()
+    }
+    ## processingData
+    fls <- lapply(x, function(z) z@processingData@files)
+    startidx <- cumsum(lengths(fls))
+    ## featureData
+    featd <- lapply(x, fData)
+    ## Have to update the file index and the spectrum names.
+    for (i in 2:length(featd)) {
+        featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1]
+        rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames(
+                                              fileIds = featd[[i]]$fileIdx,
+                                              spectrumIds = featd[[i]]$spIdx,
+                                              nSpectra = nrow(featd[[i]]),
+                                              nFiles = length(unlist(fls))
+                                          )
+    }
+    featd <- do.call(rbind, featd)
+    featd$spectrum <- 1:nrow(featd)
+    ## experimentData
+    expdata <- lapply(x, function(z) {
+        ed <- z@experimentData
+        data.frame(instrumentManufacturer = ed@instrumentManufacturer,
+                   instrumentModel = ed@instrumentModel,
+                   ionSource = ed@ionSource,
+                   analyser = ed@analyser,
+                   detectorType = ed@detectorType,
+                   stringsAsFactors = FALSE)
+    })
+    expdata <- do.call(rbind, expdata)
+    expdata <- new("MIAPE",
+                   instrumentManufacturer = expdata$instrumentManufacturer,
+                   instrumentModel = expdata$instrumentModel,
+                   ionSource = expdata$ionSource,
+                   analyser = expdata$analyser,
+                   detectorType = expdata$detectorType)
+
+    ## protocolData
+    protodata <- lapply(x, function(z) z@protocolData)
+    if (any(unlist(lapply(protodata, nrow)) > 0))
+        warning("Found non-empty protocol data, but merging protocol data is",
+                " currently not supported. Skipped.")
+    ## phenoData
+    pdata <- do.call(rbind, lapply(x, pData))
+    res <- new(
+        "OnDiskMSnExp",
+        phenoData = new("NAnnotatedDataFrame", data = pdata),
+        featureData = new("AnnotatedDataFrame", featd),
+        processingData = new("MSnProcess",
+                             processing = paste0("Concatenated [", date(), "]"),
+                             files = unlist(fls), smoothed = NA),
+        experimentData = expdata,
+        spectraProcessingQueue = new_procQ)
+    if (validObject(res))
+        res
+}
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/xcms/issues/247
+c.XCMSnExp <- function(...) {
+    .concatenate_XCMSnExp(...)
+}
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/xcms/issues/247
+c.MSnbase <- function(...) {
+    .concatenate_OnDiskMSnExp(...)
+}
--- a/lib.r	Wed Sep 05 05:59:21 2018 -0400
+++ b/lib.r	Mon Sep 17 08:46:01 2018 -0400
@@ -134,6 +134,15 @@
 }
 
 #@author G. Le Corguille
+# This function convert the remain NA to 0 in the dataMatrix
+naTOzeroDataMatrix <- function(dataMatrix, naTOzero) {
+    if (naTOzero){
+        dataMatrix[is.na(dataMatrix)] <- 0
+    }
+    return (dataMatrix)
+}
+
+#@author G. Le Corguille
 # Draw the plotChromPeakDensity 3 per page in a pdf file
 getPlotChromPeakDensity <- function(xdata, mzdigit=4) {
     pdf(file="plotChromPeakDensity.pdf", width=16, height=12)
@@ -177,7 +186,7 @@
 
 #@author G. Le Corguille
 # value: intensity values to be used into, maxo or intb
-getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, variableMetadataOutput, dataMatrixOutput) {
+getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput) {
     dataMatrix <- featureValues(xdata, method="medret", value=intval)
     colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix))
     dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix)
@@ -187,6 +196,7 @@
 
     variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
     variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
+    dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero)
 
     write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F)
     write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F)
@@ -498,155 +508,3 @@
         return (xset)
     }
 }
-
-
-#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
-# https://github.com/sneumann/xcms/issues/250
-groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) {
-    mzfmt <- paste("%.", mzdec, "f", sep = "")
-    rtfmt <- paste("%.", rtdec, "f", sep = "")
-
-    gnames <- paste("M", sprintf(mzfmt, featureDefinitions(xdata)[,"mzmed"]), "T",
-                    sprintf(rtfmt, featureDefinitions(xdata)[,"rtmed"]), sep = "")
-
-    if (any(dup <- duplicated(gnames)))
-        for (dupname in unique(gnames[dup])) {
-            dupidx <- which(gnames == dupname)
-            gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_")
-        }
-
-    return (gnames)
-}
-
-#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
-# https://github.com/sneumann/xcms/issues/247
-.concatenate_XCMSnExp <- function(...) {
-    x <- list(...)
-    if (length(x) == 0)
-        return(NULL)
-    if (length(x) == 1)
-        return(x[[1]])
-    ## Check that all are XCMSnExp objects.
-    if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp")))))
-        stop("All passed objects should be 'XCMSnExp' objects")
-    new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp")
-    ## If any of the XCMSnExp has alignment results or detected features drop
-    ## them!
-    x <- lapply(x, function(z) {
-        if (hasAdjustedRtime(z)) {
-            z <- dropAdjustedRtime(z)
-            warning("Adjusted retention times found, had to drop them.")
-        }
-        if (hasFeatures(z)) {
-            z <- dropFeatureDefinitions(z)
-            warning("Feature definitions found, had to drop them.")
-        }
-        z
-    })
-    ## Combine peaks
-    fls <- lapply(x, fileNames)
-    startidx <- cumsum(lengths(fls))
-    pks <- lapply(x, chromPeaks)
-    procH <- lapply(x, processHistory)
-    for (i in 2:length(fls)) {
-        pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1]
-        procH[[i]] <- lapply(procH[[i]], function(z) {
-            z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1])
-            z
-            })
-    }
-    pks <- do.call(rbind, pks)
-    new_x@.processHistory <- unlist(procH)
-    chromPeaks(new_x) <- pks
-    if (validObject(new_x))
-        new_x
-}
-
-#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
-# https://github.com/sneumann/xcms/issues/247
-.concatenate_OnDiskMSnExp <- function(...) {
-    x <- list(...)
-    if (length(x) == 0)
-        return(NULL)
-    if (length(x) == 1)
-        return(x[[1]])
-    ## Check that all are XCMSnExp objects.
-    if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp")))))
-        stop("All passed objects should be 'OnDiskMSnExp' objects")
-    ## Check processingQueue
-    procQ <- lapply(x, function(z) z@spectraProcessingQueue)
-    new_procQ <- procQ[[1]]
-    is_ok <- unlist(lapply(procQ, function(z)
-        !is.character(all.equal(new_procQ, z))
-        ))
-    if (any(!is_ok)) {
-        warning("Processing queues from the submitted objects differ! ",
-                "Dropping the processing queue.")
-        new_procQ <- list()
-    }
-    ## processingData
-    fls <- lapply(x, function(z) z@processingData@files)
-    startidx <- cumsum(lengths(fls))
-    ## featureData
-    featd <- lapply(x, fData)
-    ## Have to update the file index and the spectrum names.
-    for (i in 2:length(featd)) {
-        featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1]
-        rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames(
-                                              fileIds = featd[[i]]$fileIdx,
-                                              spectrumIds = featd[[i]]$spIdx,
-                                              nSpectra = nrow(featd[[i]]),
-                                              nFiles = length(unlist(fls))
-                                          )
-    }
-    featd <- do.call(rbind, featd)
-    featd$spectrum <- 1:nrow(featd)
-    ## experimentData
-    expdata <- lapply(x, function(z) {
-        ed <- z@experimentData
-        data.frame(instrumentManufacturer = ed@instrumentManufacturer,
-                   instrumentModel = ed@instrumentModel,
-                   ionSource = ed@ionSource,
-                   analyser = ed@analyser,
-                   detectorType = ed@detectorType,
-                   stringsAsFactors = FALSE)
-    })
-    expdata <- do.call(rbind, expdata)
-    expdata <- new("MIAPE",
-                   instrumentManufacturer = expdata$instrumentManufacturer,
-                   instrumentModel = expdata$instrumentModel,
-                   ionSource = expdata$ionSource,
-                   analyser = expdata$analyser,
-                   detectorType = expdata$detectorType)
-
-    ## protocolData
-    protodata <- lapply(x, function(z) z@protocolData)
-    if (any(unlist(lapply(protodata, nrow)) > 0))
-        warning("Found non-empty protocol data, but merging protocol data is",
-                " currently not supported. Skipped.")
-    ## phenoData
-    pdata <- do.call(rbind, lapply(x, pData))
-    res <- new(
-        "OnDiskMSnExp",
-        phenoData = new("NAnnotatedDataFrame", data = pdata),
-        featureData = new("AnnotatedDataFrame", featd),
-        processingData = new("MSnProcess",
-                             processing = paste0("Concatenated [", date(), "]"),
-                             files = unlist(fls), smoothed = NA),
-        experimentData = expdata,
-        spectraProcessingQueue = new_procQ)
-    if (validObject(res))
-        res
-}
-
-#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
-# https://github.com/sneumann/xcms/issues/247
-c.XCMSnExp <- function(...) {
-    .concatenate_XCMSnExp(...)
-}
-
-#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
-# https://github.com/sneumann/xcms/issues/247
-c.MSnbase <- function(...) {
-    .concatenate_OnDiskMSnExp(...)
-}
--- a/macros.xml	Wed Sep 05 05:59:21 2018 -0400
+++ b/macros.xml	Mon Sep 17 08:46:01 2018 -0400
@@ -31,7 +31,7 @@
 
 
     <token name="@INPUT_IMAGE_LABEL@">RData file</token>
-    <token name="@INPUT_IMAGE_HELP@">It contain a xcms3::XCMSnExp object (named xdata)</token>
+    <token name="@INPUT_IMAGE_HELP@">It contains a xcms3::XCMSnExp object (named xdata)</token>
 
 
     <!-- MISC -->
--- a/macros_xcms.xml	Wed Sep 05 05:59:21 2018 -0400
+++ b/macros_xcms.xml	Mon Sep 17 08:46:01 2018 -0400
@@ -29,7 +29,7 @@
     <xml name="input_file_load">
         <section name="file_load_section" title="Resubmit your raw dataset or your zip file">
             <conditional name="file_load_conditional">
-                <param name="file_load_select" type="select" label="Resubmit your dataset or your zip file" help="Use only if you get a message which say that your original dataset or zip file have been deleted on the server." >
+                <param name="file_load_select" type="select" label="Resubmit your dataset or your zip file" help="Use only if you get a message saying that your original dataset or zip file have been deleted on the server." >
                     <option value="no" >no need</option>
                     <option value="yes" >yes</option>
                 </param>
@@ -85,6 +85,7 @@
             numDigitsMZ $peaklist.numDigitsMZ
             numDigitsRT $peaklist.numDigitsRT
             intval $peaklist.intval
+            naTOzero $peaklist.naTOzero
         #end if
     </token>
 
@@ -100,6 +101,7 @@
                   <option value="maxo">maxo</option>
                   <option value="intb">intb</option>
               </param>
+              <param name="naTOzero" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Replace the remain NA by 0 in the dataMatrix" help="Rather mandatory for some downstream statistical steps"/>
             </when>
             <when value="false" />
         </conditional>
@@ -225,7 +227,7 @@
 
     <token name="@HELP_XCMS_MANUAL@">
 
-For details and explanations for all the parameters and the workflow of xcms_ package, see its manual_ and this example_
+For details and explanations concerning all the parameters and workflow of xcms_ package, see its manual_ and this example_
 
 .. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html
 .. _manual: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf
--- a/xcms_retcor.r	Wed Sep 05 05:59:21 2018 -0400
+++ b/xcms_retcor.r	Mon Sep 17 08:46:01 2018 -0400
@@ -12,6 +12,7 @@
 #Import the different functions
 source_local <- function(fname){ argv <- commandArgs(trailingOnly=FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) }
 source_local("lib.r")
+source_local("lib-xcms3.x.x.r")
 
 pkgs <- c("xcms","batch","RColorBrewer")
 loadAndDisplayPackages(pkgs)