Mercurial > repos > lecorguille > msnbase_readmsdata

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib-xcms3.x.x.r	Mon Sep 17 08:42:17 2018 -0400
@@ -0,0 +1,152 @@
+
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/xcms/issues/250
+# Build one identifier per feature of `xdata`, shaped "M<mz>T<rt>".
+#   xdata: object understood by featureDefinitions(); the result must expose
+#          the "mzmed" and "rtmed" columns.
+#   mzdec: number of decimals kept for the "mzmed" value (default 0).
+#   rtdec: number of decimals kept for the "rtmed" value (default 0).
+# Returns a character vector; identifiers that occur more than once receive a
+# "_1", "_2", ... suffix (in order of appearance) so that all names are unique.
+groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) {
+    feature_defs <- featureDefinitions(xdata)
+    mz_label <- sprintf(paste0("%.", mzdec, "f"), feature_defs[, "mzmed"])
+    rt_label <- sprintf(paste0("%.", rtdec, "f"), feature_defs[, "rtmed"])
+    gnames <- paste0("M", mz_label, "T", rt_label)
+
+    # Every member of a group of identical names is numbered, the first included
+    for (dupname in unique(gnames[duplicated(gnames)])) {
+        dupidx <- which(gnames == dupname)
+        gnames[dupidx] <- paste(gnames[dupidx], seq_along(dupidx), sep = "_")
+    }
+
+    gnames
+}
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/xcms/issues/247
+# Concatenate several XCMSnExp objects into a single XCMSnExp.
+#   ...: the objects to combine.
+# Returns NULL when called without arguments and the sole argument as-is
+# (no class check) when only one is given. Otherwise every argument must be an
+# 'XCMSnExp' (stop() if not) and the combined object is returned after
+# validObject() has been called on it.
+# Adjusted retention times and feature definitions present in the inputs are
+# dropped, each with a warning; chromatographic peaks and process history are
+# carried over and re-indexed.
+.concatenate_XCMSnExp <- function(...) {
+    x <- list(...)
+    if (length(x) == 0)
+        return(NULL)
+    if (length(x) == 1)
+        return(x[[1]])
+    ## Check that all are XCMSnExp objects.
+    if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp")))))
+        stop("All passed objects should be 'XCMSnExp' objects")
+    ## The raw-data part is merged by .concatenate_OnDiskMSnExp() from the
+    ## original arguments, then coerced to XCMSnExp.
+    new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp")
+    ## If any of the XCMSnExp has alignment results or detected features drop
+    ## them!
+    x <- lapply(x, function(z) {
+        if (hasAdjustedRtime(z)) {
+            z <- dropAdjustedRtime(z)
+            warning("Adjusted retention times found, had to drop them.")
+        }
+        if (hasFeatures(z)) {
+            z <- dropFeatureDefinitions(z)
+            warning("Feature definitions found, had to drop them.")
+        }
+        z
+    })
+    ## Combine peaks
+    fls <- lapply(x, fileNames)
+    ## Cumulative file count per object: startidx[i - 1] is the number of files
+    ## held by the objects that precede object i.
+    startidx <- cumsum(lengths(fls))
+    pks <- lapply(x, chromPeaks)
+    procH <- lapply(x, processHistory)
+    ## The early returns above guarantee at least two objects, so 2:length(fls)
+    ## never counts backwards.
+    for (i in 2:length(fls)) {
+        ## Offset the "sample" column and each process-history fileIndex by the
+        ## number of files in the preceding objects.
+        pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1]
+        procH[[i]] <- lapply(procH[[i]], function(z) {
+            z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1])
+            z
+            })
+    }
+    pks <- do.call(rbind, pks)
+    ## unlist() flattens the per-object lists into one list of history steps.
+    new_x@.processHistory <- unlist(procH)
+    chromPeaks(new_x) <- pks
+    if (validObject(new_x))
+        new_x
+}
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/xcms/issues/247
+# Concatenate several OnDiskMSnExp objects into a single OnDiskMSnExp.
+#   ...: the objects to combine.
+# Returns NULL when called without arguments and the sole argument as-is
+# (no class check) when only one is given. Otherwise every argument must be an
+# 'OnDiskMSnExp' (stop() if not) and a newly built object is returned after
+# validObject() has been called on it.
+# Warnings are raised when the spectra processing queues differ (the queue is
+# then dropped) and when any protocol data is non-empty (it is not merged).
+.concatenate_OnDiskMSnExp <- function(...) {
+    x <- list(...)
+    if (length(x) == 0)
+        return(NULL)
+    if (length(x) == 1)
+        return(x[[1]])
+    ## Check that all are OnDiskMSnExp objects.
+    if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp")))))
+        stop("All passed objects should be 'OnDiskMSnExp' objects")
+    ## Check processingQueue
+    ## Each queue is compared with the first one; all.equal() returns a
+    ## character description when two queues differ.
+    procQ <- lapply(x, function(z) z@spectraProcessingQueue)
+    new_procQ <- procQ[[1]]
+    is_ok <- unlist(lapply(procQ, function(z)
+        !is.character(all.equal(new_procQ, z))
+        ))
+    if (any(!is_ok)) {
+        warning("Processing queues from the submitted objects differ! ",
+                "Dropping the processing queue.")
+        new_procQ <- list()
+    }
+    ## processingData
+    fls <- lapply(x, function(z) z@processingData@files)
+    ## Cumulative file count per object: startidx[i - 1] is the number of files
+    ## held by the objects that precede object i.
+    startidx <- cumsum(lengths(fls))
+    ## featureData
+    featd <- lapply(x, fData)
+    ## Have to update the file index and the spectrum names.
+    ## NOTE(review): the loop starts at 2, so the first object's row names are
+    ## kept as they are while the others are regenerated with nFiles set to the
+    ## total file count -- confirm both naming schemes stay consistent.
+    for (i in 2:length(featd)) {
+        featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1]
+        ## formatFileSpectrumNames is reached through ':::', i.e. it is not
+        ## exported by MSnbase.
+        rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames(
+                                              fileIds = featd[[i]]$fileIdx,
+                                              spectrumIds = featd[[i]]$spIdx,
+                                              nSpectra = nrow(featd[[i]]),
+                                              nFiles = length(unlist(fls))
+                                          )
+    }
+    featd <- do.call(rbind, featd)
+    ## Renumber the spectra consecutively over the combined table.
+    featd$spectrum <- 1:nrow(featd)
+    ## experimentData
+    ## Five instrument slots are collected per object, stacked row-wise and
+    ## passed as vectors to a new MIAPE object.
+    expdata <- lapply(x, function(z) {
+        ed <- z@experimentData
+        data.frame(instrumentManufacturer = ed@instrumentManufacturer,
+                   instrumentModel = ed@instrumentModel,
+                   ionSource = ed@ionSource,
+                   analyser = ed@analyser,
+                   detectorType = ed@detectorType,
+                   stringsAsFactors = FALSE)
+    })
+    expdata <- do.call(rbind, expdata)
+    expdata <- new("MIAPE",
+                   instrumentManufacturer = expdata$instrumentManufacturer,
+                   instrumentModel = expdata$instrumentModel,
+                   ionSource = expdata$ionSource,
+                   analyser = expdata$analyser,
+                   detectorType = expdata$detectorType)
+
+    ## protocolData
+    ## Only inspected to warn the caller: it is not passed to the new object.
+    protodata <- lapply(x, function(z) z@protocolData)
+    if (any(unlist(lapply(protodata, nrow)) > 0))
+        warning("Found non-empty protocol data, but merging protocol data is",
+                " currently not supported. Skipped.")
+    ## phenoData
+    pdata <- do.call(rbind, lapply(x, pData))
+    ## Assemble the result from the merged parts; the processing record only
+    ## states the concatenation date and `smoothed` is set to NA.
+    res <- new(
+        "OnDiskMSnExp",
+        phenoData = new("NAnnotatedDataFrame", data = pdata),
+        featureData = new("AnnotatedDataFrame", featd),
+        processingData = new("MSnProcess",
+                             processing = paste0("Concatenated [", date(), "]"),
+                             files = unlist(fls), smoothed = NA),
+        experimentData = expdata,
+        spectraProcessingQueue = new_procQ)
+    if (validObject(res))
+        res
+}
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/xcms/issues/247
+# `c` method named after the S3 `generic.class` pattern for class "XCMSnExp":
+# forwards all its arguments unchanged to .concatenate_XCMSnExp() and returns
+# that function's result.
+c.XCMSnExp <- function(...) {
+    .concatenate_XCMSnExp(...)
+}
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/xcms/issues/247
+# `c` method named after the S3 `generic.class` pattern for class "MSnbase":
+# forwards all its arguments unchanged to .concatenate_OnDiskMSnExp() and
+# returns that function's result.
+# NOTE(review): the helper it calls requires 'OnDiskMSnExp' objects; confirm
+# that such objects really carry a class named "MSnbase", otherwise c() will
+# not dispatch to this function.
+c.MSnbase <- function(...) {
+    .concatenate_OnDiskMSnExp(...)
+}
--- a/lib.r	Wed Sep 05 05:56:03 2018 -0400
+++ b/lib.r	Mon Sep 17 08:42:17 2018 -0400
@@ -134,6 +134,15 @@
 }

 #@author G. Le Corguille
+# Replace the NA values remaining in the dataMatrix with 0 when naTOzero is TRUE
+naTOzeroDataMatrix <- function(dataMatrix, naTOzero) {
+    if (!naTOzero) {
+        return (dataMatrix)
+    }
+    replace(dataMatrix, is.na(dataMatrix), 0)
+}
+
+#@author G. Le Corguille
 # Draw the plotChromPeakDensity 3 per page in a pdf file
 getPlotChromPeakDensity <- function(xdata, mzdigit=4) {
     pdf(file="plotChromPeakDensity.pdf", width=16, height=12)
@@ -177,7 +186,7 @@

 #@author G. Le Corguille
 # value: intensity values to be used into, maxo or intb
-getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, variableMetadataOutput, dataMatrixOutput) {
+getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput) {
     dataMatrix <- featureValues(xdata, method="medret", value=intval)
     colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix))
     dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix)
@@ -187,6 +196,7 @@

     variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
     variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
+    dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero)

     write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F)
     write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F)
@@ -498,155 +508,3 @@
         return (xset)
     }
 }
-
-
-#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
-# https://github.com/sneumann/xcms/issues/250
-groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) {
-    mzfmt <- paste("%.", mzdec, "f", sep = "")
-    rtfmt <- paste("%.", rtdec, "f", sep = "")
-
-    gnames <- paste("M", sprintf(mzfmt, featureDefinitions(xdata)[,"mzmed"]), "T",
-                    sprintf(rtfmt, featureDefinitions(xdata)[,"rtmed"]), sep = "")
-
-    if (any(dup <- duplicated(gnames)))
-        for (dupname in unique(gnames[dup])) {
-            dupidx <- which(gnames == dupname)
-            gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_")
-        }
-
-    return (gnames)
-}
-
-#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
-# https://github.com/sneumann/xcms/issues/247
-.concatenate_XCMSnExp <- function(...) {
-    x <- list(...)
-    if (length(x) == 0)
-        return(NULL)
-    if (length(x) == 1)
-        return(x[[1]])
-    ## Check that all are XCMSnExp objects.
-    if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp")))))
-        stop("All passed objects should be 'XCMSnExp' objects")
-    new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp")
-    ## If any of the XCMSnExp has alignment results or detected features drop
-    ## them!
-    x <- lapply(x, function(z) {
-        if (hasAdjustedRtime(z)) {
-            z <- dropAdjustedRtime(z)
-            warning("Adjusted retention times found, had to drop them.")
-        }
-        if (hasFeatures(z)) {
-            z <- dropFeatureDefinitions(z)
-            warning("Feature definitions found, had to drop them.")
-        }
-        z
-    })
-    ## Combine peaks
-    fls <- lapply(x, fileNames)
-    startidx <- cumsum(lengths(fls))
-    pks <- lapply(x, chromPeaks)
-    procH <- lapply(x, processHistory)
-    for (i in 2:length(fls)) {
-        pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1]
-        procH[[i]] <- lapply(procH[[i]], function(z) {
-            z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1])
-            z
-            })
-    }
-    pks <- do.call(rbind, pks)
-    new_x@.processHistory <- unlist(procH)
-    chromPeaks(new_x) <- pks
-    if (validObject(new_x))
-        new_x
-}
-
-#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
-# https://github.com/sneumann/xcms/issues/247
-.concatenate_OnDiskMSnExp <- function(...) {
-    x <- list(...)
-    if (length(x) == 0)
-        return(NULL)
-    if (length(x) == 1)
-        return(x[[1]])
-    ## Check that all are XCMSnExp objects.
-    if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp")))))
-        stop("All passed objects should be 'OnDiskMSnExp' objects")
-    ## Check processingQueue
-    procQ <- lapply(x, function(z) z@spectraProcessingQueue)
-    new_procQ <- procQ[[1]]
-    is_ok <- unlist(lapply(procQ, function(z)
-        !is.character(all.equal(new_procQ, z))
-        ))
-    if (any(!is_ok)) {
-        warning("Processing queues from the submitted objects differ! ",
-                "Dropping the processing queue.")
-        new_procQ <- list()
-    }
-    ## processingData
-    fls <- lapply(x, function(z) z@processingData@files)
-    startidx <- cumsum(lengths(fls))
-    ## featureData
-    featd <- lapply(x, fData)
-    ## Have to update the file index and the spectrum names.
-    for (i in 2:length(featd)) {
-        featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1]
-        rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames(
-                                              fileIds = featd[[i]]$fileIdx,
-                                              spectrumIds = featd[[i]]$spIdx,
-                                              nSpectra = nrow(featd[[i]]),
-                                              nFiles = length(unlist(fls))
-                                          )
-    }
-    featd <- do.call(rbind, featd)
-    featd$spectrum <- 1:nrow(featd)
-    ## experimentData
-    expdata <- lapply(x, function(z) {
-        ed <- z@experimentData
-        data.frame(instrumentManufacturer = ed@instrumentManufacturer,
-                   instrumentModel = ed@instrumentModel,
-                   ionSource = ed@ionSource,
-                   analyser = ed@analyser,
-                   detectorType = ed@detectorType,
-                   stringsAsFactors = FALSE)
-    })
-    expdata <- do.call(rbind, expdata)
-    expdata <- new("MIAPE",
-                   instrumentManufacturer = expdata$instrumentManufacturer,
-                   instrumentModel = expdata$instrumentModel,
-                   ionSource = expdata$ionSource,
-                   analyser = expdata$analyser,
-                   detectorType = expdata$detectorType)
-
-    ## protocolData
-    protodata <- lapply(x, function(z) z@protocolData)
-    if (any(unlist(lapply(protodata, nrow)) > 0))
-        warning("Found non-empty protocol data, but merging protocol data is",
-                " currently not supported. Skipped.")
-    ## phenoData
-    pdata <- do.call(rbind, lapply(x, pData))
-    res <- new(
-        "OnDiskMSnExp",
-        phenoData = new("NAnnotatedDataFrame", data = pdata),
-        featureData = new("AnnotatedDataFrame", featd),
-        processingData = new("MSnProcess",
-                             processing = paste0("Concatenated [", date(), "]"),
-                             files = unlist(fls), smoothed = NA),
-        experimentData = expdata,
-        spectraProcessingQueue = new_procQ)
-    if (validObject(res))
-        res
-}
-
-#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
-# https://github.com/sneumann/xcms/issues/247
-c.XCMSnExp <- function(...) {
-    .concatenate_XCMSnExp(...)
-}
-
-#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
-# https://github.com/sneumann/xcms/issues/247
-c.MSnbase <- function(...) {
-    .concatenate_OnDiskMSnExp(...)
-}
--- a/macros.xml	Wed Sep 05 05:56:03 2018 -0400
+++ b/macros.xml	Mon Sep 17 08:42:17 2018 -0400
@@ -31,7 +31,7 @@


     <token name="@INPUT_IMAGE_LABEL@">RData file</token>
-    <token name="@INPUT_IMAGE_HELP@">It contain a xcms3::XCMSnExp object (named xdata)</token>
+    <token name="@INPUT_IMAGE_HELP@">It contains a xcms3::XCMSnExp object (named xdata)</token>


     <!-- MISC -->
--- a/msnbase_readmsdata.r	Wed Sep 05 05:56:03 2018 -0400
+++ b/msnbase_readmsdata.r	Mon Sep 17 08:42:17 2018 -0400
@@ -12,6 +12,7 @@
 #Import the different functions
 source_local <- function(fname){ argv <- commandArgs(trailingOnly=FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) }
 source_local("lib.r")
+source_local("lib-xcms3.x.x.r")

 pkgs <- c("MSnbase","batch")
 loadAndDisplayPackages(pkgs)
--- a/msnbase_readmsdata.xml	Wed Sep 05 05:56:03 2018 -0400
+++ b/msnbase_readmsdata.xml	Mon Sep 17 08:42:17 2018 -0400
@@ -23,7 +23,7 @@

     <inputs>

-        <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" label="File(s) from your history containing your chromatograms" help="Single file mode for the format: mzxml, mzml, mzdata and netcdf. Zip file mode for the format: no_unzip.zip, zip. See the help section below." />
+        <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" label="File(s) from your history containing your chromatograms" help="Single file mode for the following formats: mzxml, mzml, mzdata and netcdf. Zip file mode for the following formats: no_unzip.zip, zip. See the help section below." />

     </inputs>

@@ -132,7 +132,7 @@
 xcms.findChromPeaks         ``*``.raw.RData      rdata.msnbase.raw
 =========================== ==================== ====================

-
+

 **Example of a metabolomic workflow**

@@ -158,7 +158,7 @@

 You have two methods for your inputs:

-    | Single file (recommended): You can put a single file as input. That way, you will be able to launch several xcmsSet in parallel and use "xcms.xcmsSet Merger" before "xcms.group"
+    | Single file (recommended): You can put a single file as input. That way, you will be able to launch several readMSData and findChromPeaks in parallel and use "findChromPeaks Merger" before groupChromPeaks.
     | Zip file: You can put a zip file containing your inputs: myinputs.zip (containing all your conditions as sub-directories).

 Zip file: Steps for creating the zip file
@@ -167,7 +167,7 @@
 **Step1: Creating your directory and hierarchize the subdirectories**


-VERY IMPORTANT: If you zip your files under Windows, you must use the 7Zip_ software, otherwise your zip will not be well unzipped on the platform W4M (zip corrupted bug).
+VERY IMPORTANT: If you zip your files under Windows, you must use the 7Zip_ software; otherwise your zip file will not be unzipped correctly on the W4M platform (corrupted zip bug).

 .. _7Zip: http://www.7-zip.org/

@@ -177,22 +177,22 @@

 **Step2: Creating a zip file**

-Create your zip file (e.g.: arabidopsis.zip).
+Create your zip file (*e.g.* arabidopsis.zip).

 **Step 3 : Uploading it to our Galaxy server**

-If your zip file is less than 2Gb, you get use the Get Data tool to upload it.
+If your zip file is less than 2Gb, you can use the Get Data tool to upload it.

 Otherwise if your zip file is larger than 2Gb, please refer to the HOWTO_ on workflow4metabolomics.org.

 .. _HOWTO: http://application.sb-roscoff.fr/download/w4m/howto/galaxy_upload_up_2Go.pdf

-For more informations, don't hesitate to send us an email at supportATworkflow4metabolomics.org).
+For more information, do not hesitate to send us an email at supportATworkflow4metabolomics.org.

-Advices for converting your files for the XCMS input
-----------------------------------------------------
+Advice for converting your files into mzXML format (XCMS input)
+---------------------------------------------------------------

-We recommend you to convert your raw files to **mzXML** in centroid mode (smaller files) and the files will be compatible with the xmcs centWave method.
+We recommend that you convert your raw files into **mzXML** in centroid mode (smaller files); this way the files will be compatible with the xcms centWave algorithm.

 **We recommend you the following parameters:**

@@ -217,13 +217,13 @@

 xset.RData: rdata.msnbase.raw format

-| Rdata file that is necessary in the second step of the workflow "xcms.findChromPeaks".
+    | Rdata file that is necessary in the second step of the workflow "xcms.findChromPeaks".

 sampleMetadata.tsv (only when a zip is used)

-    | Tabular file that contains for each sample, it's associated class and polarity (positive,negative and mixed).
-    | This file is necessary in the Anova and PCA step of the workflow.
-    | You get a sampleMetadata.tsv only if you use a zip. Otherwise, you have to provide one for the findChromPeaks Merger step
+    | Tabular file that contains, for each sample, its associated class and polarity (positive, negative and mixed).
+    | This file is necessary in further steps of the workflow, such as the Anova and PCA steps.
+    | You get a sampleMetadata.tsv only if you use a zip. Otherwise, you have to provide one for the findChromPeaks Merger step.

 ---------------------------------------------------