Mercurial > repos > lecorguille > xcms_plot_chromatogram
changeset 0:fe1f0f16d9e6 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit cfb08142b6bfb78002b4e0d7775adb1a58e66c33-dirty
author | lecorguille |
---|---|
date | Thu, 08 Mar 2018 02:48:44 -0500 |
parents | |
children | 86ff4497b871 |
files | lib.r macros.xml repository_dependencies.xml test-data/BPIs.pdf test-data/TICs.pdf test-data/faahKO-single.xset.merged.group.retcor.RData test-data/ko15.CDF test-data/ko16.CDF test-data/wt15.CDF test-data/wt16.CDF xcms_plot_chromatogram.r xcms_plot_chromatogram.xml |
diffstat | 12 files changed, 932 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib.r Thu Mar 08 02:48:44 2018 -0500 @@ -0,0 +1,561 @@
#@authors ABiMS TEAM, Y. Guitton
# lib.r for Galaxy Workflow4Metabolomics xcms tools

#@author G. Le Corguille
# solve an issue with batch if arguments are logical TRUE/FALSE
# Wrapper around batch::parseCommandArgs(): every argument whose value is the
# single string "TRUE" or "FALSE" is converted to a real logical.
#   ...     : forwarded unchanged to batch::parseCommandArgs()
#   returns : the list of parsed arguments
parseCommandArgs <- function(...) {
    args <- batch::parseCommandArgs(...)
    for (key in names(args)) {
        value <- args[[key]]
        # only scalar strings are candidates; anything else is left untouched
        if (is.character(value) && length(value) == 1L && value %in% c("TRUE", "FALSE")) {
            args[[key]] <- as.logical(value)
        }
    }
    return(args)
}

#@author G. Le Corguille
# This function will
# - load the packages
# - display the sessionInfo
#   pkgs : character vector of package names; loading stops with an error as
#          soon as one of them cannot be attached
loadAndDisplayPackages <- function(pkgs) {
    for (pkg in pkgs) {
        suppressPackageStartupMessages(
            stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE))
        )
    }

    sessioninfo <- sessionInfo()
    cat(sessioninfo$R.version$version.string, "\n")
    cat("Main packages:\n")
    for (pkg in names(sessioninfo$otherPkgs)) {
        cat(paste(pkg, packageVersion(pkg)), "\t")
    }
    cat("\n")
    cat("Other loaded packages:\n")
    for (pkg in names(sessioninfo$loadedOnly)) {
        cat(paste(pkg, packageVersion(pkg)), "\t")
    }
    cat("\n")
}

#@author G. Le Corguille
# This function convert if it is required the Retention Time in minutes
#   variableMetadata : table with the columns "rt", "rtmin" and "rtmax"
#   convertRTMinute  : logical; when TRUE the three columns are divided by 60
#   returns          : the (possibly converted) variableMetadata
RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
    if (convertRTMinute) {
        #converting the retention times (seconds) into minutes
        print("converting the retention times into minutes in the variableMetadata")
        for (rtColumn in c("rt", "rtmin", "rtmax")) {
            variableMetadata[, rtColumn] <- variableMetadata[, rtColumn] / 60
        }
    }
    return(variableMetadata)
}

#@author G. Le Corguille
# This function format ions identifiers
# Adds a "namecustom" column built as M<mz>T<rt>, followed by the "_<suffix>"
# found after the first "_" of the original name (if any). Identifiers are
# made unique with make.unique().
#   variableMetadata : data.frame with the columns "name", "mz" and "rt"
#   numDigitsRT      : decimal places kept for rt in the identifier
#   numDigitsMZ      : decimal places kept for mz in the identifier
#   returns          : data.frame whose two first columns are name, namecustom
formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) {
    splitDeco <- strsplit(as.character(variableMetadata$name), "_")
    # vapply keeps the result a character vector even when there is no row
    idsDeco <- vapply(splitDeco, function(x) {
        deco <- unlist(x)[2]
        if (is.na(deco)) "" else paste0("_", deco)
    }, character(1))
    namecustom <- make.unique(paste0(
        "M", round(variableMetadata[, "mz"], numDigitsMZ),
        "T", round(variableMetadata[, "rt"], numDigitsRT),
        idsDeco
    ))
    # drop = FALSE: keep the column name when a single column remains
    otherColumns <- variableMetadata[, !(colnames(variableMetadata) %in% c("name")), drop = FALSE]
    variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, otherColumns)
    return(variableMetadata)
}

# Private helper: one "Set1" colour per distinct sample group, named by group.
# brewer.pal() only accepts 3 to 9 colours for "Set1": smaller palettes are
# truncated and larger ones are interpolated, so that no group gets an NA colour.
.getGroupColors <- function(sampleGroups) {
    groups <- unique(as.character(sampleGroups))
    nGroups <- length(groups)
    groupColors <- brewer.pal(min(max(nGroups, 3), 9), "Set1")
    if (nGroups > length(groupColors)) {
        groupColors <- colorRampPalette(groupColors)(nGroups)
    }
    groupColors <- groupColors[seq_len(nGroups)]
    names(groupColors) <- groups
    groupColors
}

#@author G. Le Corguille
# Draw the plotChromPeakDensity 3 per page in a pdf file
#   xdata   : object accepted by featureDefinitions() and plotChromPeakDensity()
#   mzdigit : decimal places used for the m/z range shown in each plot title
# Writes "plotChromPeakDensity.pdf" in the working directory.
getPlotChromPeakDensity <- function(xdata, mzdigit=4) {
    pdf(file="plotChromPeakDensity.pdf", width=16, height=12)
    # make sure the device is closed even when a plotting call fails
    deviceClosed <- FALSE
    on.exit(if (!deviceClosed) dev.off(), add = TRUE)

    par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))

    group_colors <- .getGroupColors(xdata$sample_group)

    featureDefs <- featureDefinitions(xdata)
    xlim <- c(min(featureDefs$rtmin), max(featureDefs$rtmax))
    # seq_len(): no iteration at all when there is no feature
    for (i in seq_len(nrow(featureDefs))) {
        mzmin <- featureDefs[i, ]$mzmin
        mzmax <- featureDefs[i, ]$mzmax
        plotChromPeakDensity(xdata, mz=c(mzmin, mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin, mzdigit), round(mzmax, mzdigit)))
        legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
    }

    deviceClosed <- TRUE
    dev.off()
}

#@author G. Le Corguille
# Draw the raw vs adjusted retention times (plotAdjustedRtime) in a pdf file:
# one page coloured by sample group, one page coloured by sample
#   xdata : object accepted by plotAdjustedRtime()
# Writes "raw_vs_adjusted_rt.pdf" in the working directory.
getPlotAdjustedRtime <- function(xdata) {

    pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12)
    # make sure the device is closed even when a plotting call fails
    deviceClosed <- FALSE
    on.exit(if (!deviceClosed) dev.off(), add = TRUE)

    # Color by group
    group_colors <- .getGroupColors(xdata$sample_group)
    # as.character(): index the colours by group name, never by factor code
    plotAdjustedRtime(xdata, col = group_colors[as.character(xdata$sample_group)])
    legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)

    # Color by sample
    sampleNames <- xdata@phenoData@data$sample_name
    sample_colors <- rainbow(length(sampleNames))
    plotAdjustedRtime(xdata, col = sample_colors)
    legend("topright", legend=sampleNames, col=sample_colors, cex=0.8, lty=1)

    deviceClosed <- TRUE
    dev.off()
}

#@author G. Le Corguille
# value: intensity values to be used into, maxo or intb
# Export the W4M peak list as two tab separated files.
#   xdata                  : object accepted by featureValues()/featureDefinitions()
#   intval                 : passed as 'value' to featureValues()
#   convertRTMinute        : see RTSecondToMinute()
#   numDigitsMZ/numDigitsRT: see formatIonIdentifiers()
#   variableMetadataOutput : path of the variableMetadata file to write
#   dataMatrixOutput       : path of the dataMatrix file to write
getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=FALSE, numDigitsMZ=4, numDigitsRT=0, variableMetadataOutput, dataMatrixOutput) {
    ionNames <- groupnamesW4M(xdata)

    dataMatrix <- featureValues(xdata, method="medret", value=intval)
    colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix))
    dataMatrix <- cbind(name=ionNames, dataMatrix)

    variableMetadata <- featureDefinitions(xdata)
    # columns 1 and 4 are renamed by position
    # NOTE(review): presumably these are mzmed and rtmed - confirm against xcms
    colnames(variableMetadata)[1] <- "mz"
    colnames(variableMetadata)[4] <- "rt"
    variableMetadata <- data.frame(name=ionNames, variableMetadata)

    variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
    variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)

    write.table(variableMetadata, file=variableMetadataOutput, sep="\t", quote=FALSE, row.names=FALSE)
    write.table(dataMatrix, file=dataMatrixOutput, sep="\t", quote=FALSE, row.names=FALSE)

}

#@author G. Le Corguille
# Draw the chromatograms of all the samples in a pdf file: one page coloured
# by sample group, one page coloured by sample
#   xdata          : object accepted by chromatogram()
#   pdfname        : path of the pdf to write
#   aggregationFun : "sum" is titled as Total Ion Chromatograms, any other
#                    value as Base Peak Intensity Chromatograms
getPlotChromatogram <- function(xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") {

    chrom <- chromatogram(xdata, aggregationFun = aggregationFun)
    if (aggregationFun == "sum") {
        type <- "Total Ion Chromatograms"
    } else {
        type <- "Base Peak Intensity Chromatograms"
    }

    adjusted <- "Raw"
    if (hasAdjustedRtime(xdata)) {
        adjusted <- "Adjusted"
    }

    main <- paste(type, ":", adjusted, "data")

    pdf(pdfname, width=16, height=10)
    # make sure the device is closed even when a plotting call fails
    deviceClosed <- FALSE
    on.exit(if (!deviceClosed) dev.off(), add = TRUE)

    # Color by group
    group_colors <- .getGroupColors(xdata$sample_group)
    # as.character(): index the colours by group name, never by factor code
    plot(chrom, col = group_colors[as.character(chrom$sample_group)], main=main)
    legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)

    # Color by sample
    sampleNames <- xdata@phenoData@data$sample_name
    sample_colors <- rainbow(length(sampleNames))
    plot(chrom, col = sample_colors, main=main)
    legend("topright", legend=sampleNames, col=sample_colors, cex=0.8, lty=1)

    deviceClosed <- TRUE
    dev.off()
}

#@author G. Le Corguille
# Total Ion Chromatograms: getPlotChromatogram() with aggregationFun = "sum"
getPlotTICs <- function(xdata, pdfname="TICs.pdf") {
    getPlotChromatogram(xdata, pdfname, aggregationFun = "sum")
}

#@author G. Le Corguille
# Base Peak Intensity chromatograms: getPlotChromatogram() with aggregationFun = "max"
getPlotBPIs <- function(xdata, pdfname="BPIs.pdf") {
    getPlotChromatogram(xdata, pdfname, aggregationFun = "max")
}


# Get the polarities from all the samples of a condition
#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
# Build and write the sampleMetadata table from the phenoData of xdata.
#   xdata                : object with a two columns phenoData (name, class)
#   sampleMetadataOutput : path of the tab separated file to write
#   returns              : list(sampleNamesOrigin, sampleNamesMakeNames)
# Stops when make.names() makes two sample names identical.
getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") {
    cat("Creating the sampleMetadata file...\n")

    #Create the sampleMetada dataframe
    sampleMetadata <- xdata@phenoData@data
    rownames(sampleMetadata) <- NULL
    colnames(sampleMetadata) <- c("sampleMetadata", "class")

    sampleNamesOrigin <- sampleMetadata$sampleMetadata
    sampleNamesMakeNames <- make.names(sampleNamesOrigin)

    if (any(duplicated(sampleNamesMakeNames))) {
        write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
        for (sampleName in sampleNamesOrigin) {
            write(paste(sampleName, "\t->\t", make.names(sampleName)), stderr())
        }
        stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
    }

    if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
        cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
        for (sampleName in sampleNamesOrigin) {
            cat(paste(sampleName, "\t->\t", make.names(sampleName), "\n"))
        }
    }

    sampleMetadata$sampleMetadata <- sampleNamesMakeNames


    #For each sample file, the following actions are done
    rawFiles <- fileNames(xdata)
    scanMetadata <- fData(xdata)
    for (fileIdx in seq_along(rawFiles)) {
        #Check if the file is in the CDF format
        # test the current file only: the condition must be a single logical
        if (!mzR:::netCDFIsFile(rawFiles[fileIdx])) {

            # If the column isn't exist, with add one filled with NA
            if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA

            #Extract the polarity (a list of polarities)
            polarity <- scanMetadata[scanMetadata$fileIdx == fileIdx, "polarity"]
            #Verify if all the scans have the same polarity
            uniq_list <- unique(polarity)
            if (length(uniq_list) > 1) {
                polarity <- "mixed"
            } else if (length(uniq_list) == 1) {
                polarity <- as.character(uniq_list)
            } else {
                # no scan for this file: nothing to report
                polarity <- NA
            }

            #Set the polarity attribute
            sampleMetadata$polarity[fileIdx] <- polarity
        }

    }

    write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput)

    return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames))

}


# Private helper: regular expression matching the file extensions searched by
# the functions below (cdf, nc, xml, mzxml, mzdata, mzml - case insensitive).
.getMSFilePattern <- function() {
    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
    paste(paste("\\.", filepattern, "$", sep=""), collapse="|")
}

# Private helper: the entries of 'directory' which are plain files, followed by
# the files matching 'filepattern' found recursively in those which are directories.
.listMSFiles <- function(directory, filepattern) {
    info <- file.info(directory)
    listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
    c(directory[!info$isdir], listed)
}


# This function check if xcms will found all the files
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
#   directory : path, relative to the working directory, of the raw files
# Stops when a file found on the filesystem with `find` is not part of the
# files obtained through list.files().
checkFilesCompatibilityWithXcms <- function(directory) {
    cat("Checking files filenames compatibilities with xmcs...\n")
    # WHAT XCMS WILL FIND
    filepattern <- .getMSFilePattern()
    files <- .listMSFiles(directory, filepattern)
    files_abs <- file.path(getwd(), files)
    exists <- file.exists(files_abs)
    files[exists] <- files_abs[exists]
    files[exists] <- sub("//", "/", files[exists])

    # WHAT IS ON THE FILESYSTEM
    # shQuote(): the path is handed to a shell; same root as files_abs above
    find_cmd <- paste0("find ", shQuote(file.path(getwd(), directory)), " -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"")
    filesystem_filepaths <- system(find_cmd, intern=TRUE)
    filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=TRUE)]

    # COMPARISON
    missing_files <- !(filesystem_filepaths %in% files)
    if (any(missing_files)) {
        write("\n\nERROR: List of the files which will not be imported by xcmsSet", stderr())
        write(filesystem_filepaths[missing_files], stderr())
        stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
    }
}


#This function list the compatible files within the directory as xcms did
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
#   directory : files and/or directories to search recursively
#   returns   : character vector of the existing matching files
getMSFiles <- function (directory) {
    files <- .listMSFiles(directory, .getMSFilePattern())
    files <- files[file.exists(files)]
    return(files)
}

# This function check if XML contains special caracters. It also checks integrity and completness.
#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
#   directory : directory searched with `find` for *.*ml* files
# Stops when xmllint exits with a non zero status for at least one file.
checkXmlStructure <- function (directory) {
    cat("Checking XML structure...\n")

    # shQuote(): the directory is handed to a shell
    cmd <- paste0("IFS=$'\n'; for xml in $(find ", shQuote(directory), " -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
    capture <- system(cmd, intern=TRUE)

    if (length(capture) > 0) {
        write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
        write(capture, stderr())
        stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
    }

}


# This function check if XML contain special characters
#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
# The non ASCII characters are removed in place (perl -i) from the files.
#   directory : directory searched with `find` for *.*ml* files
#   returns   : TRUE when at least one file has been rewritten, else FALSE
deleteXmlBadCharacters <- function (directory) {
    cat("Checking Non ASCII characters in the XML...\n")

    processed <- FALSE
    xmlFiles <- system(paste0("find ", shQuote(directory), " -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE)
    for (i in xmlFiles) {
        # grep exits with 1 when nothing matches, hence the suppressWarnings()
        cmd <- paste0("LC_ALL=C grep '[^ -~]' ", shQuote(i))
        capture <- suppressWarnings(system(cmd, intern=TRUE))
        if (length(capture) > 0) {
            # shQuote(): a path containing spaces must stay a single argument
            cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'", shQuote(i))
            print( paste("WARNING: Non ASCII characters have been removed from the ", i, "file") )
            system(cmd, intern=TRUE)
            processed <- TRUE
        }
    }
    if (processed) cat("\n\n")
    return(processed)
}


# This function will compute MD5 checksum to check the data integrity
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
#   directory : files and/or directories to search recursively
#   returns   : one column matrix of md5 sums, one row per file
getMd5sum <- function (directory) {
    cat("Compute md5 checksum...\n")
    # WHAT XCMS WILL FIND
    files <- .listMSFiles(directory, .getMSFilePattern())
    files <- files[file.exists(files)]

    # tools:: instead of library(tools): nothing gets attached as a side effect
    return(as.matrix(tools::md5sum(files)))
}


# This function get the raw file path from the arguments
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
#   singlefile : current value, replaced when singlefile_galaxyPath* is given
#   zipfile    : current value, replaced when zipfile* is given
#   args       : list of parsed arguments; the "|" separated
#                singlefile_galaxyPath*/singlefile_sampleName* values are
#                paired by position
#   returns    : list(zipfile, singlefile, args) where args has lost all the
#                zipfile*/singlefile_* entries
getRawfilePathFromArguments <- function(singlefile, zipfile, args) {
    if (!is.null(args$zipfile)) zipfile <- args$zipfile
    if (!is.null(args$zipfilePositive)) zipfile <- args$zipfilePositive
    if (!is.null(args$zipfileNegative)) zipfile <- args$zipfileNegative

    # explicit local state: exists() would also find a global with this name
    singlefile_galaxyPaths <- NULL
    singlefile_sampleNames <- NULL
    if (!is.null(args$singlefile_galaxyPath)) {
        singlefile_galaxyPaths <- args$singlefile_galaxyPath
        singlefile_sampleNames <- args$singlefile_sampleName
    }
    if (!is.null(args$singlefile_galaxyPathPositive)) {
        singlefile_galaxyPaths <- args$singlefile_galaxyPathPositive
        singlefile_sampleNames <- args$singlefile_sampleNamePositive
    }
    if (!is.null(args$singlefile_galaxyPathNegative)) {
        singlefile_galaxyPaths <- args$singlefile_galaxyPathNegative
        singlefile_sampleNames <- args$singlefile_sampleNameNegative
    }
    if (!is.null(singlefile_galaxyPaths)) {
        singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths, "\\|"))
        singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames, "\\|"))

        singlefile <- NULL
        for (singlefile_galaxyPath_i in seq_along(singlefile_galaxyPaths)) {
            singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
            singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
            singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
        }
    }
    for (argument in c("zipfile", "zipfilePositive", "zipfileNegative", "singlefile_galaxyPath", "singlefile_sampleName", "singlefile_galaxyPathPositive", "singlefile_sampleNamePositive", "singlefile_galaxyPathNegative", "singlefile_sampleNameNegative")) {
        args[[argument]] <- NULL
    }
    return(list(zipfile=zipfile, singlefile=singlefile, args=args))
}


# This function retrieve the raw file in the working directory
# - if zipfile: unzip the file with its directory tree
# - if singlefiles: set symlink with the good filename
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
# Make the raw files available in the working directory.
#   singlefile : NULL, or a named collection <sample file name> -> <galaxy path>;
#                each path is hard-linked (or copied when the link fails) under
#                its sample file name
#   zipfile    : NULL, "" or the path of a zip file to extract here
#   returns    : the directory holding the raw files: the single top-level
#                directory of the zip when there is exactly one, else "."
retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
    # default so that the function also returns when no input is supplied
    directory <- "."

    if (!is.null(singlefile) && (length(singlefile) > 0)) {
        for (singlefile_sampleName in names(singlefile)) {
            singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
            if (!file.exists(singlefile_galaxyPath)) {
                error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!")
                print(error_message); stop(error_message)
            }

            # a failing link (FALSE or error) falls back to a plain copy
            linked <- tryCatch(
                suppressWarnings(file.link(singlefile_galaxyPath, singlefile_sampleName)),
                error = function(e) FALSE
            )
            if (!isTRUE(linked)) {
                file.copy(singlefile_galaxyPath, singlefile_sampleName)
            }

        }
        directory <- "."

    }
    if (!is.null(zipfile) && (zipfile != "")) {
        if (!file.exists(zipfile)) {
            error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!")
            print(error_message)
            stop(error_message)
        }

        #unzip
        suppressWarnings(unzip(zipfile, unzip="unzip"))

        #get the directory name
        suppressWarnings(filesInZip <- unzip(zipfile, list=TRUE))
        directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1])))
        # dir.exists() is never NA, unlike file.info()$isdir on a missing path
        directories <- directories[!(directories %in% c("__MACOSX")) & dir.exists(directories)]
        directory <- "."
        if (length(directories) == 1) directory <- directories

        cat("files_root_directory\t", directory, "\n")

    }
    return (directory)
}


# This function retrieve a xset like object
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
#   xobject : an xcmsSet (returned as is) or an XCMSnExp (converted)
#   returns : an xcmsSet; invisible NULL for any other class
getxcmsSetObject <- function(xobject) {
    # XCMS 1.x
    # is() gives a single logical even when class() has several elements
    if (is(xobject, "xcmsSet")) {
        return (xobject)
    }
    # XCMS 3.x
    if (is(xobject, "XCMSnExp")) {
        # Get the legacy xcmsSet object
        suppressWarnings(xset <- as(xobject, 'xcmsSet'))
        sampclass(xset) <- xset@phenoData$sample_group
        return (xset)
    }
    invisible(NULL)
}


#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
# https://github.com/sneumann/xcms/issues/250
# Build the feature names M<mzmed>T<rtmed>; duplicated names get a _1, _2, ...
# suffix.
#   xdata : object accepted by featureDefinitions()
#   mzdec : decimal places for mzmed
#   rtdec : decimal places for rtmed
groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) {
    mzfmt <- paste("%.", mzdec, "f", sep = "")
    rtfmt <- paste("%.", rtdec, "f", sep = "")

    featureDefs <- featureDefinitions(xdata)
    gnames <- paste("M", sprintf(mzfmt, featureDefs[, "mzmed"]), "T",
                    sprintf(rtfmt, featureDefs[, "rtmed"]), sep = "")

    if (any(dup <- duplicated(gnames))) {
        for (dupname in unique(gnames[dup])) {
            dupidx <- which(gnames == dupname)
            gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_")
        }
    }

    return (gnames)
}

#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
# https://github.com/sneumann/xcms/issues/247
# Concatenate several XCMSnExp objects into one. Adjusted retention times and
# feature definitions are dropped (with a warning); chromatographic peaks and
# process histories are kept, their sample/file indices being shifted by the
# number of files of the preceding objects.
#   ... : XCMSnExp objects
#   returns : NULL without input, the object itself for a single input, else
#             the concatenated XCMSnExp
.concatenate_XCMSnExp <- function(...) {
    x <- list(...)
    if (length(x) == 0)
        return(NULL)
    if (length(x) == 1)
        return(x[[1]])
    ## Check that all are XCMSnExp objects.
    if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp")))))
        stop("All passed objects should be 'XCMSnExp' objects")
    new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp")
    ## If any of the XCMSnExp has alignment results or detected features drop
    ## them!
    x <- lapply(x, function(z) {
        if (hasAdjustedRtime(z)) {
            z <- dropAdjustedRtime(z)
            warning("Adjusted retention times found, had to drop them.")
        }
        if (hasFeatures(z)) {
            z <- dropFeatureDefinitions(z)
            warning("Feature definitions found, had to drop them.")
        }
        z
    })
    ## Combine peaks
    fls <- lapply(x, fileNames)
    startidx <- cumsum(lengths(fls))
    pks <- lapply(x, chromPeaks)
    procH <- lapply(x, processHistory)
    ## length(fls) >= 2 here, so 2:length(fls) never counts backwards
    for (i in 2:length(fls)) {
        pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1]
        procH[[i]] <- lapply(procH[[i]], function(z) {
            z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1])
            z
        })
    }
    pks <- do.call(rbind, pks)
    new_x@.processHistory <- unlist(procH)
    chromPeaks(new_x) <- pks
    if (validObject(new_x))
        new_x
}

#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
# https://github.com/sneumann/xcms/issues/247
# Concatenate several OnDiskMSnExp objects into one: feature data (file index
# and spectrum names updated), experiment data, pheno data and file names are
# merged. Protocol data are not merged; differing processing queues are dropped
# with a warning.
#   ... : OnDiskMSnExp objects
#   returns : NULL without input, the object itself for a single input, else
#             the concatenated OnDiskMSnExp
.concatenate_OnDiskMSnExp <- function(...) {
    x <- list(...)
    if (length(x) == 0)
        return(NULL)
    if (length(x) == 1)
        return(x[[1]])
    ## Check that all are OnDiskMSnExp objects.
    if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp")))))
        stop("All passed objects should be 'OnDiskMSnExp' objects")
    ## Check processingQueue
    procQ <- lapply(x, function(z) z@spectraProcessingQueue)
    new_procQ <- procQ[[1]]
    is_ok <- unlist(lapply(procQ, function(z)
        !is.character(all.equal(new_procQ, z))
    ))
    if (any(!is_ok)) {
        warning("Processing queues from the submitted objects differ! ",
                "Dropping the processing queue.")
        new_procQ <- list()
    }
    ## processingData
    fls <- lapply(x, function(z) z@processingData@files)
    startidx <- cumsum(lengths(fls))
    ## featureData
    featd <- lapply(x, fData)
    ## Have to update the file index and the spectrum names.
    for (i in 2:length(featd)) {
        featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1]
        rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames(
            fileIds = featd[[i]]$fileIdx,
            spectrumIds = featd[[i]]$spIdx,
            nSpectra = nrow(featd[[i]]),
            nFiles = length(unlist(fls))
        )
    }
    featd <- do.call(rbind, featd)
    featd$spectrum <- seq_len(nrow(featd))
    ## experimentData
    expdata <- lapply(x, function(z) {
        ed <- z@experimentData
        data.frame(instrumentManufacturer = ed@instrumentManufacturer,
                   instrumentModel = ed@instrumentModel,
                   ionSource = ed@ionSource,
                   analyser = ed@analyser,
                   detectorType = ed@detectorType,
                   stringsAsFactors = FALSE)
    })
    expdata <- do.call(rbind, expdata)
    expdata <- new("MIAPE",
                   instrumentManufacturer = expdata$instrumentManufacturer,
                   instrumentModel = expdata$instrumentModel,
                   ionSource = expdata$ionSource,
                   analyser = expdata$analyser,
                   detectorType = expdata$detectorType)

    ## protocolData
    protodata <- lapply(x, function(z) z@protocolData)
    if (any(unlist(lapply(protodata, nrow)) > 0))
        warning("Found non-empty protocol data, but merging protocol data is",
                " currently not supported. Skipped.")
    ## phenoData
    pdata <- do.call(rbind, lapply(x, pData))
    res <- new(
        "OnDiskMSnExp",
        phenoData = new("NAnnotatedDataFrame", data = pdata),
        featureData = new("AnnotatedDataFrame", featd),
        processingData = new("MSnProcess",
                             processing = paste0("Concatenated [", date(), "]"),
                             files = unlist(fls), smoothed = NA),
        experimentData = expdata,
        spectraProcessingQueue = new_procQ)
    if (validObject(res))
        res
}

#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
# https://github.com/sneumann/xcms/issues/247
# c() method for XCMSnExp objects, see .concatenate_XCMSnExp()
c.XCMSnExp <- function(...) {
    .concatenate_XCMSnExp(...)
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Mar 08 02:48:44 2018 -0500 @@ -0,0 +1,191 @@ +<?xml version="1.0"?> +<macros> + <token name="@WRAPPER_VERSION@">3.0.0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@WRAPPER_VERSION@">bioconductor-xcms</requirement> + <requirement type="package" version="1.1_4">r-batch</requirement> + <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement> + <requirement type="package" version="6.0">unzip</requirement> + <yield /> + </requirements> + </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1" level="fatal" /> + </stdio> + </xml> + + <token name="@COMMAND_XCMS_SCRIPT@">LC_ALL=C Rscript $__tool_directory__/</token> + + <token name="@COMMAND_LOG_EXIT@"> + ; + return=\$?; + cat 'log.txt'; + sh -c "exit \$return" + </token> + + <xml name="input_validator_range_integer"> + <validator type="regex" message="The format is 'min,max'" >[0-9]+ *, *[0-9]+</validator> + </xml> + + <xml name="input_validator_range_float"> + <validator type="regex" message="The format is 'min,max'" >[0-9]+\.?[0-9]* *, *[0-9]+\.?[0-9]*</validator> + </xml> + + <xml name="input_validator_list_integer"> + <validator type="regex" message="The format is '1,2,4,6'" >[0-9, ]+</validator> + </xml> + + <token name="@INPUT_IMAGE_LABEL@">RData file</token> + <token name="@INPUT_IMAGE_HELP@">It contain a xcms3::XCMSnExp object (named xdata)</token> + + <!-- zipfile load for planemo test --> + + <token name="@COMMAND_FILE_LOAD@"> + #if $file_load_section.file_load_conditional.file_load_select == "yes": + #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"): + #set singlefile_galaxyPath = '|'.join( [ str( $single_file ) for $single_file in 
$file_load_section.file_load_conditional.input ] ) + #set singlefile_sampleName = '|'.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] ) + + singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName' + #else + zipfile '$file_load_section.file_load_conditional.input' + #end if + #end if + </token> + + <xml name="input_file_load"> + <section name="file_load_section" title="Resubmit your raw dataset or your zip file"> + <conditional name="file_load_conditional"> + <param name="file_load_select" type="select" label="Resubmit your dataset or your zip file" help="Use only if you get a message which say that your original dataset or zip file have been deleted on the server." > + <option value="no" >no need</option> + <option value="yes" >yes</option> + </param> + <when value="no"> + </when> + <when value="yes"> + <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" multiple="true" label="File(s) from your history containing your chromatograms" help="Single file mode for the format: mzxml, mzml, mzdata and netcdf. Zip file mode for the format: no_unzip.zip, zip. See the help section below." 
/> + </when> + </conditional> + </section> + </xml> + + <xml name="test_file_load_zip"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="faahKO_reduce.zip" ftype="zip" /> + </conditional> + </section> + </xml> + + <xml name="test_file_load_zip_sacuri"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="sacuri_dir_root.zip" ftype="zip" /> + </conditional> + </section> + </xml> + + <xml name="test_file_load_single"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="wt15.CDF,ko16.CDF,ko15.CDF,wt16.CDF" ftype="netcdf" /> + </conditional> + </section> + </xml> + + <token name="@COMMAND_PEAKLIST@"> + #if $peaklist.peaklistBool + convertRTMinute $peaklist.convertRTMinute + numDigitsMZ $peaklist.numDigitsMZ + numDigitsRT $peaklist.numDigitsRT + intval $peaklist.intval + #end if + </token> + + <xml name="input_peaklist"> + <conditional name="peaklist"> + <param name="peaklistBool" type="boolean" label="Get a Peak List" /> + <when value="true"> + <param name="convertRTMinute" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Convert retention time (seconds) into minutes" help="Convert the columns rtmed, rtmin and rtmax into minutes"/> + <param name="numDigitsMZ" type="integer" value="4" label="Number of decimal places for mass values reported in ions' identifiers." help="A minimum of 4 decimal places is recommended. Useful to avoid duplicates within identifiers" /> + <param name="numDigitsRT" type="integer" value="0" label="Number of decimal places for retention time values reported in ions' identifiers." 
help="Useful to avoid duplicates within identifiers" /> + <param name="intval" type="select" label="Reported intensity values" help="[intval] See the help section below"> + <option value="into" selected="true">into</option> + <option value="maxo">maxo</option> + <option value="intb">intb</option> + </param> + </when> + <when value="false" /> + </conditional> + </xml> + + <xml name="output_peaklist" token_function=""> + <data name="variableMetadata" format="tabular" label="${image.name[:-6]}.@FUNCTION@.variableMetadata.tsv" from_work_dir="variableMetadata.tsv" > + <filter>(peaklist['peaklistBool'])</filter> + </data> + <data name="dataMatrix" format="tabular" label="${image.name[:-6]}.@FUNCTION@.dataMatrix.tsv" from_work_dir="dataMatrix.tsv" > + <filter>(peaklist['peaklistBool'])</filter> + </data> + </xml> + + <token name="@HELP_AUTHORS@"> +.. class:: infomark + +**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu + +.. class:: infomark + +**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M] + + | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool. + +--------------------------------------------------- + + </token> + + <token name="@HELP_XCMS_MANUAL@"> + +For details and explanations for all the parameters and the workflow of xcms_ package, see its manual_ and this example_ + +.. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html +.. _manual: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf +.. 
_example: https://bioconductor.org/packages/release/bioc/vignettes/xcms/inst/doc/xcms.html + + </token> + + <token name="@HELP_PEAKLIST@"> + +Get a Peak List +--------------- + +If 'true', the module generates two additional files corresponding to the peak list: +- the variable metadata file (corresponding to information about extracted ions such as mass or retention time) +- the data matrix (corresponding to related intensities) + +**decimal places for [mass or retention time] values in identifiers** + + | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time. + | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively. + | These parameters do not affect decimal places in columns other than the identifier one. + +**Reported intensity values** + + | This parameter determines which values should be reported as intensities in the dataMatrix table; it corresponds to xcms 'intval' parameter: + | - into: integrated area of original (raw) peak + | - maxo: maximum intensity of original (raw) peak + | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’) + + </token> + + <xml name="citation"> + <citations> + <citation type="doi">10.1021/ac051437y</citation> + <citation type="doi">10.1093/bioinformatics/btu813</citation> + </citations> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Thu Mar 08 02:48:44 2018 -0500 @@ -0,0 +1,5 @@ +<?xml version="1.0"?> +<repositories> + <repository changeset_revision="91815b6d07fe" name="no_unzip_datatype" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="bff835d58914" name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> +</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xcms_plot_chromatogram.r Thu Mar 08 02:48:44 2018 -0500 @@ -0,0 +1,82 @@
#!/usr/bin/env Rscript

# Galaxy wrapper script: loads the 'xdata' object stored in an RData image,
# makes the raw files available in the working directory and draws the TIC and
# BPI chromatograms into TICs.pdf and BPIs.pdf. Standard output goes to log.txt.

# ----- LOG FILE -----
log_file <- file("log.txt", open = "wt")
# standard output only: errors and warnings still reach stderr
sink(log_file, type = "output")


# ----- PACKAGE -----
cat("\tSESSION INFO\n")

#Import the different functions
# source a file located next to this script (path taken from Rscript --file=)
source_local <- function(fname) {
    argv <- commandArgs(trailingOnly = FALSE)
    base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
    source(paste(base_dir, fname, sep = "/"))
}
source_local("lib.r")

pkgs <- c("xcms", "batch", "RColorBrewer")
loadAndDisplayPackages(pkgs)
cat("\n\n")


# ----- ARGUMENTS -----
cat("\tARGUMENTS INFO\n")
args <- parseCommandArgs(evaluate = FALSE) #interpretation of arguments given in command line as an R list of objects
write.table(as.matrix(args), col.names = FALSE, quote = FALSE, sep = '\t')

cat("\n\n")

# ----- ARGUMENTS PROCESSING -----
cat("\tARGUMENTS PROCESSING INFO\n")

#saving the specific parameters
method <- args$method; args$method <- NULL

cat("\n\n")


# ----- PROCESSING INFILE -----
cat("\tINFILE PROCESSING INFO\n")

#image is an .RData file necessary to use xset variable given by previous tools
# fail with an explicit message rather than with the error of load(NULL)
if (is.null(args$image)) stop("\n\nERROR: The 'image' argument (RData file) is missing.")
load(args$image); args$image <- NULL
if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")

# Handle infiles
# singlefile and zipfile may come from the loaded RData
if (!exists("singlefile")) singlefile <- NULL
if (!exists("zipfile")) zipfile <- NULL
rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
zipfile <- rawFilePath$zipfile
singlefile <- rawFilePath$singlefile
args <- rawFilePath$args
# called for its side effect: link/unzip the raw files in the working directory
directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)


cat("\n\n")


# ----- MAIN PROCESSING INFO -----
cat("\tMAIN PROCESSING INFO\n")


cat("\t\tDRAW GRAPHICS\n")

# the chromatograms are drawn from the XCMSnExp object (xdata)
getPlotTICs(xdata, pdfname = "TICs.pdf")
getPlotBPIs(xdata, pdfname = "BPIs.pdf")

cat("\n\n")

# ----- EXPORT -----

cat("\tXCMSnExp OBJECT INFO\n")
print(xdata)
cat("\n\n")

cat("\txcmsSet OBJECT INFO\n")
# Get the legacy xcmsSet object
xset <- getxcmsSetObject(xdata)
print(xset)
cat("\n\n")


cat("\tDONE\n")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xcms_plot_chromatogram.xml Thu Mar 08 02:48:44 2018 -0500 @@ -0,0 +1,93 @@
<!--
    Galaxy tool wrapper for xcms_plot_chromatogram.r.
    Takes an RData image, runs the R script on it and collects the two PDFs
    (TICs.pdf, BPIs.pdf) that the script writes into the working directory.
    Tokens (@...@) and the expanded macros come from macros.xml.
-->
<tool id="xcms_plot_chromatogram" name="xcms plot chromatogram" version="@WRAPPER_VERSION@.0">
    <description>Plot base peak intensity chromatogram (BPI) and total ion chromatogram (TIC) from an xcms experiment</description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements"/>
    <expand macro="stdio"/>

    <command><![CDATA[
        @COMMAND_XCMS_SCRIPT@/xcms_plot_chromatogram.r

        image '$image'

        @COMMAND_FILE_LOAD@
    ]]></command>

    <inputs>
        <param name="image" type="data" format="rdata.xcms.raw,rdata.xcms.group,rdata.xcms.retcor,rdata.xcms.fillpeaks,rdata" label="@INPUT_IMAGE_LABEL@" help="@INPUT_IMAGE_HELP@ from: findChromPeaks, groupChromPeaks or adjustRtime" />

        <expand macro="input_file_load"/>
    </inputs>

    <outputs>
        <!-- The labels drop the last 6 characters of the input dataset name (presumably the ".RData" extension) -->
        <data name="ticsPdf" format="pdf" label="${image.name[:-6]}.TICs.pdf" from_work_dir="TICs.pdf"/>
        <data name="bpisPdf" format="pdf" label="${image.name[:-6]}.BPIs.pdf" from_work_dir="BPIs.pdf" />
    </outputs>

    <tests>
        <test>
            <param name="image" value="faahKO-single.xset.merged.group.retcor.RData"/>
            <expand macro="test_file_load_single"/>
            <!-- PDFs are compared by file size only, with a tolerance of 600 -->
            <output name="ticsPdf" value="TICs.pdf" ftype="pdf" compare="sim_size" delta="600" />
            <output name="bpisPdf" value="BPIs.pdf" ftype="pdf" compare="sim_size" delta="600" />
        </test>
    </tests>

    <help><![CDATA[

@HELP_AUTHORS@

======================
xcms plot chromatogram
======================

-----------
Description
-----------

This tool plots the base peak intensity chromatogram (BPI) and the total ion chromatogram (TIC) from an xcms experiment.


-----------------
Workflow position
-----------------

**Upstream tools**

========================= ================= =================== ==========
Name                      output file       format              parameter
========================= ================= =================== ==========
xcms.xcmsSet              xset.RData        rdata.xcms.raw      RData file
------------------------- ----------------- ------------------- ----------
xcms.retcor               xset.RData        rdata.xcms.retcor   RData file
========================= ================= =================== ==========

.. image:: xcms_plot_chromatogram_workflow.png


------------
Output files
------------

**Total Ion Current (TIC) chromatogram**
    | Sum of the intensities (Y) of all ions detected at each retention time (X)

**Base Peak Intensity Chromatogram (BPI)**
    | Intensity (Y) of the most intense peak at each retention time (X)

---------------------------------------------------

Changelog/News
--------------

**Version 3.0.0.0 - 07/03/2018**

- NEW: This new tool plots the base peak intensity chromatogram (BPI) and the total ion chromatogram (TIC) from an xcms experiment. It replaces the plots previously created by the xcmsSet and retcor tools.

    ]]></help>

    <expand macro="citation" />
</tool>