# HG changeset patch # User lecorguille # Date 1520506292 18000 # Node ID efd23113d5f45e32b8995a463f64bce40500be53 # Parent c363b9f1caefc0907b4a310e268ae1d8a7ea1eef planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 49203f8a5271fa5e6bb889e907df71ebf7757309 diff -r c363b9f1caef -r efd23113d5f4 README.rst --- a/README.rst Mon Mar 05 04:15:34 2018 -0500 +++ b/README.rst Thu Mar 08 05:51:32 2018 -0500 @@ -22,27 +22,47 @@ Changelog/News -------------- +**Version 3.0.0.0 - 08/03/2018** + +- UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlying codes and methods. Some parameters may have been renamed. + +- NEW: a bunch of new options: Spectra Filters (previously scanrange), CentWave.(mzCenterFun, fitgauss, verboseColumns), MatchedFilter.(sigma, impute, baseValue, max), MSW.(verboseColumns), ... + +- UPDATE: since xcms 3.0.0, some options are no more available: scanrange (replace by filters), profmethod, MatchedFilter.step, MatchedFilter.sigma, MSW.winSize.noise, MSW.SNR.method + +- IMPROVEMENT: the advanced options are now in sections. It will allow you to access to all the parameters and to know their default values. + +- IMPROVEMENT: the tool "should" be now more flexible in term of file naming: it "should" accept space and comma. But don't be too imaginative :) + +- CHANGE: removing of the TIC and BPC plots. You can new use the dedicated tool "xcms plot chromatogram" + + **Version 2.1.1 - 29/11/2017** - BUGFIX: To avoid issues with accented letter in the parentFile tag of the mzXML files, we changed a hidden mechanim to LC_ALL=C + **Version 2.1.0 - 22/02/2017** - NEW: The W4M tools will be able now to take as input a single file. It will allow to submit in parallel several files and merge them afterward using "xcms.xcmsSet Merger" before "xcms.group". - BUGFIX: the default value of "matchedFilter" -> "Step size to use for profile generation" which was of 0.01 have been changed to fix with the XMCS default values to 0.1 + **Version 2.0.11 - 22/12/2016** - BUGFIX: propose scanrange for all methods + **Version 2.0.10 - 22/12/2016** - BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph + **Version 2.0.9 - 06/07/2016** -- UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0 +- UPGRADE: upgrade the xcms version from 1.44.0 to 1.46.0 + **Version 2.0.8 - 06/04/2016** @@ -59,7 +79,7 @@ - UPDATE: refactoring of internal management of inputs/outputs -- UPDATE: refactoring to feed the new report tool +- TEST: refactoring to feed the new report tool **Version 2.0.2 - 18/01/2016** diff -r c363b9f1caef -r efd23113d5f4 abims_xcms_xcmsSet.xml --- a/abims_xcms_xcmsSet.xml Mon Mar 05 04:15:34 2018 -0500 +++ b/abims_xcms_xcmsSet.xml Thu Mar 08 05:51:32 2018 -0500 @@ -181,8 +181,6 @@ input.extension not in ["mzxml","mzml","mzdata","netcdf"] - - @@ -620,40 +618,53 @@ Changelog/News -------------- -**Version 3.0.0.0 - 14/02/2018** +**Version 3.0.0.0 - 08/03/2018** -- UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlining codes and methods +- UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlying codes and methods. Some parameters may have been renamed. + +- NEW: a bunch of new options: Spectra Filters (previously scanrange), CentWave.(mzCenterFun, fitgauss, verboseColumns), MatchedFilter.(sigma, impute, baseValue, max), MSW.(verboseColumns), ... -- NEW: a bunch of new options: Spectra Filters, CentWave.mzCenterFun, CentWave.fitgauss, CentWave.verboseColumns, MatchedFilter.sigma +- UPDATE: since xcms 3.0.0, some options are no more available: scanrange (replace by filters), profmethod, MatchedFilter.step, MatchedFilter.sigma, MSW.winSize.noise, MSW.SNR.method + +- IMPROVEMENT: the advanced options are now in sections. It will allow you to access to all the parameters and to know their default values. -- UPDATE: since xcms 3.0.0, some options are no more available: scanrange, profmethod, MatchedFilter.step, MatchedFilter.sigma, MSW.winSize.noise, MSW.SNR.method +- IMPROVEMENT: the tool "should" be now more flexible in term of file naming: it "should" accept space and comma. But don't be too imaginative :) + +- CHANGE: removing of the TIC and BPC plots. You can new use the dedicated tool "xcms plot chromatogram" + **Version 2.1.1 - 29/11/2017** - BUGFIX: To avoid issues with accented letter in the parentFile tag of the mzXML files, we changed a hidden mechanim to LC_ALL=C + **Version 2.1.0 - 22/02/2017** - NEW: The W4M tools will be able now to take as input a single file. It will allow to submit in parallel several files and merge them afterward using "xcms.xcmsSet Merger" before "xcms.group". - BUGFIX: the default value of "matchedFilter" -> "Step size to use for profile generation" which was of 0.01 have been changed to fix with the XMCS default values to 0.1 + **Version 2.0.11 - 22/12/2016** - BUGFIX: propose scanrange for all methods + **Version 2.0.10 - 22/12/2016** - BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph + **Version 2.0.9 - 06/07/2016** - UPGRADE: upgrade the xcms version from 1.44.0 to 1.46.0 + **Version 2.0.8 - 06/04/2016** - TEST: refactoring to pass planemo test using conda dependencies + **Version 2.0.7 - 10/02/2016** - BUGFIX: better management of errors. Datasets remained green although the process failed @@ -666,14 +677,17 @@ - TEST: refactoring to feed the new report tool + **Version 2.0.2 - 18/01/2016** - BUGFIX: Some zip files were tag as "corrupt" by R. We have changed the extraction mode to deal with thoses cases. + **Version 2.0.2 - 09/10/2015** - BUGFIX: Some users reported a bug in xcms.xcmsSet. The preprocessing stops itself and doesn't import the whole dataset contained in the zip file without warning. But meanwhile, please check your samplemetadata dataset and the number of rows. + **Version 2.0.2 - 02/06/2015** - NEW: The W4M workflows will now take as input a zip file to ease the transfer and to improve dataset exchange between tools and users. (See How_to_upload). The previous "Library directory name" is still available but we invite user to switch on the new zip system as soon as possible. diff -r c363b9f1caef -r efd23113d5f4 destinations_input_type.py --- a/destinations_input_type.py Mon Mar 05 04:15:34 2018 -0500 +++ b/destinations_input_type.py Thu Mar 08 05:51:32 2018 -0500 @@ -22,5 +22,4 @@ if input_extension in ["mzxml","mzml","mzdata","netcdf"]: return 'thread1-mem_free8' # zip file - return 'thread9-mem_free8' - + return 'thread8-mem_free16' diff -r c363b9f1caef -r efd23113d5f4 job_conf.xml.sample --- a/job_conf.xml.sample Mon Mar 05 04:15:34 2018 -0500 +++ b/job_conf.xml.sample Thu Mar 08 05:51:32 2018 -0500 @@ -15,11 +15,11 @@ -V -w n -q galaxy.q - - -V -w n -q galaxy.q -pe thread 1 -R y -l mem_free=10G + + -V -w n -q galaxy.q -pe thread 1 -R y -l mem_free=8G - - -V -w n -q galaxy.q -pe thread 4 -R y -l mem_free=10G + + -V -w n -q galaxy.q -pe thread 8 -R y -l mem_free=16G diff -r c363b9f1caef -r efd23113d5f4 lib.r --- a/lib.r Mon Mar 05 04:15:34 2018 -0500 +++ b/lib.r Thu Mar 08 05:51:32 2018 -0500 @@ -74,15 +74,19 @@ #@author G. Le Corguille # Draw the plotChromPeakDensity 3 per page in a pdf file getPlotAdjustedRtime <- function(xdata) { + pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) + # Color by group group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] names(group_colors) <- unique(xdata$sample_group) plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + # Color by sample plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) + dev.off() } @@ -104,255 +108,46 @@ } -#@author Y. Guitton -getBPC <- function(file,rtcor=NULL, ...) { - object <- xcmsRaw(file) - sel <- profRange(object, ...) - cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) - #plotChrom(xcmsRaw(file), base=T) +#@author G. Le Corguille +getPlotChromatogram <- function(xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { + + chrom <- chromatogram(xdata, aggregationFun = aggregationFun) + if (aggregationFun == "sum") + type="Total Ion Chromatograms" + else + type="Base Peak Intensity Chromatograms" + + adjusted="Raw" + if (hasAdjustedRtime(xdata)) + adjusted="Adjusted" + + main <- paste(type,":",adjusted,"data") + + pdf(pdfname, width=16, height=10) + + # Color by group + group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] + names(group_colors) <- unique(xdata$sample_group) + plot(chrom, col = group_colors[chrom$sample_group], main=main) + legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + + # Color by sample + plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) + legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) + + dev.off() } -#@author Y. Guitton -getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { - cat("Creating BIC pdf...\n") - - if (is.null(xcmsSet)) { - cat("Enter an xcmsSet \n") - stop() - } else { - files <- filepaths(xcmsSet) - } - - phenoDataClass <- as.vector(levels(xcmsSet@phenoData[,"class"])) #sometime phenoData have more than 1 column use first as class - - classnames <- vector("list",length(phenoDataClass)) - for (i in 1:length(phenoDataClass)){ - classnames[[i]] <- which( xcmsSet@phenoData[,"class"]==phenoDataClass[i]) - } - - N <- dim(phenoData(xcmsSet))[1] - - TIC <- vector("list",N) - - - for (j in 1:N) { - - TIC[[j]] <- getBPC(files[j]) - #good for raw - # seems strange for corrected - #errors if scanrange used in xcmsSetgeneration - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[j]] - else - rtcor <- NULL - - TIC[[j]] <- getBPC(files[j],rtcor=rtcor) - # TIC[[j]][,1]<-rtcor - } - - - - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty <- 1:N - pch <- 1:N - #search for max x and max y in BPCs - xlim <- range(sapply(TIC, function(x) range(x[,1]))) - ylim <- range(sapply(TIC, function(x) range(x[,2]))) - ylim <- c(-ylim[2], ylim[2]) - - - ##plot start - - if (length(phenoDataClass)>2){ - for (k in 1:(length(phenoDataClass)-1)){ - for (l in (k+1):length(phenoDataClass)){ - #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="BPC") - colvect <- NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect <- append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i <- class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") - colvect <- append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) - } - } - }#end if length >2 - - if (length(phenoDataClass)==2){ - k <- 1 - l <- 2 - colvect <- NULL - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="BPC") - - for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i <- class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") - colvect <- append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) - - }#end length ==2 - - #case where only one class - if (length(phenoDataClass)==1){ - k <- 1 - ylim <- range(sapply(TIC, function(x) range(x[,2]))) - colvect <- NULL - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab="Retention Time (min)", ylab="BPC") - - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect <- append(colvect,cols[classnames[[k]][j]]) - } - - legend("topright",paste(basename(files[c(classnames[[k]])])), col=colvect, lty=lty, pch=pch) - - }#end length ==1 - - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) +#@author G. Le Corguille +getPlotTICs <- function(xdata, pdfname="TICs.pdf") { + getPlotChromatogram(xdata, pdfname, aggregationFun = "sum") } - - -#@author Y. Guitton -getTIC <- function(file, rtcor=NULL) { - object <- xcmsRaw(file) - cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object, mzrange=range(object@env$mz))$intensity) +#@author G. Le Corguille +getPlotBPIs <- function(xdata, pdfname="BPIs.pdf") { + getPlotChromatogram(xdata, pdfname, aggregationFun = "max") } -#overlay TIC from all files in current folder or from xcmsSet, create pdf -#@author Y. Guitton -getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf", rt=c("raw","corrected")) { - cat("Creating TIC pdf...\n") - - if (is.null(xcmsSet)) { - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep=""), collapse="|") - if (is.null(files)) - files <- getwd() - info <- file.info(files) - listed <- list.files(files[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) - files <- c(files[!info$isdir], listed) - } else { - files <- filepaths(xcmsSet) - } - - phenoDataClass <- as.vector(levels(xcmsSet@phenoData[,"class"])) #sometime phenoData have more than 1 column use first as class - classnames <- vector("list",length(phenoDataClass)) - for (i in 1:length(phenoDataClass)){ - classnames[[i]] <- which( xcmsSet@phenoData[,"class"]==phenoDataClass[i]) - } - - N <- length(files) - TIC <- vector("list",N) - - for (i in 1:N) { - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[i]] else - rtcor <- NULL - TIC[[i]] <- getTIC(files[i], rtcor=rtcor) - } - - pdf(pdfname, w=16, h=10) - cols <- rainbow(N) - lty <- 1:N - pch <- 1:N - #search for max x and max y in TICs - xlim <- range(sapply(TIC, function(x) range(x[,1]))) - ylim <- range(sapply(TIC, function(x) range(x[,2]))) - ylim <- c(-ylim[2], ylim[2]) - - - ##plot start - if (length(phenoDataClass)>2){ - for (k in 1:(length(phenoDataClass)-1)){ - for (l in (k+1):length(phenoDataClass)){ - #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="TIC") - colvect <- NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect <- append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") - colvect <- append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) - } - } - }#end if length >2 - if (length(phenoDataClass)==2){ - k <- 1 - l <- 2 - - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="TIC") - colvect <- NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect <- append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i <- class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") - colvect <- append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) - - }#end length ==2 - - #case where only one class - if (length(phenoDataClass)==1){ - k <- 1 - ylim <- range(sapply(TIC, function(x) range(x[,2]))) - - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab="Retention Time (min)", ylab="TIC") - colvect <- NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect <- append(colvect,cols[classnames[[k]][j]]) - } - - legend("topright",paste(basename(files[c(classnames[[k]])])), col=colvect, lty=lty, pch=pch) - - }#end length ==1 - - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) -} - - # Get the polarities from all the samples of a condition #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM diff -r c363b9f1caef -r efd23113d5f4 macros.xml --- a/macros.xml Mon Mar 05 04:15:34 2018 -0500 +++ b/macros.xml Thu Mar 08 05:51:32 2018 -0500 @@ -37,6 +37,9 @@ [0-9, ]+ + RData file + It contain a xcms3::XCMSnExp object (named xdata) + diff -r c363b9f1caef -r efd23113d5f4 xcms_xcmsSet.r --- a/xcms_xcmsSet.r Mon Mar 05 04:15:34 2018 -0500 +++ b/xcms_xcmsSet.r Thu Mar 08 05:51:32 2018 -0500 @@ -115,18 +115,6 @@ # Create a sampleMetada file sampleNamesList <- getSampleMetadata(xdata=xdata, sampleMetadataOutput="sampleMetadata.tsv") -# Get the legacy xcmsSet object -xset <- getxcmsSetObject(xdata) - -cat("\n\n") - - -# -- TIC -- -cat("\t\tGET TIC GRAPH\n") -#@TODO: one day, use xdata instead of xset to draw the TICs and BPC or a complete other method -getTICs(xcmsSet=xset, rt="raw", pdfname="TICs.pdf") -getBPCs(xcmsSet=xset, rt="raw", pdfname="BICs.pdf") - cat("\n\n") # ----- EXPORT ----- @@ -136,6 +124,8 @@ cat("\n\n") cat("\txcmsSet OBJECT INFO\n") +# Get the legacy xcmsSet object +xset <- getxcmsSetObject(xdata) print(xset) cat("\n\n")