Mercurial > repos > lecorguille > xcms_retcor
changeset 33:69b5a006fca1 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 49203f8a5271fa5e6bb889e907df71ebf7757309
author | lecorguille |
---|---|
date | Thu, 08 Mar 2018 05:54:22 -0500 |
parents | 1aa7979204b4 |
children | 9714270678a7 |
files | README.rst abims_xcms_retcor.xml lib.r macros.xml xcms_retcor.r |
diffstat | 5 files changed, 73 insertions(+), 260 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Tue Mar 06 04:03:16 2018 -0500 +++ b/README.rst Thu Mar 08 05:54:22 2018 -0500 @@ -2,26 +2,42 @@ Changelog/News -------------- +**Version 3.0.0.0 - 08/03/2018** + +- UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlying codes and methods. Some parameters may have been renamed. + +- NEW: a bunch of new options: Obiwarp.(centerSample, response, distFun, gapInit, gapExtend, factorDiag, factorGap, localAlignment, initPenalty) + +- IMPROVEMENT: the advanced options are now in sections. It will allow you to access to all the parameters and to know their default values. + +- CHANGE: removing of the TIC and BPC plots. You can new use the dedicated tool "xcms plot chromatogram" + + **Version 2.1.1 - 29/11/2017** - BUGFIX: To avoid issues with accented letter in the parentFile tag of the mzXML files, we changed a hidden mechanim to LC_ALL=C + **Version 2.1.0 - 03/02/2017** - IMPROVEMENT: xcms.retcor can deal with merged individual data + **Version 2.0.8 - 22/12/2016** - BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph + **Version 2.0.7 - 06/07/2016** - UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0 + **Version 2.0.6 - 04/04/2016** - TEST: refactoring to pass planemo test using conda dependencies + **Version 2.0.5 - 10/02/2016** - BUGFIX: better management of errors. Datasets remained green although the process failed
--- a/abims_xcms_retcor.xml Tue Mar 06 04:03:16 2018 -0500 +++ b/abims_xcms_retcor.xml Thu Mar 08 05:54:22 2018 -0500 @@ -43,7 +43,7 @@ ]]></command> <inputs> - <param name="image" type="data" format="rdata.xcms.raw,rdata.xcms.group,rdata" label="xset RData file" help="output file from another function xcms (xcmsSet, retcor etc.)" /> + <param name="image" type="data" format="rdata.xcms.raw,rdata.xcms.group,rdata" label="@INPUT_IMAGE_LABEL@" help="@INPUT_IMAGE_HELP@ from: findChromPeaks, groupChromPeaks" /> <conditional name="methods"> <param name="method" type="select" label="Method to use for retention time correction" help="See the help section below" > <option value="PeakGroups" selected="true">PeakGroups - retention time correction based on aligment of features (peak groups) present in most/all samples.</option> @@ -128,8 +128,6 @@ <outputs> <data name="xsetRData" format="rdata.xcms.retcor" label="${image.name[:-6]}.retcor.RData" from_work_dir="retcor.RData" /> <data name="rawVSadjustedPdf" format="pdf" label="${image.name[:-6]}_rawVSadjusted.retcor.Rplots.pdf" from_work_dir="raw_vs_adjusted_rt.pdf" /> - <data name="ticsCorPdf" format="pdf" label="${image.name[:-6]}.retcor.TICs_adjusted.pdf" from_work_dir="TICs.pdf"/> - <data name="bpcsCorPdf" format="pdf" label="${image.name[:-6]}.retcor.BPCs_adjusted.pdf" from_work_dir="BICs.pdf" /> </outputs> <tests> @@ -366,26 +364,37 @@ Changelog/News -------------- -**Version 3.0.0.0 - 14/02/2018** +**Version 3.0.0.0 - 08/03/2018** + +- UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlying codes and methods. Some parameters may have been renamed. -- UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlining codes and methods +- NEW: a bunch of new options: Obiwarp.(centerSample, response, distFun, gapInit, gapExtend, factorDiag, factorGap, localAlignment, initPenalty) + +- IMPROVEMENT: the advanced options are now in sections. It will allow you to access to all the parameters and to know their default values. + +- CHANGE: removing of the TIC and BPC plots. You can new use the dedicated tool "xcms plot chromatogram" + **Version 2.1.1 - 29/11/2017** - BUGFIX: To avoid issues with accented letter in the parentFile tag of the mzXML files, we changed a hidden mechanim to LC_ALL=C + **Version 2.1.0 - 03/02/2017** - IMPROVEMENT: xcms.retcor can deal with merged individual data + **Version 2.0.8 - 22/12/2016** - BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph + **Version 2.0.7 - 06/07/2016** - UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0 + **Version 2.0.6 - 04/04/2016** - TEST: refactoring to pass planemo test using conda dependencies
--- a/lib.r Tue Mar 06 04:03:16 2018 -0500 +++ b/lib.r Thu Mar 08 05:54:22 2018 -0500 @@ -74,15 +74,19 @@ #@author G. Le Corguille # Draw the plotChromPeakDensity 3 per page in a pdf file getPlotAdjustedRtime <- function(xdata) { + pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) + # Color by group group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] names(group_colors) <- unique(xdata$sample_group) plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + # Color by sample plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) + dev.off() } @@ -104,255 +108,46 @@ } -#@author Y. Guitton -getBPC <- function(file,rtcor=NULL, ...) { - object <- xcmsRaw(file) - sel <- profRange(object, ...) - cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) - #plotChrom(xcmsRaw(file), base=T) +#@author G. Le Corguille +getPlotChromatogram <- function(xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { + + chrom <- chromatogram(xdata, aggregationFun = aggregationFun) + if (aggregationFun == "sum") + type="Total Ion Chromatograms" + else + type="Base Peak Intensity Chromatograms" + + adjusted="Raw" + if (hasAdjustedRtime(xdata)) + adjusted="Adjusted" + + main <- paste(type,":",adjusted,"data") + + pdf(pdfname, width=16, height=10) + + # Color by group + group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] + names(group_colors) <- unique(xdata$sample_group) + plot(chrom, col = group_colors[chrom$sample_group], main=main) + legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + + # Color by sample + plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) + legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) + + dev.off() } -#@author Y. Guitton -getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { - cat("Creating BIC pdf...\n") - - if (is.null(xcmsSet)) { - cat("Enter an xcmsSet \n") - stop() - } else { - files <- filepaths(xcmsSet) - } - - phenoDataClass <- as.vector(levels(xcmsSet@phenoData[,"class"])) #sometime phenoData have more than 1 column use first as class - - classnames <- vector("list",length(phenoDataClass)) - for (i in 1:length(phenoDataClass)){ - classnames[[i]] <- which( xcmsSet@phenoData[,"class"]==phenoDataClass[i]) - } - - N <- dim(phenoData(xcmsSet))[1] - - TIC <- vector("list",N) - - - for (j in 1:N) { - - TIC[[j]] <- getBPC(files[j]) - #good for raw - # seems strange for corrected - #errors if scanrange used in xcmsSetgeneration - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[j]] - else - rtcor <- NULL - - TIC[[j]] <- getBPC(files[j],rtcor=rtcor) - # TIC[[j]][,1]<-rtcor - } - - - - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty <- 1:N - pch <- 1:N - #search for max x and max y in BPCs - xlim <- range(sapply(TIC, function(x) range(x[,1]))) - ylim <- range(sapply(TIC, function(x) range(x[,2]))) - ylim <- c(-ylim[2], ylim[2]) - - - ##plot start - - if (length(phenoDataClass)>2){ - for (k in 1:(length(phenoDataClass)-1)){ - for (l in (k+1):length(phenoDataClass)){ - #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="BPC") - colvect <- NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect <- append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i <- class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") - colvect <- append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) - } - } - }#end if length >2 - - if (length(phenoDataClass)==2){ - k <- 1 - l <- 2 - colvect <- NULL - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="BPC") - - for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i <- class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") - colvect <- append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) - - }#end length ==2 - - #case where only one class - if (length(phenoDataClass)==1){ - k <- 1 - ylim <- range(sapply(TIC, function(x) range(x[,2]))) - colvect <- NULL - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab="Retention Time (min)", ylab="BPC") - - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect <- append(colvect,cols[classnames[[k]][j]]) - } - - legend("topright",paste(basename(files[c(classnames[[k]])])), col=colvect, lty=lty, pch=pch) - - }#end length ==1 - - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) +#@author G. Le Corguille +getPlotTICs <- function(xdata, pdfname="TICs.pdf") { + getPlotChromatogram(xdata, pdfname, aggregationFun = "sum") } - - -#@author Y. Guitton -getTIC <- function(file, rtcor=NULL) { - object <- xcmsRaw(file) - cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object, mzrange=range(object@env$mz))$intensity) +#@author G. Le Corguille +getPlotBPIs <- function(xdata, pdfname="BPIs.pdf") { + getPlotChromatogram(xdata, pdfname, aggregationFun = "max") } -#overlay TIC from all files in current folder or from xcmsSet, create pdf -#@author Y. Guitton -getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf", rt=c("raw","corrected")) { - cat("Creating TIC pdf...\n") - - if (is.null(xcmsSet)) { - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep=""), collapse="|") - if (is.null(files)) - files <- getwd() - info <- file.info(files) - listed <- list.files(files[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) - files <- c(files[!info$isdir], listed) - } else { - files <- filepaths(xcmsSet) - } - - phenoDataClass <- as.vector(levels(xcmsSet@phenoData[,"class"])) #sometime phenoData have more than 1 column use first as class - classnames <- vector("list",length(phenoDataClass)) - for (i in 1:length(phenoDataClass)){ - classnames[[i]] <- which( xcmsSet@phenoData[,"class"]==phenoDataClass[i]) - } - - N <- length(files) - TIC <- vector("list",N) - - for (i in 1:N) { - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[i]] else - rtcor <- NULL - TIC[[i]] <- getTIC(files[i], rtcor=rtcor) - } - - pdf(pdfname, w=16, h=10) - cols <- rainbow(N) - lty <- 1:N - pch <- 1:N - #search for max x and max y in TICs - xlim <- range(sapply(TIC, function(x) range(x[,1]))) - ylim <- range(sapply(TIC, function(x) range(x[,2]))) - ylim <- c(-ylim[2], ylim[2]) - - - ##plot start - if (length(phenoDataClass)>2){ - for (k in 1:(length(phenoDataClass)-1)){ - for (l in (k+1):length(phenoDataClass)){ - #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="TIC") - colvect <- NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect <- append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") - colvect <- append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) - } - } - }#end if length >2 - if (length(phenoDataClass)==2){ - k <- 1 - l <- 2 - - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="TIC") - colvect <- NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect <- append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i <- class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") - colvect <- append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) - - }#end length ==2 - - #case where only one class - if (length(phenoDataClass)==1){ - k <- 1 - ylim <- range(sapply(TIC, function(x) range(x[,2]))) - - plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab="Retention Time (min)", ylab="TIC") - colvect <- NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") - points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") - colvect <- append(colvect,cols[classnames[[k]][j]]) - } - - legend("topright",paste(basename(files[c(classnames[[k]])])), col=colvect, lty=lty, pch=pch) - - }#end length ==1 - - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) -} - - # Get the polarities from all the samples of a condition #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
--- a/macros.xml Tue Mar 06 04:03:16 2018 -0500 +++ b/macros.xml Thu Mar 08 05:54:22 2018 -0500 @@ -37,6 +37,9 @@ <validator type="regex" message="The format is '1,2,4,6'" >[0-9, ]+</validator> </xml> + <token name="@INPUT_IMAGE_LABEL@">RData file</token> + <token name="@INPUT_IMAGE_HELP@">It contain a xcms3::XCMSnExp object (named xdata)</token> + <!-- zipfile load for planemo test --> <token name="@COMMAND_FILE_LOAD@">
--- a/xcms_retcor.r Tue Mar 06 04:03:16 2018 -0500 +++ b/xcms_retcor.r Thu Mar 08 05:54:22 2018 -0500 @@ -50,11 +50,6 @@ args <- rawFilePath$args directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) -# Check some character issues -md5sumList <- list("origin" = getMd5sum(directory)) -checkXmlStructure(directory) -checkFilesCompatibilityWithXcms(directory) - cat("\n\n") @@ -70,9 +65,6 @@ print(adjustRtimeParam) xdata <- adjustRtime(xdata, param=adjustRtimeParam) -# Get the legacy xcmsSet object -xset <- getxcmsSetObject(xdata) - cat("\n\n") @@ -80,10 +72,6 @@ cat("\t\tDRAW GRAPHICS\n") getPlotAdjustedRtime(xdata) -#@TODO: one day, use xdata instead of xset to draw the TICs and BPC or a complete other method -getTICs(xcmsSet=xset, rt="corrected", pdfname="TICs.pdf") -getBPCs(xcmsSet=xset, rt="corrected", pdfname="BICs.pdf") - cat("\n\n") # ----- EXPORT ----- @@ -93,6 +81,8 @@ cat("\n\n") cat("\txcmsSet OBJECT INFO\n") +# Get the legacy xcmsSet object +xset <- getxcmsSetObject(xdata) print(xset) cat("\n\n")