Mercurial > repos > lecorguille > xcms_retcor

--- a/README.rst	Tue Mar 06 04:03:16 2018 -0500
+++ b/README.rst	Thu Mar 08 05:54:22 2018 -0500
@@ -2,26 +2,42 @@
 Changelog/News
 --------------

+**Version 3.0.0.0 - 08/03/2018**
+
+- UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlying codes and methods. Some parameters may have been renamed.
+
+- NEW: a bunch of new options: Obiwarp.(centerSample, response, distFun, gapInit, gapExtend, factorDiag, factorGap, localAlignment, initPenalty)
+
+- IMPROVEMENT: the advanced options are now in sections. It will allow you to access to all the parameters and to know their default values.
+
+- CHANGE: removing of the TIC and BPC plots. You can new use the dedicated tool "xcms plot chromatogram"
+
+
 **Version 2.1.1 - 29/11/2017**

 - BUGFIX: To avoid issues with accented letter in the parentFile tag of the mzXML files, we changed a hidden mechanim to LC_ALL=C

+
 **Version 2.1.0 - 03/02/2017**

 - IMPROVEMENT: xcms.retcor can deal with merged individual data

+
 **Version 2.0.8 - 22/12/2016**

 - BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph

+
 **Version 2.0.7 - 06/07/2016**

 - UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0

+
 **Version 2.0.6 - 04/04/2016**

 - TEST: refactoring to pass planemo test using conda dependencies

+
 **Version 2.0.5 - 10/02/2016**

 - BUGFIX: better management of errors. Datasets remained green although the process failed
--- a/abims_xcms_retcor.xml	Tue Mar 06 04:03:16 2018 -0500
+++ b/abims_xcms_retcor.xml	Thu Mar 08 05:54:22 2018 -0500
@@ -43,7 +43,7 @@
     ]]></command>

     <inputs>
-        <param name="image" type="data" format="rdata.xcms.raw,rdata.xcms.group,rdata" label="xset RData file" help="output file from another function xcms (xcmsSet, retcor etc.)" />
+        <param name="image" type="data" format="rdata.xcms.raw,rdata.xcms.group,rdata" label="@INPUT_IMAGE_LABEL@" help="@INPUT_IMAGE_HELP@ from: findChromPeaks, groupChromPeaks" />
         <conditional name="methods">
             <param name="method" type="select" label="Method to use for retention time correction" help="See the help section below" >
                 <option value="PeakGroups" selected="true">PeakGroups - retention time correction based on aligment of features (peak groups) present in most/all samples.</option>
@@ -128,8 +128,6 @@
     <outputs>
         <data name="xsetRData" format="rdata.xcms.retcor" label="${image.name[:-6]}.retcor.RData" from_work_dir="retcor.RData" />
         <data name="rawVSadjustedPdf" format="pdf"  label="${image.name[:-6]}_rawVSadjusted.retcor.Rplots.pdf" from_work_dir="raw_vs_adjusted_rt.pdf" />
-        <data name="ticsCorPdf"   format="pdf"  label="${image.name[:-6]}.retcor.TICs_adjusted.pdf" from_work_dir="TICs.pdf"/>
-        <data name="bpcsCorPdf"   format="pdf" label="${image.name[:-6]}.retcor.BPCs_adjusted.pdf" from_work_dir="BICs.pdf" />
     </outputs>

     <tests>
@@ -366,26 +364,37 @@
 Changelog/News
 --------------

-**Version 3.0.0.0 - 14/02/2018**
+**Version 3.0.0.0 - 08/03/2018**
+
+- UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlying codes and methods. Some parameters may have been renamed.

-- UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlining codes and methods
+- NEW: a bunch of new options: Obiwarp.(centerSample, response, distFun, gapInit, gapExtend, factorDiag, factorGap, localAlignment, initPenalty)
+
+- IMPROVEMENT: the advanced options are now in sections. It will allow you to access to all the parameters and to know their default values.
+
+- CHANGE: removing of the TIC and BPC plots. You can new use the dedicated tool "xcms plot chromatogram"
+

 **Version 2.1.1 - 29/11/2017**

 - BUGFIX: To avoid issues with accented letter in the parentFile tag of the mzXML files, we changed a hidden mechanim to LC_ALL=C

+
 **Version 2.1.0 - 03/02/2017**

 - IMPROVEMENT: xcms.retcor can deal with merged individual data

+
 **Version 2.0.8 - 22/12/2016**

 - BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph

+
 **Version 2.0.7 - 06/07/2016**

 - UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0

+
 **Version 2.0.6 - 04/04/2016**

 - TEST: refactoring to pass planemo test using conda dependencies
--- a/lib.r	Tue Mar 06 04:03:16 2018 -0500
+++ b/lib.r	Thu Mar 08 05:54:22 2018 -0500
@@ -74,15 +74,19 @@
 #@author G. Le Corguille
 # Draw the plotChromPeakDensity 3 per page in a pdf file
 getPlotAdjustedRtime <- function(xdata) {
+
     pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12)
+
     # Color by group
     group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
     names(group_colors) <- unique(xdata$sample_group)
     plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
     legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
+
     # Color by sample
     plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
     legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
+
     dev.off()
 }

@@ -104,255 +108,46 @@

 }

-#@author Y. Guitton
-getBPC <- function(file,rtcor=NULL, ...) {
-    object <- xcmsRaw(file)
-    sel <- profRange(object, ...)
-    cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE]))
-    #plotChrom(xcmsRaw(file), base=T)
+#@author G. Le Corguille
+getPlotChromatogram <- function(xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") {
+
+    chrom <- chromatogram(xdata, aggregationFun = aggregationFun)
+    if (aggregationFun == "sum")
+        type="Total Ion Chromatograms"
+    else
+        type="Base Peak Intensity Chromatograms"
+
+    adjusted="Raw"
+    if (hasAdjustedRtime(xdata))
+        adjusted="Adjusted"
+
+    main <- paste(type,":",adjusted,"data")
+
+    pdf(pdfname, width=16, height=10)
+
+    # Color by group
+    group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
+    names(group_colors) <- unique(xdata$sample_group)
+    plot(chrom, col = group_colors[chrom$sample_group], main=main)
+    legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
+
+    # Color by sample
+    plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main)
+    legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
+
+    dev.off()
 }

-#@author Y. Guitton
-getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) {
-    cat("Creating BIC pdf...\n")
-
-    if (is.null(xcmsSet)) {
-        cat("Enter an xcmsSet \n")
-        stop()
-    } else {
-        files <- filepaths(xcmsSet)
-    }
-
-    phenoDataClass <- as.vector(levels(xcmsSet@phenoData[,"class"])) #sometime phenoData have more than 1 column use first as class
-
-    classnames <- vector("list",length(phenoDataClass))
-    for (i in 1:length(phenoDataClass)){
-        classnames[[i]] <- which( xcmsSet@phenoData[,"class"]==phenoDataClass[i])
-    }
-
-    N <- dim(phenoData(xcmsSet))[1]
-
-    TIC <- vector("list",N)
-
-
-    for (j in 1:N) {
-
-        TIC[[j]] <- getBPC(files[j])
-        #good for raw
-        # seems strange for corrected
-        #errors if scanrange used in xcmsSetgeneration
-        if (!is.null(xcmsSet) && rt == "corrected")
-            rtcor <- xcmsSet@rt$corrected[[j]]
-        else
-            rtcor <- NULL
-
-        TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
-        # TIC[[j]][,1]<-rtcor
-    }
-
-
-
-    pdf(pdfname,w=16,h=10)
-    cols <- rainbow(N)
-    lty <- 1:N
-    pch <- 1:N
-    #search for max x and max y in BPCs
-    xlim <- range(sapply(TIC, function(x) range(x[,1])))
-    ylim <- range(sapply(TIC, function(x) range(x[,2])))
-    ylim <- c(-ylim[2], ylim[2])
-
-
-    ##plot start
-
-    if (length(phenoDataClass)>2){
-        for (k in 1:(length(phenoDataClass)-1)){
-            for (l in (k+1):length(phenoDataClass)){
-                #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
-                plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="BPC")
-                colvect <- NULL
-                for (j in 1:length(classnames[[k]])) {
-                    tic <- TIC[[classnames[[k]][j]]]
-                    # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
-                    points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
-                    colvect <- append(colvect,cols[classnames[[k]][j]])
-                }
-                for (j in 1:length(classnames[[l]])) {
-                    # i <- class2names[j]
-                    tic <- TIC[[classnames[[l]][j]]]
-                    points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l")
-                    colvect <- append(colvect,cols[classnames[[l]][j]])
-                }
-                legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch)
-            }
-        }
-    }#end if length >2
-
-    if (length(phenoDataClass)==2){
-        k <- 1
-        l <- 2
-        colvect <- NULL
-        plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="BPC")
-
-        for (j in 1:length(classnames[[k]])) {
-
-            tic <- TIC[[classnames[[k]][j]]]
-            # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
-            points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
-            colvect<-append(colvect,cols[classnames[[k]][j]])
-        }
-        for (j in 1:length(classnames[[l]])) {
-            # i <- class2names[j]
-            tic <- TIC[[classnames[[l]][j]]]
-            points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l")
-            colvect <- append(colvect,cols[classnames[[l]][j]])
-        }
-        legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch)
-
-    }#end length ==2
-
-    #case where only one class
-    if (length(phenoDataClass)==1){
-        k <- 1
-        ylim <- range(sapply(TIC, function(x) range(x[,2])))
-        colvect <- NULL
-        plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab="Retention Time (min)", ylab="BPC")
-
-        for (j in 1:length(classnames[[k]])) {
-            tic <- TIC[[classnames[[k]][j]]]
-            # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
-            points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
-            colvect <- append(colvect,cols[classnames[[k]][j]])
-        }
-
-        legend("topright",paste(basename(files[c(classnames[[k]])])), col=colvect, lty=lty, pch=pch)
-
-    }#end length ==1
-
-    dev.off() #pdf(pdfname,w=16,h=10)
-
-    invisible(TIC)
+#@author G. Le Corguille
+getPlotTICs <- function(xdata, pdfname="TICs.pdf") {
+    getPlotChromatogram(xdata, pdfname, aggregationFun = "sum")
 }

-
-
-#@author Y. Guitton
-getTIC <- function(file, rtcor=NULL) {
-    object <- xcmsRaw(file)
-    cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object, mzrange=range(object@env$mz))$intensity)
+#@author G. Le Corguille
+getPlotBPIs <- function(xdata, pdfname="BPIs.pdf") {
+    getPlotChromatogram(xdata, pdfname, aggregationFun = "max")
 }

-#overlay TIC from all files in current folder or from xcmsSet, create pdf
-#@author Y. Guitton
-getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf", rt=c("raw","corrected")) {
-    cat("Creating TIC pdf...\n")
-
-    if (is.null(xcmsSet)) {
-        filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
-        filepattern <- paste(paste("\\.", filepattern, "$", sep=""), collapse="|")
-        if (is.null(files))
-            files <- getwd()
-        info <- file.info(files)
-        listed <- list.files(files[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
-        files <- c(files[!info$isdir], listed)
-    } else {
-        files <- filepaths(xcmsSet)
-    }
-
-    phenoDataClass <- as.vector(levels(xcmsSet@phenoData[,"class"])) #sometime phenoData have more than 1 column use first as class
-    classnames <- vector("list",length(phenoDataClass))
-    for (i in 1:length(phenoDataClass)){
-        classnames[[i]] <- which( xcmsSet@phenoData[,"class"]==phenoDataClass[i])
-    }
-
-    N <- length(files)
-    TIC <- vector("list",N)
-
-    for (i in 1:N) {
-        if (!is.null(xcmsSet) && rt == "corrected")
-            rtcor <- xcmsSet@rt$corrected[[i]] else
-        rtcor <- NULL
-        TIC[[i]] <- getTIC(files[i], rtcor=rtcor)
-    }
-
-    pdf(pdfname, w=16, h=10)
-    cols <- rainbow(N)
-    lty <- 1:N
-    pch <- 1:N
-    #search for max x and max y in TICs
-    xlim <- range(sapply(TIC, function(x) range(x[,1])))
-    ylim <- range(sapply(TIC, function(x) range(x[,2])))
-    ylim <- c(-ylim[2], ylim[2])
-
-
-    ##plot start
-    if (length(phenoDataClass)>2){
-        for (k in 1:(length(phenoDataClass)-1)){
-            for (l in (k+1):length(phenoDataClass)){
-                #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
-                plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="TIC")
-                colvect <- NULL
-                for (j in 1:length(classnames[[k]])) {
-                    tic <- TIC[[classnames[[k]][j]]]
-                    # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
-                    points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
-                    colvect <- append(colvect,cols[classnames[[k]][j]])
-                }
-                for (j in 1:length(classnames[[l]])) {
-                    # i=class2names[j]
-                    tic <- TIC[[classnames[[l]][j]]]
-                    points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l")
-                    colvect <- append(colvect,cols[classnames[[l]][j]])
-                }
-                legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch)
-            }
-        }
-    }#end if length >2
-    if (length(phenoDataClass)==2){
-        k <- 1
-        l <- 2
-
-        plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="TIC")
-        colvect <- NULL
-        for (j in 1:length(classnames[[k]])) {
-            tic <- TIC[[classnames[[k]][j]]]
-            # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
-            points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
-            colvect <- append(colvect,cols[classnames[[k]][j]])
-        }
-        for (j in 1:length(classnames[[l]])) {
-            # i <- class2names[j]
-            tic <- TIC[[classnames[[l]][j]]]
-            points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l")
-            colvect <- append(colvect,cols[classnames[[l]][j]])
-        }
-        legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch)
-
-    }#end length ==2
-
-    #case where only one class
-    if (length(phenoDataClass)==1){
-        k <- 1
-        ylim <- range(sapply(TIC, function(x) range(x[,2])))
-
-        plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab="Retention Time (min)", ylab="TIC")
-        colvect <- NULL
-        for (j in 1:length(classnames[[k]])) {
-            tic <- TIC[[classnames[[k]][j]]]
-            # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
-            points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
-            colvect <- append(colvect,cols[classnames[[k]][j]])
-        }
-
-        legend("topright",paste(basename(files[c(classnames[[k]])])), col=colvect, lty=lty, pch=pch)
-
-    }#end length ==1
-
-    dev.off() #pdf(pdfname,w=16,h=10)
-
-    invisible(TIC)
-}
-
-

 # Get the polarities from all the samples of a condition
 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
--- a/macros.xml	Tue Mar 06 04:03:16 2018 -0500
+++ b/macros.xml	Thu Mar 08 05:54:22 2018 -0500
@@ -37,6 +37,9 @@
         <validator type="regex" message="The format is '1,2,4,6'" >[0-9, ]+</validator>
     </xml>

+    <token name="@INPUT_IMAGE_LABEL@">RData file</token>
+    <token name="@INPUT_IMAGE_HELP@">It contain a xcms3::XCMSnExp object (named xdata)</token>
+
     <!-- zipfile load for planemo test -->

     <token name="@COMMAND_FILE_LOAD@">
--- a/xcms_retcor.r	Tue Mar 06 04:03:16 2018 -0500
+++ b/xcms_retcor.r	Thu Mar 08 05:54:22 2018 -0500
@@ -50,11 +50,6 @@
 args <- rawFilePath$args
 directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)

-# Check some character issues
-md5sumList <- list("origin" = getMd5sum(directory))
-checkXmlStructure(directory)
-checkFilesCompatibilityWithXcms(directory)
-

 cat("\n\n")

@@ -70,9 +65,6 @@
 print(adjustRtimeParam)
 xdata <- adjustRtime(xdata, param=adjustRtimeParam)

-# Get the legacy xcmsSet object
-xset <- getxcmsSetObject(xdata)
-
 cat("\n\n")


@@ -80,10 +72,6 @@
 cat("\t\tDRAW GRAPHICS\n")
 getPlotAdjustedRtime(xdata)

-#@TODO: one day, use xdata instead of xset to draw the TICs and BPC or a complete other method
-getTICs(xcmsSet=xset, rt="corrected", pdfname="TICs.pdf")
-getBPCs(xcmsSet=xset, rt="corrected", pdfname="BICs.pdf")
-
 cat("\n\n")

 # ----- EXPORT -----
@@ -93,6 +81,8 @@
 cat("\n\n")

 cat("\txcmsSet OBJECT INFO\n")
+# Get the legacy xcmsSet object
+xset <- getxcmsSetObject(xdata)
 print(xset)
 cat("\n\n")