# HG changeset patch # User lecorguille # Date 1486303022 18000 # Node ID 20a75ba4345b9b8dec3207308bfec8fc9242f922 # Parent c04568596f4031f05f2f6c1b12a1546dc17ad428 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 22c4e92909198328fc7439ff47e4546a273eb907 diff -r c04568596f40 -r 20a75ba4345b README.rst --- a/README.rst Mon Jan 30 08:51:40 2017 -0500 +++ b/README.rst Sun Feb 05 08:57:02 2017 -0500 @@ -2,6 +2,10 @@ Changelog/News -------------- +**Version 2.1.0 - 03/02/2017** + +- IMPROVEMENT: xcms.retcor can deal with merged individual data + **Version 2.0.8 - 22/12/2016** - BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph @@ -31,12 +35,3 @@ - IMPROVEMENT: parameter labels have changed to facilitate their reading. - -Test Status ------------ - -Planemo test using conda: passed - -Planemo test using source env.sh: passed - -Planemo shed_test : passed diff -r c04568596f40 -r 20a75ba4345b abims_xcms_retcor.xml --- a/abims_xcms_retcor.xml Mon Jan 30 08:51:40 2017 -0500 +++ b/abims_xcms_retcor.xml Sun Feb 05 08:57:02 2017 -0500 @@ -1,4 +1,4 @@ - + Retention Time Correction using retcor function from xcms R package @@ -33,7 +33,7 @@ #end if #end if - @COMMAND_ZIPFILE_LOAD@ + @COMMAND_FILE_LOAD@ @COMMAND_LOG_EXIT@ ]]> @@ -82,7 +82,7 @@ - + @@ -133,8 +133,33 @@ - - + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -298,6 +323,10 @@ Changelog/News -------------- +**Version 2.1.0 - 03/02/2017** + +- IMPROVEMENT: xcms.retcor can deal with merged individual data + **Version 2.0.8 - 22/12/2016** - BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph diff -r c04568596f40 -r 20a75ba4345b lib.r --- a/lib.r Mon Jan 30 08:51:40 2017 -0500 +++ b/lib.r Sun Feb 05 08:57:02 2017 -0500 @@ -36,11 +36,11 @@ getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { groups <- xset@groups values <- groupval(xset, "medret", value=intval) - + # renamming of the column rtmed to rt to fit with camera peaklist function output colnames(groups)[colnames(groups)=="rtmed"] <- "rt" colnames(groups)[colnames(groups)=="mzmed"] <- "mz" - + ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) groups = RTSecondToMinute(groups, convertRTMinute) @@ -57,138 +57,139 @@ #@author Y. Guitton getBPC <- function(file,rtcor=NULL, ...) { - object <- xcmsRaw(file) - sel <- profRange(object, ...) - cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) - #plotChrom(xcmsRaw(file), base=T) + object <- xcmsRaw(file) + sel <- profRange(object, ...) + cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) + #plotChrom(xcmsRaw(file), base=T) } #@author Y. Guitton getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { - cat("Creating BIC pdf...\n") + cat("Creating BIC pdf...\n") - if (is.null(xcmsSet)) { - cat("Enter an xcmsSet \n") - stop() - } else { - files <- filepaths(xcmsSet) - } + if (is.null(xcmsSet)) { + cat("Enter an xcmsSet \n") + stop() + } else { + files <- filepaths(xcmsSet) + } - class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class - classnames<-vector("list",length(class)) - for (i in 1:length(class)){ - classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) - } + classnames<-vector("list",length(phenoDataClass)) + for (i in 1:length(phenoDataClass)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) + } - N <- dim(phenoData(xcmsSet))[1] + N <- dim(phenoData(xcmsSet))[1] - TIC <- vector("list",N) + TIC <- vector("list",N) - for (j in 1:N) { + for (j in 1:N) { - TIC[[j]] <- getBPC(files[j]) - #good for raw - # seems strange for corrected - #errors if scanrange used in xcmsSetgeneration - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[j]] else - rtcor <- NULL + TIC[[j]] <- getBPC(files[j]) + #good for raw + # seems strange for corrected + #errors if scanrange used in xcmsSetgeneration + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[j]] + else + rtcor <- NULL - TIC[[j]] <- getBPC(files[j],rtcor=rtcor) - # TIC[[j]][,1]<-rtcor - } + TIC[[j]] <- getBPC(files[j],rtcor=rtcor) + # TIC[[j]][,1]<-rtcor + } - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty = 1:N - pch = 1:N - #search for max x and max y in BPCs - xlim = range(sapply(TIC, function(x) range(x[,1]))) - ylim = range(sapply(TIC, function(x) range(x[,2]))) - ylim = c(-ylim[2], ylim[2]) + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in BPCs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) - ##plot start + ##plot start - if (length(class)>2){ - for (k in 1:(length(class)-1)){ - for (l in (k+1):length(class)){ - #print(paste(class[k],"vs",class[l],sep=" ")) - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + if (length(phenoDataClass)>2){ + for (k in 1:(length(phenoDataClass)-1)){ + for (l in (k+1):length(phenoDataClass)){ + #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + + if (length(phenoDataClass)==2){ + k=1 + l=2 colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - } - } - }#end if length >2 + + }#end length ==2 - if (length(class)==2){ - k=1 - l=2 - colvect<-NULL - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") - - for (j in 1:length(classnames[[k]])) { + #case where only one class + if (length(phenoDataClass)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==2 + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } - #case where only one class - if (length(class)==1){ - k=1 - ylim = range(sapply(TIC, function(x) range(x[,2]))) - colvect<-NULL - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } + dev.off() #pdf(pdfname,w=16,h=10) - legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==1 - - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) + invisible(TIC) } #@author Y. Guitton getTIC <- function(file,rtcor=NULL) { - object <- xcmsRaw(file) - cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) + object <- xcmsRaw(file) + cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) } ## @@ -196,114 +197,112 @@ ## #@author Y. Guitton getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { - cat("Creating TIC pdf...\n") + cat("Creating TIC pdf...\n") - if (is.null(xcmsSet)) { - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") - if (is.null(files)) - files <- getwd() - info <- file.info(files) - listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) - files <- c(files[!info$isdir], listed) - } else { - files <- filepaths(xcmsSet) - } - - class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + if (is.null(xcmsSet)) { + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") + if (is.null(files)) + files <- getwd() + info <- file.info(files) + listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) + files <- c(files[!info$isdir], listed) + } else { + files <- filepaths(xcmsSet) + } - classnames<-vector("list",length(class)) - for (i in 1:length(class)){ - classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) - } + phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + classnames<-vector("list",length(phenoDataClass)) + for (i in 1:length(phenoDataClass)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) + } - N <- length(files) - TIC <- vector("list",N) + N <- length(files) + TIC <- vector("list",N) - for (i in 1:N) { - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[i]] else - rtcor <- NULL - TIC[[i]] <- getTIC(files[i],rtcor=rtcor) - } + for (i in 1:N) { + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[i]] else + rtcor <- NULL + TIC[[i]] <- getTIC(files[i],rtcor=rtcor) + } - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty = 1:N - pch = 1:N - #search for max x and max y in TICs - xlim = range(sapply(TIC, function(x) range(x[,1]))) - ylim = range(sapply(TIC, function(x) range(x[,2]))) - ylim = c(-ylim[2], ylim[2]) + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in TICs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) - ##plot start - if (length(class)>2){ - for (k in 1:(length(class)-1)){ - for (l in (k+1):length(class)){ - #print(paste(class[k],"vs",class[l],sep=" ")) - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + ##plot start + if (length(phenoDataClass)>2){ + for (k in 1:(length(phenoDataClass)-1)){ + for (l in (k+1):length(phenoDataClass)){ + #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + if (length(phenoDataClass)==2){ + k=1 + l=2 + + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") colvect<-NULL for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - } - } - }#end if length >2 - if (length(class)==2){ - k=1 - l=2 + + }#end length ==2 - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + #case where only one class + if (length(phenoDataClass)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) - }#end length ==2 - - #case where only one class - if (length(class)==1){ - k=1 - ylim = range(sapply(TIC, function(x) range(x[,2]))) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 - legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==1 + dev.off() #pdf(pdfname,w=16,h=10) - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) + invisible(TIC) } @@ -313,70 +312,70 @@ #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { - cat("Creating the sampleMetadata file...\n") - - #Create the sampleMetada dataframe - sampleMetadata=xset@phenoData - sampleNamesOrigin=rownames(sampleMetadata) - sampleNamesMakeNames=make.names(sampleNamesOrigin) + cat("Creating the sampleMetadata file...\n") - if (any(duplicated(sampleNamesMakeNames))) { - write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) - for (sampleName in sampleNamesOrigin) { - write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) - } - stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") - } - - if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { - cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") - for (sampleName in sampleNamesOrigin) { - cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) - } - } - - sampleMetadata$sampleMetadata=sampleNamesMakeNames - sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns - rownames(sampleMetadata)=NULL + #Create the sampleMetada dataframe + sampleMetadata=xset@phenoData + sampleNamesOrigin=rownames(sampleMetadata) + sampleNamesMakeNames=make.names(sampleNamesOrigin) - #Create a list of files name in the current directory - list_files=xset@filepaths - #For each sample file, the following actions are done - for (file in list_files){ - #Check if the file is in the CDF format - if (!mzR:::netCDFIsFile(file)){ - - # If the column isn't exist, with add one filled with NA - if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA + if (any(duplicated(sampleNamesMakeNames))) { + write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) + for (sampleName in sampleNamesOrigin) { + write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) + } + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + } - #Create a simple xcmsRaw object for each sample - xcmsRaw=xcmsRaw(file) - #Extract the polarity (a list of polarities) - polarity=xcmsRaw@polarity - #Verify if all the scans have the same polarity - uniq_list=unique(polarity) - if (length(uniq_list)>1){ - polarity="mixed" - } else { - polarity=as.character(uniq_list) - } - #Transforms the character to obtain only the sample name - filename=basename(file) - library(tools) - samplename=file_path_sans_ext(filename) - - #Set the polarity attribute - sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity - - #Delete xcmsRaw object because it creates a bug for the fillpeaks step - rm(xcmsRaw) + if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { + cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") + for (sampleName in sampleNamesOrigin) { + cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) + } } - } + sampleMetadata$sampleMetadata=sampleNamesMakeNames + sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns + rownames(sampleMetadata)=NULL + + #Create a list of files name in the current directory + list_files=xset@filepaths + #For each sample file, the following actions are done + for (file in list_files){ + #Check if the file is in the CDF format + if (!mzR:::netCDFIsFile(file)){ + + # If the column isn't exist, with add one filled with NA + if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA - write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) + #Create a simple xcmsRaw object for each sample + xcmsRaw=xcmsRaw(file) + #Extract the polarity (a list of polarities) + polarity=xcmsRaw@polarity + #Verify if all the scans have the same polarity + uniq_list=unique(polarity) + if (length(uniq_list)>1){ + polarity="mixed" + } else { + polarity=as.character(uniq_list) + } + #Transforms the character to obtain only the sample name + filename=basename(file) + library(tools) + samplename=file_path_sans_ext(filename) - return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) + #Set the polarity attribute + sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity + + #Delete xcmsRaw object because it creates a bug for the fillpeaks step + rm(xcmsRaw) + } + + } + + write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) + + return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) } @@ -386,29 +385,28 @@ ## #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM checkFilesCompatibilityWithXcms <- function(directory) { - cat("Checking files filenames compatibilities with xmcs...\n") - # WHAT XCMS WILL FIND - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) - files <- c(directory[!info$isdir], listed) - files_abs <- file.path(getwd(), files) - exists <- file.exists(files_abs) - files[exists] <- files_abs[exists] - files[exists] <- sub("//","/",files[exists]) + cat("Checking files filenames compatibilities with xmcs...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + files_abs <- file.path(getwd(), files) + exists <- file.exists(files_abs) + files[exists] <- files_abs[exists] + files[exists] <- sub("//","/",files[exists]) - # WHAT IS ON THE FILESYSTEM - filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) - filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] + # WHAT IS ON THE FILESYSTEM + filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) + filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] - # COMPARISON - if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { - write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) - write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) - stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") - - } + # COMPARISON + if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { + write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) + write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + } } @@ -418,17 +416,17 @@ ## #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM checkXmlStructure <- function (directory) { - cat("Checking XML structure...\n") + cat("Checking XML structure...\n") - cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") - capture=system(cmd,intern=TRUE) + cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") + capture=system(cmd,intern=TRUE) - if (length(capture)>0){ - #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) - write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) - write(capture, stderr()) - stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") - } + if (length(capture)>0){ + #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) + write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) + write(capture, stderr()) + stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") + } } @@ -438,23 +436,23 @@ ## #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM deleteXmlBadCharacters<- function (directory) { - cat("Checking Non ASCII characters in the XML...\n") + cat("Checking Non ASCII characters in the XML...\n") - processed=F - l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) - for (i in l){ - cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") - capture=suppressWarnings(system(cmd,intern=TRUE)) - if (length(capture)>0){ - cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) - print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) - c=system(cmd,intern=TRUE) - capture="" - processed=T + processed=F + l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) + for (i in l){ + cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") + capture=suppressWarnings(system(cmd,intern=TRUE)) + if (length(capture)>0){ + cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) + print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) + c=system(cmd,intern=TRUE) + capture="" + processed=T + } } - } - if (processed) cat("\n\n") - return(processed) + if (processed) cat("\n\n") + return(processed) } @@ -463,19 +461,19 @@ ## #@author Gildas Le Corguille lecorguille@sb-roscoff.fr getMd5sum <- function (directory) { - cat("Compute md5 checksum...\n") - # WHAT XCMS WILL FIND - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) - files <- c(directory[!info$isdir], listed) - exists <- file.exists(files) - files <- files[exists] + cat("Compute md5 checksum...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + exists <- file.exists(files) + files <- files[exists] - library(tools) + library(tools) - #cat("\n\n") + #cat("\n\n") - return(as.matrix(md5sum(files))) + return(as.matrix(md5sum(files))) } diff -r c04568596f40 -r 20a75ba4345b macros.xml --- a/macros.xml Mon Jan 30 08:51:40 2017 -0500 +++ b/macros.xml Sun Feb 05 08:57:02 2017 -0500 @@ -7,6 +7,11 @@ r-batch + + + bioconductor-xcms + + @@ -27,24 +32,40 @@ - - #if $zipfile_load_conditional.zipfile_load_select == "yes": - #if $zipfile_load_conditional.zip_file: - zipfile $zipfile_load_conditional.zip_file + + #if $file_load_conditional.file_load_select == "yes": + #if $file_load_conditional.inputs.input == "zip_file": + zipfile $file_load_conditional.inputs.zip_file + #else + #set singlefile_galaxyPath = ','.join( [ str( $single_file ) for $single_file in $file_load_conditional.inputs.single_file ] ) + #set singlefile_sampleName = ','.join( [ str( $single_file.name ) for $single_file in $file_load_conditional.inputs.single_file ] ) + + singlefile_galaxyPath $singlefile_galaxyPath singlefile_sampleName $singlefile_sampleName #end if #end if - - - + + + - + - + + + + + + + + + + + + @@ -54,7 +75,7 @@ .. class:: infomark -**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu +**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu .. class:: infomark diff -r c04568596f40 -r 20a75ba4345b planemo_test.sh --- a/planemo_test.sh Mon Jan 30 08:51:40 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -# Example of planemo command to launch test - -# -- Use of installed package environments -# after having installing package on a local galaxy instance -source /w/galaxy/dev/shed_tools_tool_dependency_dir/R/3.1.2/iuc/package_r_3_1_2/1ca39eb16186/env.sh -source /w/galaxy/dev/shed_tools_tool_dependency_dir/bioconductor-xcms/1.44.0/lecorguille/package_bioconductor_xcms_1_44_0/0c38f7d43e08/env.sh -planemo test --install_galaxy - -#All 1 test(s) executed passed. -#abims_xcms_retcor[0]: passed - - -# -- Use of conda dependencies -planemo conda_init --conda_prefix /tmp/mc -planemo conda_install --conda_prefix /tmp/mc . -planemo test --install_galaxy --conda_prefix /tmp/mc --conda_dependency_resolution - -#All 1 test(s) executed passed. -#abims_xcms_retcor[0]: passed - - -# -- Use of shed_test -planemo shed_test --install_galaxy --galaxy_branch "dev" -t testtoolshed -#All 1 test(s) executed passed. -#testtoolshed.g2.bx.psu.edu/repos/lecorguille/xcms_retcor/abims_xcms_retcor/2.0.6[0]: passed diff -r c04568596f40 -r 20a75ba4345b test-data/faahKO-single-class.xset.group.RData Binary file test-data/faahKO-single-class.xset.group.RData has changed diff -r c04568596f40 -r 20a75ba4345b test-data/ko15.CDF Binary file test-data/ko15.CDF has changed diff -r c04568596f40 -r 20a75ba4345b test-data/ko16.CDF Binary file test-data/ko16.CDF has changed diff -r c04568596f40 -r 20a75ba4345b test-data/wt15.CDF Binary file test-data/wt15.CDF has changed diff -r c04568596f40 -r 20a75ba4345b test-data/wt16.CDF Binary file test-data/wt16.CDF has changed diff -r c04568596f40 -r 20a75ba4345b xcms.r --- a/xcms.r Mon Jan 30 08:51:40 2017 -0500 +++ b/xcms.r Sun Feb 05 08:57:02 2017 -0500 @@ -15,8 +15,8 @@ #pkgs=c("xcms","batch") pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch") for(pkg in pkgs) { - suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) - cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") + suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) + cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") } source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } cat("\n\n"); @@ -38,7 +38,7 @@ #image is an .RData file necessary to use xset variable given by previous tools if (!is.null(listArguments[["image"]])){ - load(listArguments[["image"]]); listArguments[["image"]]=NULL + load(listArguments[["image"]]); listArguments[["image"]]=NULL } #Import the different functions @@ -61,13 +61,13 @@ xsetRdataOutput = paste(thefunction,"RData",sep=".") if (!is.null(listArguments[["xsetRdataOutput"]])){ - xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL + xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL } #saving the specific parameters rplotspdf = "Rplots.pdf" if (!is.null(listArguments[["rplotspdf"]])){ - rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL + rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL } sampleMetadataOutput = "sampleMetadata.tsv" if (!is.null(listArguments[["sampleMetadataOutput"]])){ @@ -75,28 +75,28 @@ } variableMetadataOutput = "variableMetadata.tsv" if (!is.null(listArguments[["variableMetadataOutput"]])){ - variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL + variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL } dataMatrixOutput = "dataMatrix.tsv" if (!is.null(listArguments[["dataMatrixOutput"]])){ - dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL + dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL } if (!is.null(listArguments[["convertRTMinute"]])){ - convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL + convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL } if (!is.null(listArguments[["numDigitsMZ"]])){ - numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL + numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL } if (!is.null(listArguments[["numDigitsRT"]])){ numDigitsRT = listArguments[["numDigitsRT"]]; listArguments[["numDigitsRT"]]=NULL } if (!is.null(listArguments[["intval"]])){ - intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL + intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL } if (thefunction %in% c("xcmsSet","retcor")) { - ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL - bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL + ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL + bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL } #necessary to unzip .zip file uploaded to Galaxy @@ -104,67 +104,75 @@ if (!is.null(listArguments[["zipfile"]])){ - zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL + zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL } -if (!is.null(listArguments[["library"]])){ - directory=listArguments[["library"]]; listArguments[["library"]]=NULL - if(!file.exists(directory)){ - error_message=paste("Cannot access the directory:",directory,". Please verify if the directory exists or not.") - print(error_message) - stop(error_message) - } +if (!is.null(listArguments[["singlefile_galaxyPath"]])){ + singlefile_galaxyPaths = unlist(strsplit(listArguments[["singlefile_galaxyPath"]],",")); listArguments[["singlefile_galaxyPath"]]=NULL + singlefile_sampleNames = unlist(strsplit(listArguments[["singlefile_sampleName"]],",")); listArguments[["singlefile_sampleName"]]=NULL + + singlefile=NULL + for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { + singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i] + singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i] + singlefile[[singlefile_sampleName]] = singlefile_galaxyPath + } } # We unzip automatically the chromatograms from the zip files. if (thefunction %in% c("xcmsSet","retcor","fillPeaks")) { - if(exists("zipfile") && (zipfile!="")) { - if(!file.exists(zipfile)){ - error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") - print(error_message) - stop(error_message) - } + if(exists("singlefile") && (length("singlefile")>0)) { + for (singlefile_sampleName in names(singlefile)) { + singlefile_galaxyPath = singlefile[[singlefile_sampleName]] + if(!file.exists(singlefile_galaxyPath)){ + error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") + print(error_message); stop(error_message) + } - #list all file in the zip file - #zip_files=unzip(zipfile,list=T)[,"Name"] + file.symlink(singlefile_galaxyPath,singlefile_sampleName) + } + directory = "." - - #unzip - suppressWarnings(unzip(zipfile, unzip="unzip")) + md5sumList=list("origin"=getMd5sum(directory)) - #get the directory name - filesInZip=unzip(zipfile, list=T); - directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); - directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] - directory = "." - if (length(directories) == 1) directory = directories + } + if(exists("zipfile") && (zipfile!="")) { + if(!file.exists(zipfile)){ + error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") + print(error_message) + stop(error_message) + } - cat("files_root_directory\t",directory,"\n") + #list all file in the zip file + #zip_files=unzip(zipfile,list=T)[,"Name"] - # - md5sumList=list("origin"=getMd5sum(directory)) + #unzip + suppressWarnings(unzip(zipfile, unzip="unzip")) - # Check and fix if there are non ASCII characters. If so, they will be removed from the *mzXML mzML files. - # Remove because can create issue with some clean files - #@TODO: fix me - #if (deleteXmlBadCharacters(directory)) { - # md5sumList=list("removalBadCharacters"=getMd5sum(directory)) - #} + #get the directory name + filesInZip=unzip(zipfile, list=T); + directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); + directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] + directory = "." + if (length(directories) == 1) directory = directories - } + cat("files_root_directory\t",directory,"\n") + + md5sumList=list("origin"=getMd5sum(directory)) + } } #addition of the directory to the list of arguments in the first position if (thefunction == "xcmsSet") { - checkXmlStructure(directory) - checkFilesCompatibilityWithXcms(directory) - listArguments=append(directory, listArguments) + checkXmlStructure(directory) + checkFilesCompatibilityWithXcms(directory) + listArguments=append(directory, listArguments) } #addition of xset object to the list of arguments in the first position if (exists("xset")){ - listArguments=append(list(xset), listArguments) + listArguments=append(list(xset), listArguments) } cat("\n\n") @@ -172,8 +180,6 @@ - - # ----- MAIN PROCESSING INFO ----- cat("\tMAIN PROCESSING INFO\n") @@ -181,12 +187,12 @@ #Verification of a group step before doing the fillpeaks job. if (thefunction == "fillPeaks") { - res=try(is.null(groupnames(xset))) - if (class(res) == "try-error"){ - error<-geterrmessage() - write(error, stderr()) - stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step") - } + res=try(is.null(groupnames(xset))) + if (class(res) == "try-error"){ + error<-geterrmessage() + write(error, stderr()) + stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step") + } } @@ -194,7 +200,7 @@ #dev.new(file="Rplots.pdf", width=16, height=12) pdf(file=rplotspdf, width=16, height=12) if (thefunction == "group") { - par(mfrow=c(2,2)) + par(mfrow=c(2,2)) } #else if (thefunction == "retcor") { #try to change the legend display @@ -215,40 +221,40 @@ if (thefunction == "xcmsSet") { - #transform the files absolute pathways into relative pathways - xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths) + #transform the files absolute pathways into relative pathways + xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths) - if(exists("zipfile") && (zipfile!="")) { + if(exists("zipfile") && (zipfile!="")) { - #Modify the samples names (erase the path) - for(i in 1:length(sampnames(xset))){ + #Modify the samples names (erase the path) + for(i in 1:length(sampnames(xset))){ - sample_name=unlist(strsplit(sampnames(xset)[i], "/")) - sample_name=sample_name[length(sample_name)] - sample_name= unlist(strsplit(sample_name,"[.]"))[1] - sampnames(xset)[i]=sample_name + sample_name=unlist(strsplit(sampnames(xset)[i], "/")) + sample_name=sample_name[length(sample_name)] + sample_name= unlist(strsplit(sample_name,"[.]"))[1] + sampnames(xset)[i]=sample_name + + } } - } - } # -- TIC -- if (thefunction == "xcmsSet") { - cat("\t\tGET TIC GRAPH\n") - sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) - getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") - getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) + cat("\t\tGET TIC GRAPH\n") + sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) + getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") + getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) } else if (thefunction == "retcor") { - cat("\t\tGET TIC GRAPH\n") - getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") - getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) + cat("\t\tGET TIC GRAPH\n") + getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") + getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) } if (thefunction == "fillPeaks") { - cat("\t\tGET THE PEAK LIST\n") - getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput) + cat("\t\tGET THE PEAK LIST\n") + getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput) } @@ -262,7 +268,7 @@ #saving R data in .Rdata file to save the variables used in the present tool -objects2save = c("xset","zipfile","listOFlistArguments","md5sumList","sampleNamesList") +objects2save = c("xset","zipfile","singlefile","listOFlistArguments","md5sumList","sampleNamesList") save(list=objects2save[objects2save %in% ls()], file=xsetRdataOutput) cat("\n\n")