Mercurial > repos > lecorguille > xcms_fillpeaks
diff lib.r @ 16:5f61570177e2 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 22c4e92909198328fc7439ff47e4546a273eb907
| author | lecorguille | 
|---|---|
| date | Sun, 05 Feb 2017 08:57:59 -0500 | 
| parents | 1c78d61fd646 | 
| children | 65dc5e8b53f7 | 
line wrap: on
 line diff
--- a/lib.r Mon Jan 30 08:52:00 2017 -0500 +++ b/lib.r Sun Feb 05 08:57:59 2017 -0500 @@ -36,11 +36,11 @@ getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { groups <- xset@groups values <- groupval(xset, "medret", value=intval) - + # renamming of the column rtmed to rt to fit with camera peaklist function output colnames(groups)[colnames(groups)=="rtmed"] <- "rt" colnames(groups)[colnames(groups)=="mzmed"] <- "mz" - + ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) groups = RTSecondToMinute(groups, convertRTMinute) @@ -57,138 +57,139 @@ #@author Y. Guitton getBPC <- function(file,rtcor=NULL, ...) { - object <- xcmsRaw(file) - sel <- profRange(object, ...) - cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) - #plotChrom(xcmsRaw(file), base=T) + object <- xcmsRaw(file) + sel <- profRange(object, ...) + cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) + #plotChrom(xcmsRaw(file), base=T) } #@author Y. Guitton getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { - cat("Creating BIC pdf...\n") + cat("Creating BIC pdf...\n") - if (is.null(xcmsSet)) { - cat("Enter an xcmsSet \n") - stop() - } else { - files <- filepaths(xcmsSet) - } + if (is.null(xcmsSet)) { + cat("Enter an xcmsSet \n") + stop() + } else { + files <- filepaths(xcmsSet) + } - class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class - classnames<-vector("list",length(class)) - for (i in 1:length(class)){ - classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) - } + classnames<-vector("list",length(phenoDataClass)) + for (i in 1:length(phenoDataClass)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) + } - N <- dim(phenoData(xcmsSet))[1] + N <- dim(phenoData(xcmsSet))[1] - TIC <- vector("list",N) + TIC <- vector("list",N) - for (j in 1:N) { + for (j in 1:N) { - TIC[[j]] <- getBPC(files[j]) - #good for raw - # seems strange for corrected - #errors if scanrange used in xcmsSetgeneration - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[j]] else - rtcor <- NULL + TIC[[j]] <- getBPC(files[j]) + #good for raw + # seems strange for corrected + #errors if scanrange used in xcmsSetgeneration + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[j]] + else + rtcor <- NULL - TIC[[j]] <- getBPC(files[j],rtcor=rtcor) - # TIC[[j]][,1]<-rtcor - } + TIC[[j]] <- getBPC(files[j],rtcor=rtcor) + # TIC[[j]][,1]<-rtcor + } - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty = 1:N - pch = 1:N - #search for max x and max y in BPCs - xlim = range(sapply(TIC, function(x) range(x[,1]))) - ylim = range(sapply(TIC, function(x) range(x[,2]))) - ylim = c(-ylim[2], ylim[2]) + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in BPCs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) - ##plot start + ##plot start - if (length(class)>2){ - for (k in 1:(length(class)-1)){ - for (l in (k+1):length(class)){ - #print(paste(class[k],"vs",class[l],sep=" ")) - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + if (length(phenoDataClass)>2){ + for (k in 1:(length(phenoDataClass)-1)){ + for (l in (k+1):length(phenoDataClass)){ + #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + + if (length(phenoDataClass)==2){ + k=1 + l=2 colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - } - } - }#end if length >2 + + }#end length ==2 - if (length(class)==2){ - k=1 - l=2 - colvect<-NULL - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") - - for (j in 1:length(classnames[[k]])) { + #case where only one class + if (length(phenoDataClass)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==2 + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } - #case where only one class - if (length(class)==1){ - k=1 - ylim = range(sapply(TIC, function(x) range(x[,2]))) - colvect<-NULL - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } + dev.off() #pdf(pdfname,w=16,h=10) - legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==1 - - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) + invisible(TIC) } #@author Y. Guitton getTIC <- function(file,rtcor=NULL) { - object <- xcmsRaw(file) - cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) + object <- xcmsRaw(file) + cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) } ## @@ -196,114 +197,112 @@ ## #@author Y. Guitton getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { - cat("Creating TIC pdf...\n") + cat("Creating TIC pdf...\n") - if (is.null(xcmsSet)) { - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") - if (is.null(files)) - files <- getwd() - info <- file.info(files) - listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) - files <- c(files[!info$isdir], listed) - } else { - files <- filepaths(xcmsSet) - } - - class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + if (is.null(xcmsSet)) { + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") + if (is.null(files)) + files <- getwd() + info <- file.info(files) + listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) + files <- c(files[!info$isdir], listed) + } else { + files <- filepaths(xcmsSet) + } - classnames<-vector("list",length(class)) - for (i in 1:length(class)){ - classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) - } + phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + classnames<-vector("list",length(phenoDataClass)) + for (i in 1:length(phenoDataClass)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) + } - N <- length(files) - TIC <- vector("list",N) + N <- length(files) + TIC <- vector("list",N) - for (i in 1:N) { - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[i]] else - rtcor <- NULL - TIC[[i]] <- getTIC(files[i],rtcor=rtcor) - } + for (i in 1:N) { + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[i]] else + rtcor <- NULL + TIC[[i]] <- getTIC(files[i],rtcor=rtcor) + } - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty = 1:N - pch = 1:N - #search for max x and max y in TICs - xlim = range(sapply(TIC, function(x) range(x[,1]))) - ylim = range(sapply(TIC, function(x) range(x[,2]))) - ylim = c(-ylim[2], ylim[2]) + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in TICs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) - ##plot start - if (length(class)>2){ - for (k in 1:(length(class)-1)){ - for (l in (k+1):length(class)){ - #print(paste(class[k],"vs",class[l],sep=" ")) - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + ##plot start + if (length(phenoDataClass)>2){ + for (k in 1:(length(phenoDataClass)-1)){ + for (l in (k+1):length(phenoDataClass)){ + #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + if (length(phenoDataClass)==2){ + k=1 + l=2 + + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") colvect<-NULL for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - } - } - }#end if length >2 - if (length(class)==2){ - k=1 - l=2 + + }#end length ==2 - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + #case where only one class + if (length(phenoDataClass)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) - }#end length ==2 - - #case where only one class - if (length(class)==1){ - k=1 - ylim = range(sapply(TIC, function(x) range(x[,2]))) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 - legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==1 + dev.off() #pdf(pdfname,w=16,h=10) - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) + invisible(TIC) } @@ -313,70 +312,70 @@ #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { - cat("Creating the sampleMetadata file...\n") - - #Create the sampleMetada dataframe - sampleMetadata=xset@phenoData - sampleNamesOrigin=rownames(sampleMetadata) - sampleNamesMakeNames=make.names(sampleNamesOrigin) + cat("Creating the sampleMetadata file...\n") - if (any(duplicated(sampleNamesMakeNames))) { - write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) - for (sampleName in sampleNamesOrigin) { - write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) - } - stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") - } - - if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { - cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") - for (sampleName in sampleNamesOrigin) { - cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) - } - } - - sampleMetadata$sampleMetadata=sampleNamesMakeNames - sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns - rownames(sampleMetadata)=NULL + #Create the sampleMetada dataframe + sampleMetadata=xset@phenoData + sampleNamesOrigin=rownames(sampleMetadata) + sampleNamesMakeNames=make.names(sampleNamesOrigin) - #Create a list of files name in the current directory - list_files=xset@filepaths - #For each sample file, the following actions are done - for (file in list_files){ - #Check if the file is in the CDF format - if (!mzR:::netCDFIsFile(file)){ - - # If the column isn't exist, with add one filled with NA - if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA + if (any(duplicated(sampleNamesMakeNames))) { + write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) + for (sampleName in sampleNamesOrigin) { + write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) + } + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + } - #Create a simple xcmsRaw object for each sample - xcmsRaw=xcmsRaw(file) - #Extract the polarity (a list of polarities) - polarity=xcmsRaw@polarity - #Verify if all the scans have the same polarity - uniq_list=unique(polarity) - if (length(uniq_list)>1){ - polarity="mixed" - } else { - polarity=as.character(uniq_list) - } - #Transforms the character to obtain only the sample name - filename=basename(file) - library(tools) - samplename=file_path_sans_ext(filename) - - #Set the polarity attribute - sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity - - #Delete xcmsRaw object because it creates a bug for the fillpeaks step - rm(xcmsRaw) + if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { + cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") + for (sampleName in sampleNamesOrigin) { + cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) + } } - } + sampleMetadata$sampleMetadata=sampleNamesMakeNames + sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns + rownames(sampleMetadata)=NULL + + #Create a list of files name in the current directory + list_files=xset@filepaths + #For each sample file, the following actions are done + for (file in list_files){ + #Check if the file is in the CDF format + if (!mzR:::netCDFIsFile(file)){ + + # If the column isn't exist, with add one filled with NA + if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA - write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) + #Create a simple xcmsRaw object for each sample + xcmsRaw=xcmsRaw(file) + #Extract the polarity (a list of polarities) + polarity=xcmsRaw@polarity + #Verify if all the scans have the same polarity + uniq_list=unique(polarity) + if (length(uniq_list)>1){ + polarity="mixed" + } else { + polarity=as.character(uniq_list) + } + #Transforms the character to obtain only the sample name + filename=basename(file) + library(tools) + samplename=file_path_sans_ext(filename) - return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) + #Set the polarity attribute + sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity + + #Delete xcmsRaw object because it creates a bug for the fillpeaks step + rm(xcmsRaw) + } + + } + + write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) + + return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) } @@ -386,29 +385,28 @@ ## #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM checkFilesCompatibilityWithXcms <- function(directory) { - cat("Checking files filenames compatibilities with xmcs...\n") - # WHAT XCMS WILL FIND - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) - files <- c(directory[!info$isdir], listed) - files_abs <- file.path(getwd(), files) - exists <- file.exists(files_abs) - files[exists] <- files_abs[exists] - files[exists] <- sub("//","/",files[exists]) + cat("Checking files filenames compatibilities with xmcs...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + files_abs <- file.path(getwd(), files) + exists <- file.exists(files_abs) + files[exists] <- files_abs[exists] + files[exists] <- sub("//","/",files[exists]) - # WHAT IS ON THE FILESYSTEM - filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) - filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] + # WHAT IS ON THE FILESYSTEM + filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) + filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] - # COMPARISON - if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { - write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) - write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) - stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") - - } + # COMPARISON + if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { + write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) + write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + } } @@ -418,17 +416,17 @@ ## #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM checkXmlStructure <- function (directory) { - cat("Checking XML structure...\n") + cat("Checking XML structure...\n") - cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") - capture=system(cmd,intern=TRUE) + cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") + capture=system(cmd,intern=TRUE) - if (length(capture)>0){ - #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) - write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) - write(capture, stderr()) - stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") - } + if (length(capture)>0){ + #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) + write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) + write(capture, stderr()) + stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") + } } @@ -438,23 +436,23 @@ ## #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM deleteXmlBadCharacters<- function (directory) { - cat("Checking Non ASCII characters in the XML...\n") + cat("Checking Non ASCII characters in the XML...\n") - processed=F - l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) - for (i in l){ - cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") - capture=suppressWarnings(system(cmd,intern=TRUE)) - if (length(capture)>0){ - cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) - print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) - c=system(cmd,intern=TRUE) - capture="" - processed=T + processed=F + l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) + for (i in l){ + cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") + capture=suppressWarnings(system(cmd,intern=TRUE)) + if (length(capture)>0){ + cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) + print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) + c=system(cmd,intern=TRUE) + capture="" + processed=T + } } - } - if (processed) cat("\n\n") - return(processed) + if (processed) cat("\n\n") + return(processed) } @@ -463,19 +461,19 @@ ## #@author Gildas Le Corguille lecorguille@sb-roscoff.fr getMd5sum <- function (directory) { - cat("Compute md5 checksum...\n") - # WHAT XCMS WILL FIND - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) - files <- c(directory[!info$isdir], listed) - exists <- file.exists(files) - files <- files[exists] + cat("Compute md5 checksum...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + exists <- file.exists(files) + files <- files[exists] - library(tools) + library(tools) - #cat("\n\n") + #cat("\n\n") - return(as.matrix(md5sum(files))) + return(as.matrix(md5sum(files))) }
