Mercurial > repos > lecorguille > xcms_group
changeset 14:f4dc089f9d19 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 22c4e92909198328fc7439ff47e4546a273eb907
author | lecorguille |
---|---|
date | Sun, 05 Feb 2017 08:56:10 -0500 |
parents | 206e5a968dc5 |
children | fa9fe7d46ec3 |
files | README.rst abims_xcms_group.xml lib.r macros.xml planemo_test.sh test-data/MM-single.xset.merged.RData test-data/faahKO-single-class.xset.merged.RData test-data/faahKO-single.xset.merged.RData test-data/faahKO-single.xset.merged.group.retcor.RData xcms.r |
diffstat | 10 files changed, 525 insertions(+), 452 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Mon Jan 30 08:51:20 2017 -0500 +++ b/README.rst Sun Feb 05 08:56:10 2017 -0500 @@ -2,6 +2,10 @@ Changelog/News -------------- +**Version 2.1.0 - 03/02/2017** + +- IMPROVEMENT: xcms.group can deal with merged individual data from "xcms.xcmsSet Merger" + **Version 2.0.6 - 06/07/2016** - UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0 @@ -27,13 +31,3 @@ - IMPROVEMENT: parameter labels have changed to facilitate their reading. -Test Status ------------ - -Planemo test using conda: passed - -Planemo test using source env.sh: passed - -Planemo shed_test : passed - -
--- a/abims_xcms_group.xml Mon Jan 30 08:51:20 2017 -0500 +++ b/abims_xcms_group.xml Sun Feb 05 08:56:10 2017 -0500 @@ -1,4 +1,4 @@ -<tool id="abims_xcms_group" name="xcms.group" version="2.0.6"> +<tool id="abims_xcms_group" name="xcms.group" version="2.1.0"> <description>Group peaks together across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time.</description> @@ -11,21 +11,21 @@ <command><![CDATA[ @COMMAND_XCMS_SCRIPT@ - xfunction group - image $image + xfunction group + image $image xsetRdataOutput $xsetRData rplotspdf $rplotsPdf - method $methods.method sleep 0.001 + method $methods.method sleep 0.001 #if $methods.method == "density": - ## minsamp $methods.minsamp - minfrac $methods.minfrac - bw $methods.bw + ## minsamp $methods.minsamp + minfrac $methods.minfrac + bw $methods.bw mzwid $methods.mzwid #if $methods.density_options.option == "show": max $methods.density_options.max - #end if + #end if #elif $methods.method == "mzClust": mzppm $methods.mzppm mzabs $methods.mzabs @@ -38,7 +38,7 @@ kNN $methods.kNN #end if @COMMAND_LOG_EXIT@ - ]]></command> + ]]></command> <inputs> <param name="image" type="data" format="rdata.xcms.raw,rdata.xcms.group,rdata.xcms.retcor,rdata" label="xset RData file" help="output file from another function xcms (xcmsSet, retcor etc.)" /> @@ -53,7 +53,7 @@ <param name="minfrac" type="float" value="0.5" label="Minimum fraction of samples necessary" help="[minfrac] in at least one of the sample groups for it to be a valid group" /> <param name="mzwid" type="float" value="0.25" label="Width of overlapping m/z slices" help="[mzwid] to use for creating peak density chromatograms and grouping peaks across samples " /> <!-- - <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " /> + <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " /> --> <conditional name="density_options"> <param name="option" type="select" label="Advanced options"> @@ -73,18 +73,18 @@ <param name="mzabs" type="float" value="0" label="Absolute error used for clustering/grouping" help="[mzabs]" /> <param name="minfrac" type="float" value="0" label="Minimum fraction of each class in one bin" help="[minfrac] minimum fraction of samples necessary in at least one of the sample groups for it to be a valid group" /> <!-- - <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " /> + <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " /> --> </when> <when value="nearest"> <param name="mzVsRTbalance" type="integer" value="10 " label="Multiplicator for mz value before calculating the (euclidean) distance between two peaks." help="[mzVsRTbalance]" /> <param name="mzCheck" type="float" value="0.2" label="Maximum tolerated distance for mz" help="[mzCheck]" /> <param name="rtCheck" type="integer" value="15" label="Maximum tolerated distance for RT" help="[rtCheck]" /> - <param name="kNN" type="integer" value="10" label="Number of nearest Neighbours to check" help="[kNN]" /> + <param name="kNN" type="integer" value="10" label="Number of nearest Neighbours to check" help="[kNN]" /> </when> </conditional> <!-- - <param name="sleepy" type="float" value="0.001" label="sleep" help="seconds to pause between plotting successive steps of the peak grouping algorithm. peaks are plotted as points showing relative intensity. identified groups are flanked by dotted vertical lines"> + <param name="sleepy" type="float" value="0.001" label="sleep" help="seconds to pause between plotting successive steps of the peak grouping algorithm. peaks are plotted as points showing relative intensity. identified groups are flanked by dotted vertical lines"> <validator type="in_range" message="Must be more than 0" min="0.001" max="inf"/> </param> --> @@ -136,6 +136,63 @@ </assert_contents> </output> </test> + <test> + <param name="image" value="faahKO-single-class.xset.merged.RData"/> + <param name="methods|method" value="density"/> + <param name="methods|bw" value="5"/> + <param name="methods|minfrac" value="0.3"/> + <param name="methods|mzwid" value="0.01"/> + <param name="methods|density_options|option" value="show"/> + <param name="methods|density_options|max" value="50"/> + <output name="log"> + <assert_contents> + <has_text text="object with 4 samples" /> + <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" /> + <has_text text="Mass range: 200.1-600 m/z" /> + <has_text text="Peaks: 9251 (about 2313 per sample)" /> + <has_text text="Peak Groups: 8275" /> + <has_text text="Sample classes: KO, WT" /> + </assert_contents> + </output> + </test> + <test> + <param name="image" value="faahKO-single.xset.merged.RData"/> + <param name="methods|method" value="density"/> + <param name="methods|bw" value="5"/> + <param name="methods|minfrac" value="0.3"/> + <param name="methods|mzwid" value="0.01"/> + <param name="methods|density_options|option" value="show"/> + <param name="methods|density_options|max" value="50"/> + <output name="log"> + <assert_contents> + <has_text text="object with 4 samples" /> + <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" /> + <has_text text="Mass range: 200.1-600 m/z" /> + <has_text text="Peaks: 9251 (about 2313 per sample)" /> + <has_text text="Peak Groups: 664" /> + <has_text text="Sample classes: ." /> + </assert_contents> + </output> + </test> + <test> + <param name="image" value="MM-single.xset.merged.RData"/> + <param name="methods|method" value="density"/> + <param name="methods|bw" value="5"/> + <param name="methods|minfrac" value="0.3"/> + <param name="methods|mzwid" value="0.01"/> + <param name="methods|density_options|option" value="show"/> + <param name="methods|density_options|max" value="50"/> + <output name="log"> + <assert_contents> + <has_text text="object with 2 samples" /> + <has_text text="Time range: 19.7-307.3 seconds (0.3-5.1 minutes)" /> + <has_text text="Mass range: 117.0357-936.7059 m/z" /> + <has_text text="Peaks: 236 (about 118 per sample)" /> + <has_text text="Peak Groups: 236" /> + <has_text text="Sample classes: ." /> + </assert_contents> + </output> + </test> <!--<test> <param name="image" value="xset.group.retcor.RData"/> <param name="methods|method" value="density"/> @@ -174,10 +231,29 @@ </assert_contents> </output> </test> + <test> + <param name="image" value="faahKO-single.xset.merged.group.retcor.RData"/> + <param name="methods|method" value="density"/> + <param name="methods|bw" value="5"/> + <param name="methods|minfrac" value="0.3"/> + <param name="methods|mzwid" value="0.01"/> + <param name="methods|density_options|option" value="show"/> + <param name="methods|density_options|max" value="50"/> + <output name="log"> + <assert_contents> + <has_text text="object with 4 samples" /> + <has_text text="Time range: 2507.7-4481.7 seconds (41.8-74.7 minutes)" /> + <has_text text="Mass range: 200.1-600 m/z" /> + <has_text text="Peaks: 9251 (about 2313 per sample)" /> + <has_text text="Peak Groups: 8157" /> + <has_text text="Sample classes: KO, WT" /> + </assert_contents> + </output> + </test> </tests> <help><![CDATA[ - + @HELP_AUTHORS@ ========== @@ -203,6 +279,8 @@ ========================= ================= =================== ========== xcms.xcmsSet xset.RData rdata.xcms.raw RData file ------------------------- ----------------- ------------------- ---------- +xcms.xcmsSet Merger xset.RData rdata.xcms.raw RData file +------------------------- ----------------- ------------------- ---------- xcms.retcor xset.RData rdata.xcms.retcor RData file ========================= ================= =================== ========== @@ -231,7 +309,9 @@ +---------------------------+-----------------------+ | Parameter : num + label | Format | +===========================+=======================+ -| 1 : RData file | rdata.xcms.group | +| Or : RData file | rdata.xcms.raw | ++---------------------------+-----------------------+ +| Or : RData file | rdata.xcms.retcor | +---------------------------+-----------------------+ @@ -264,15 +344,15 @@ xset.group.RData: rdata.xcms.group format | Rdata file that will be necessary in the third and fourth step of the workflow (xcms.retcor and xcms.fillpeaks). - + ------ -.. class:: infomark +.. class:: infomark The output file is an xset.group.RData file. You can continue your analysis using it in **xcms.retcor** tool. - + --------------------------------------------------- @@ -312,13 +392,17 @@ Changelog/News -------------- +**Version 2.1.0 - 03/02/2017** + +- IMPROVEMENT: xcms.group can deal with merged individual data from "xcms.xcmsSet Merger" + **Version 2.0.6 - 06/07/2016** - UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0 **Version 2.0.5 - 04/04/2016** -- TEST: refactoring to pass planemo test using conda dependencies +- TEST: refactoring to pass planemo test using conda dependencies **Version 2.0.4 - 10/02/2016**
--- a/lib.r Mon Jan 30 08:51:20 2017 -0500 +++ b/lib.r Sun Feb 05 08:56:10 2017 -0500 @@ -36,11 +36,11 @@ getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { groups <- xset@groups values <- groupval(xset, "medret", value=intval) - + # renamming of the column rtmed to rt to fit with camera peaklist function output colnames(groups)[colnames(groups)=="rtmed"] <- "rt" colnames(groups)[colnames(groups)=="mzmed"] <- "mz" - + ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) groups = RTSecondToMinute(groups, convertRTMinute) @@ -57,138 +57,139 @@ #@author Y. Guitton getBPC <- function(file,rtcor=NULL, ...) { - object <- xcmsRaw(file) - sel <- profRange(object, ...) - cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) - #plotChrom(xcmsRaw(file), base=T) + object <- xcmsRaw(file) + sel <- profRange(object, ...) + cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) + #plotChrom(xcmsRaw(file), base=T) } #@author Y. Guitton getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { - cat("Creating BIC pdf...\n") + cat("Creating BIC pdf...\n") - if (is.null(xcmsSet)) { - cat("Enter an xcmsSet \n") - stop() - } else { - files <- filepaths(xcmsSet) - } + if (is.null(xcmsSet)) { + cat("Enter an xcmsSet \n") + stop() + } else { + files <- filepaths(xcmsSet) + } - class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class - classnames<-vector("list",length(class)) - for (i in 1:length(class)){ - classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) - } + classnames<-vector("list",length(phenoDataClass)) + for (i in 1:length(phenoDataClass)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) + } - N <- dim(phenoData(xcmsSet))[1] + N <- dim(phenoData(xcmsSet))[1] - TIC <- vector("list",N) + TIC <- vector("list",N) - for (j in 1:N) { + for (j in 1:N) { - TIC[[j]] <- getBPC(files[j]) - #good for raw - # seems strange for corrected - #errors if scanrange used in xcmsSetgeneration - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[j]] else - rtcor <- NULL + TIC[[j]] <- getBPC(files[j]) + #good for raw + # seems strange for corrected + #errors if scanrange used in xcmsSetgeneration + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[j]] + else + rtcor <- NULL - TIC[[j]] <- getBPC(files[j],rtcor=rtcor) - # TIC[[j]][,1]<-rtcor - } + TIC[[j]] <- getBPC(files[j],rtcor=rtcor) + # TIC[[j]][,1]<-rtcor + } - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty = 1:N - pch = 1:N - #search for max x and max y in BPCs - xlim = range(sapply(TIC, function(x) range(x[,1]))) - ylim = range(sapply(TIC, function(x) range(x[,2]))) - ylim = c(-ylim[2], ylim[2]) + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in BPCs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) - ##plot start + ##plot start - if (length(class)>2){ - for (k in 1:(length(class)-1)){ - for (l in (k+1):length(class)){ - #print(paste(class[k],"vs",class[l],sep=" ")) - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + if (length(phenoDataClass)>2){ + for (k in 1:(length(phenoDataClass)-1)){ + for (l in (k+1):length(phenoDataClass)){ + #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + + if (length(phenoDataClass)==2){ + k=1 + l=2 colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - } - } - }#end if length >2 + + }#end length ==2 - if (length(class)==2){ - k=1 - l=2 - colvect<-NULL - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") - - for (j in 1:length(classnames[[k]])) { + #case where only one class + if (length(phenoDataClass)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==2 + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } - #case where only one class - if (length(class)==1){ - k=1 - ylim = range(sapply(TIC, function(x) range(x[,2]))) - colvect<-NULL - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } + dev.off() #pdf(pdfname,w=16,h=10) - legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==1 - - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) + invisible(TIC) } #@author Y. Guitton getTIC <- function(file,rtcor=NULL) { - object <- xcmsRaw(file) - cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) + object <- xcmsRaw(file) + cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) } ## @@ -196,114 +197,112 @@ ## #@author Y. Guitton getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { - cat("Creating TIC pdf...\n") + cat("Creating TIC pdf...\n") - if (is.null(xcmsSet)) { - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") - if (is.null(files)) - files <- getwd() - info <- file.info(files) - listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) - files <- c(files[!info$isdir], listed) - } else { - files <- filepaths(xcmsSet) - } - - class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + if (is.null(xcmsSet)) { + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") + if (is.null(files)) + files <- getwd() + info <- file.info(files) + listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) + files <- c(files[!info$isdir], listed) + } else { + files <- filepaths(xcmsSet) + } - classnames<-vector("list",length(class)) - for (i in 1:length(class)){ - classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) - } + phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + classnames<-vector("list",length(phenoDataClass)) + for (i in 1:length(phenoDataClass)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) + } - N <- length(files) - TIC <- vector("list",N) + N <- length(files) + TIC <- vector("list",N) - for (i in 1:N) { - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[i]] else - rtcor <- NULL - TIC[[i]] <- getTIC(files[i],rtcor=rtcor) - } + for (i in 1:N) { + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[i]] else + rtcor <- NULL + TIC[[i]] <- getTIC(files[i],rtcor=rtcor) + } - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty = 1:N - pch = 1:N - #search for max x and max y in TICs - xlim = range(sapply(TIC, function(x) range(x[,1]))) - ylim = range(sapply(TIC, function(x) range(x[,2]))) - ylim = c(-ylim[2], ylim[2]) + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in TICs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) - ##plot start - if (length(class)>2){ - for (k in 1:(length(class)-1)){ - for (l in (k+1):length(class)){ - #print(paste(class[k],"vs",class[l],sep=" ")) - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + ##plot start + if (length(phenoDataClass)>2){ + for (k in 1:(length(phenoDataClass)-1)){ + for (l in (k+1):length(phenoDataClass)){ + #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + if (length(phenoDataClass)==2){ + k=1 + l=2 + + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") colvect<-NULL for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - } - } - }#end if length >2 - if (length(class)==2){ - k=1 - l=2 + + }#end length ==2 - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + #case where only one class + if (length(phenoDataClass)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) - }#end length ==2 - - #case where only one class - if (length(class)==1){ - k=1 - ylim = range(sapply(TIC, function(x) range(x[,2]))) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 - legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==1 + dev.off() #pdf(pdfname,w=16,h=10) - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) + invisible(TIC) } @@ -313,70 +312,70 @@ #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { - cat("Creating the sampleMetadata file...\n") - - #Create the sampleMetada dataframe - sampleMetadata=xset@phenoData - sampleNamesOrigin=rownames(sampleMetadata) - sampleNamesMakeNames=make.names(sampleNamesOrigin) + cat("Creating the sampleMetadata file...\n") - if (any(duplicated(sampleNamesMakeNames))) { - write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) - for (sampleName in sampleNamesOrigin) { - write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) - } - stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") - } - - if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { - cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") - for (sampleName in sampleNamesOrigin) { - cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) - } - } - - sampleMetadata$sampleMetadata=sampleNamesMakeNames - sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns - rownames(sampleMetadata)=NULL + #Create the sampleMetada dataframe + sampleMetadata=xset@phenoData + sampleNamesOrigin=rownames(sampleMetadata) + sampleNamesMakeNames=make.names(sampleNamesOrigin) - #Create a list of files name in the current directory - list_files=xset@filepaths - #For each sample file, the following actions are done - for (file in list_files){ - #Check if the file is in the CDF format - if (!mzR:::netCDFIsFile(file)){ - - # If the column isn't exist, with add one filled with NA - if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA + if (any(duplicated(sampleNamesMakeNames))) { + write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) + for (sampleName in sampleNamesOrigin) { + write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) + } + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + } - #Create a simple xcmsRaw object for each sample - xcmsRaw=xcmsRaw(file) - #Extract the polarity (a list of polarities) - polarity=xcmsRaw@polarity - #Verify if all the scans have the same polarity - uniq_list=unique(polarity) - if (length(uniq_list)>1){ - polarity="mixed" - } else { - polarity=as.character(uniq_list) - } - #Transforms the character to obtain only the sample name - filename=basename(file) - library(tools) - samplename=file_path_sans_ext(filename) - - #Set the polarity attribute - sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity - - #Delete xcmsRaw object because it creates a bug for the fillpeaks step - rm(xcmsRaw) + if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { + cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") + for (sampleName in sampleNamesOrigin) { + cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) + } } - } + sampleMetadata$sampleMetadata=sampleNamesMakeNames + sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns + rownames(sampleMetadata)=NULL + + #Create a list of files name in the current directory + list_files=xset@filepaths + #For each sample file, the following actions are done + for (file in list_files){ + #Check if the file is in the CDF format + if (!mzR:::netCDFIsFile(file)){ + + # If the column isn't exist, with add one filled with NA + if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA - write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) + #Create a simple xcmsRaw object for each sample + xcmsRaw=xcmsRaw(file) + #Extract the polarity (a list of polarities) + polarity=xcmsRaw@polarity + #Verify if all the scans have the same polarity + uniq_list=unique(polarity) + if (length(uniq_list)>1){ + polarity="mixed" + } else { + polarity=as.character(uniq_list) + } + #Transforms the character to obtain only the sample name + filename=basename(file) + library(tools) + samplename=file_path_sans_ext(filename) - return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) + #Set the polarity attribute + sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity + + #Delete xcmsRaw object because it creates a bug for the fillpeaks step + rm(xcmsRaw) + } + + } + + write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) + + return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) } @@ -386,29 +385,28 @@ ## #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM checkFilesCompatibilityWithXcms <- function(directory) { - cat("Checking files filenames compatibilities with xmcs...\n") - # WHAT XCMS WILL FIND - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) - files <- c(directory[!info$isdir], listed) - files_abs <- file.path(getwd(), files) - exists <- file.exists(files_abs) - files[exists] <- files_abs[exists] - files[exists] <- sub("//","/",files[exists]) + cat("Checking files filenames compatibilities with xmcs...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + files_abs <- file.path(getwd(), files) + exists <- file.exists(files_abs) + files[exists] <- files_abs[exists] + files[exists] <- sub("//","/",files[exists]) - # WHAT IS ON THE FILESYSTEM - filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) - filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] + # WHAT IS ON THE FILESYSTEM + filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) + filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] - # COMPARISON - if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { - write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) - write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) - stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") - - } + # COMPARISON + if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { + write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) + write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + } } @@ -418,17 +416,17 @@ ## #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM checkXmlStructure <- function (directory) { - cat("Checking XML structure...\n") + cat("Checking XML structure...\n") - cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") - capture=system(cmd,intern=TRUE) + cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") + capture=system(cmd,intern=TRUE) - if (length(capture)>0){ - #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) - write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) - write(capture, stderr()) - stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") - } + if (length(capture)>0){ + #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) + write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) + write(capture, stderr()) + stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") + } } @@ -438,23 +436,23 @@ ## #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM deleteXmlBadCharacters<- function (directory) { - cat("Checking Non ASCII characters in the XML...\n") + cat("Checking Non ASCII characters in the XML...\n") - processed=F - l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) - for (i in l){ - cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") - capture=suppressWarnings(system(cmd,intern=TRUE)) - if (length(capture)>0){ - cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) - print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) - c=system(cmd,intern=TRUE) - capture="" - processed=T + processed=F + l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) + for (i in l){ + cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") + capture=suppressWarnings(system(cmd,intern=TRUE)) + if (length(capture)>0){ + cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) + print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) + c=system(cmd,intern=TRUE) + capture="" + processed=T + } } - } - if (processed) cat("\n\n") - return(processed) + if (processed) cat("\n\n") + return(processed) } @@ -463,19 +461,19 @@ ## #@author Gildas Le Corguille lecorguille@sb-roscoff.fr getMd5sum <- function (directory) { - cat("Compute md5 checksum...\n") - # WHAT XCMS WILL FIND - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) - files <- c(directory[!info$isdir], listed) - exists <- file.exists(files) - files <- files[exists] + cat("Compute md5 checksum...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + exists <- file.exists(files) + files <- files[exists] - library(tools) + library(tools) - #cat("\n\n") + #cat("\n\n") - return(as.matrix(md5sum(files))) + return(as.matrix(md5sum(files))) }
--- a/macros.xml Mon Jan 30 08:51:20 2017 -0500 +++ b/macros.xml Sun Feb 05 08:56:10 2017 -0500 @@ -7,6 +7,11 @@ <requirement type="package" version="1.1_4">r-batch</requirement> </requirements> </xml> + <xml name="requirements_light"> + <requirements> + <requirement type="package" version="1.46.0">bioconductor-xcms</requirement> + </requirements> + </xml> <xml name="stdio"> <stdio> <exit_code range="1" level="fatal" /> @@ -27,24 +32,40 @@ <!-- zipfile load for planemo test --> - <token name="@COMMAND_ZIPFILE_LOAD@"> - #if $zipfile_load_conditional.zipfile_load_select == "yes": - #if $zipfile_load_conditional.zip_file: - zipfile $zipfile_load_conditional.zip_file + <token name="@COMMAND_FILE_LOAD@"> + #if $file_load_conditional.file_load_select == "yes": + #if $file_load_conditional.inputs.input == "zip_file": + zipfile $file_load_conditional.inputs.zip_file + #else + #set singlefile_galaxyPath = ','.join( [ str( $single_file ) for $single_file in $file_load_conditional.inputs.single_file ] ) + #set singlefile_sampleName = ','.join( [ str( $single_file.name ) for $single_file in $file_load_conditional.inputs.single_file ] ) + + singlefile_galaxyPath $singlefile_galaxyPath singlefile_sampleName $singlefile_sampleName #end if #end if </token> - <xml name="zipfile_load"> - <conditional name="zipfile_load_conditional"> - <param name="zipfile_load_select" type="select" label="Resubmit your zip file" help="Use only if you get a message which say that your original zip file have been deleted on the server." > + <xml name="file_load"> + <conditional name="file_load_conditional"> + <param name="file_load_select" type="select" label="Resubmit your dataset or your zip file" help="Use only if you get a message which say that your original dataset or zip file have been deleted on the server." > <option value="no" >no need</option> - <option value="yes">yes</option> + <option value="yes" >yes</option> </param> <when value="no"> </when> <when value="yes"> - <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file" /> + <conditional name="inputs"> + <param name="input" type="select" label="Choose your inputs method" > + <option value="zip_file" selected="true">Zip file from your history containing your chromatograms</option> + <option value="single_file">A mzXML or netCDF file from your history</option> + </param> + <when value="zip_file"> + <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file" /> + </when> + <when value="single_file"> + <param name="single_file" type="data" format="mzxml,netcdf" label="Single file" multiple="true"/> + </when> + </conditional> </when> </conditional> </xml> @@ -54,7 +75,7 @@ <token name="@HELP_AUTHORS@"> .. class:: infomark -**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu +**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu .. class:: infomark
--- a/planemo_test.sh Mon Jan 30 08:51:20 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -# Example of planemo command to launch test - - -# -- Use of installed package environments -# after having installing package on a local galaxy instance -source /w/galaxy/dev/shed_tools_tool_dependency_dir/R/3.1.2/iuc/package_r_3_1_2/1ca39eb16186/env.sh -source /w/galaxy/dev/shed_tools_tool_dependency_dir/bioconductor-xcms/1.44.0/lecorguille/package_bioconductor_xcms_1_44_0/0c38f7d43e08/env.sh -planemo test --install_galaxy - -#All 2 test(s) executed passed. -#abims_xcms_group[0]: passed -#abims_xcms_group[1]: passed - - -# -- Use of conda dependencies -planemo conda_init --conda_prefix /tmp/mc -planemo conda_install --conda_prefix /tmp/mc . -planemo test --install_galaxy --conda_prefix /tmp/mc --conda_dependency_resolution - -#All 2 test(s) executed passed. -#abims_xcms_group[0]: passed -#abims_xcms_group[1]: passed - - -# -- Use of shed_test -planemo shed_test --install_galaxy -t testtoolshed - -#All 2 test(s) executed passed. -#testtoolshed.g2.bx.psu.edu/repos/lecorguille/xcms_group/abims_xcms_group/2.0.5[0]: passed -#testtoolshed.g2.bx.psu.edu/repos/lecorguille/xcms_group/abims_xcms_group/2.0.5[1]: passed
--- a/xcms.r Mon Jan 30 08:51:20 2017 -0500 +++ b/xcms.r Sun Feb 05 08:56:10 2017 -0500 @@ -15,8 +15,8 @@ #pkgs=c("xcms","batch") pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch") for(pkg in pkgs) { - suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) - cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") + suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) + cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") } source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } cat("\n\n"); @@ -38,7 +38,7 @@ #image is an .RData file necessary to use xset variable given by previous tools if (!is.null(listArguments[["image"]])){ - load(listArguments[["image"]]); listArguments[["image"]]=NULL + load(listArguments[["image"]]); listArguments[["image"]]=NULL } #Import the different functions @@ -61,13 +61,13 @@ xsetRdataOutput = paste(thefunction,"RData",sep=".") if (!is.null(listArguments[["xsetRdataOutput"]])){ - xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL + xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL } #saving the specific parameters rplotspdf = "Rplots.pdf" if (!is.null(listArguments[["rplotspdf"]])){ - rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL + rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL } sampleMetadataOutput = "sampleMetadata.tsv" if (!is.null(listArguments[["sampleMetadataOutput"]])){ @@ -75,28 +75,28 @@ } variableMetadataOutput = "variableMetadata.tsv" if (!is.null(listArguments[["variableMetadataOutput"]])){ - variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL + variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL } dataMatrixOutput = "dataMatrix.tsv" if (!is.null(listArguments[["dataMatrixOutput"]])){ - dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL + dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL } if (!is.null(listArguments[["convertRTMinute"]])){ - convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL + convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL } if (!is.null(listArguments[["numDigitsMZ"]])){ - numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL + numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL } if (!is.null(listArguments[["numDigitsRT"]])){ numDigitsRT = listArguments[["numDigitsRT"]]; listArguments[["numDigitsRT"]]=NULL } if (!is.null(listArguments[["intval"]])){ - intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL + intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL } if (thefunction %in% c("xcmsSet","retcor")) { - ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL - bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL + ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL + bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL } #necessary to unzip .zip file uploaded to Galaxy @@ -104,67 +104,75 @@ if (!is.null(listArguments[["zipfile"]])){ - zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL + zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL } -if (!is.null(listArguments[["library"]])){ - directory=listArguments[["library"]]; listArguments[["library"]]=NULL - if(!file.exists(directory)){ - error_message=paste("Cannot access the directory:",directory,". Please verify if the directory exists or not.") - print(error_message) - stop(error_message) - } +if (!is.null(listArguments[["singlefile_galaxyPath"]])){ + singlefile_galaxyPaths = unlist(strsplit(listArguments[["singlefile_galaxyPath"]],",")); listArguments[["singlefile_galaxyPath"]]=NULL + singlefile_sampleNames = unlist(strsplit(listArguments[["singlefile_sampleName"]],",")); listArguments[["singlefile_sampleName"]]=NULL + + singlefile=NULL + for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { + singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i] + singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i] + singlefile[[singlefile_sampleName]] = singlefile_galaxyPath + } } # We unzip automatically the chromatograms from the zip files. if (thefunction %in% c("xcmsSet","retcor","fillPeaks")) { - if(exists("zipfile") && (zipfile!="")) { - if(!file.exists(zipfile)){ - error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") - print(error_message) - stop(error_message) - } + if(exists("singlefile") && (length("singlefile")>0)) { + for (singlefile_sampleName in names(singlefile)) { + singlefile_galaxyPath = singlefile[[singlefile_sampleName]] + if(!file.exists(singlefile_galaxyPath)){ + error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") + print(error_message); stop(error_message) + } - #list all file in the zip file - #zip_files=unzip(zipfile,list=T)[,"Name"] + file.symlink(singlefile_galaxyPath,singlefile_sampleName) + } + directory = "." - - #unzip - suppressWarnings(unzip(zipfile, unzip="unzip")) + md5sumList=list("origin"=getMd5sum(directory)) - #get the directory name - filesInZip=unzip(zipfile, list=T); - directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); - directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] - directory = "." - if (length(directories) == 1) directory = directories + } + if(exists("zipfile") && (zipfile!="")) { + if(!file.exists(zipfile)){ + error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") + print(error_message) + stop(error_message) + } - cat("files_root_directory\t",directory,"\n") + #list all file in the zip file + #zip_files=unzip(zipfile,list=T)[,"Name"] - # - md5sumList=list("origin"=getMd5sum(directory)) + #unzip + suppressWarnings(unzip(zipfile, unzip="unzip")) - # Check and fix if there are non ASCII characters. If so, they will be removed from the *mzXML mzML files. - # Remove because can create issue with some clean files - #@TODO: fix me - #if (deleteXmlBadCharacters(directory)) { - # md5sumList=list("removalBadCharacters"=getMd5sum(directory)) - #} + #get the directory name + filesInZip=unzip(zipfile, list=T); + directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); + directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] + directory = "." + if (length(directories) == 1) directory = directories - } + cat("files_root_directory\t",directory,"\n") + + md5sumList=list("origin"=getMd5sum(directory)) + } } #addition of the directory to the list of arguments in the first position if (thefunction == "xcmsSet") { - checkXmlStructure(directory) - checkFilesCompatibilityWithXcms(directory) - listArguments=append(directory, listArguments) + checkXmlStructure(directory) + checkFilesCompatibilityWithXcms(directory) + listArguments=append(directory, listArguments) } #addition of xset object to the list of arguments in the first position if (exists("xset")){ - listArguments=append(list(xset), listArguments) + listArguments=append(list(xset), listArguments) } cat("\n\n") @@ -172,8 +180,6 @@ - - # ----- MAIN PROCESSING INFO ----- cat("\tMAIN PROCESSING INFO\n") @@ -181,12 +187,12 @@ #Verification of a group step before doing the fillpeaks job. if (thefunction == "fillPeaks") { - res=try(is.null(groupnames(xset))) - if (class(res) == "try-error"){ - error<-geterrmessage() - write(error, stderr()) - stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step") - } + res=try(is.null(groupnames(xset))) + if (class(res) == "try-error"){ + error<-geterrmessage() + write(error, stderr()) + stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step") + } } @@ -194,7 +200,7 @@ #dev.new(file="Rplots.pdf", width=16, height=12) pdf(file=rplotspdf, width=16, height=12) if (thefunction == "group") { - par(mfrow=c(2,2)) + par(mfrow=c(2,2)) } #else if (thefunction == "retcor") { #try to change the legend display @@ -215,40 +221,40 @@ if (thefunction == "xcmsSet") { - #transform the files absolute pathways into relative pathways - xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths) + #transform the files absolute pathways into relative pathways + xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths) - if(exists("zipfile") && (zipfile!="")) { + if(exists("zipfile") && (zipfile!="")) { - #Modify the samples names (erase the path) - for(i in 1:length(sampnames(xset))){ + #Modify the samples names (erase the path) + for(i in 1:length(sampnames(xset))){ - sample_name=unlist(strsplit(sampnames(xset)[i], "/")) - sample_name=sample_name[length(sample_name)] - sample_name= unlist(strsplit(sample_name,"[.]"))[1] - sampnames(xset)[i]=sample_name + sample_name=unlist(strsplit(sampnames(xset)[i], "/")) + sample_name=sample_name[length(sample_name)] + sample_name= unlist(strsplit(sample_name,"[.]"))[1] + sampnames(xset)[i]=sample_name + + } } - } - } # -- TIC -- if (thefunction == "xcmsSet") { - cat("\t\tGET TIC GRAPH\n") - sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) - getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") - getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) + cat("\t\tGET TIC GRAPH\n") + sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) + getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") + getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) } else if (thefunction == "retcor") { - cat("\t\tGET TIC GRAPH\n") - getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") - getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) + cat("\t\tGET TIC GRAPH\n") + getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") + getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) } if (thefunction == "fillPeaks") { - cat("\t\tGET THE PEAK LIST\n") - getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput) + cat("\t\tGET THE PEAK LIST\n") + getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput) } @@ -262,7 +268,7 @@ #saving R data in .Rdata file to save the variables used in the present tool -objects2save = c("xset","zipfile","listOFlistArguments","md5sumList","sampleNamesList") +objects2save = c("xset","zipfile","singlefile","listOFlistArguments","md5sumList","sampleNamesList") save(list=objects2save[objects2save %in% ls()], file=xsetRdataOutput) cat("\n\n")