Mercurial > repos > lecorguille > xcms_group
changeset 4:fef3d1b8e7f4 draft
planemo upload commit a3229faad6949bbca965d1d7e138cb3c0550780e
| author | lecorguille | 
|---|---|
| date | Thu, 07 Apr 2016 16:01:08 -0400 | 
| parents | 003373d58672 | 
| children | b45f5c31643a | 
| files | README.rst abims_xcms_group.xml lib.r macros.xml planemo.sh planemo_test.sh repository_dependencies.xml test-data/log.txt test-data/xset.RData test-data/xset.group.RData test-data/xset.group.Rplots.pdf test-data/xset.group.retcor.RData test-data/xset.log.txt tool_dependencies.xml xcms.r | 
| diffstat | 15 files changed, 819 insertions(+), 104 deletions(-) [+] | 
line wrap: on
 line diff
--- a/README.rst Mon Feb 22 17:03:29 2016 -0500 +++ b/README.rst Thu Apr 07 16:01:08 2016 -0400 @@ -2,6 +2,11 @@ Changelog/News -------------- +**Version 2.0.5 - 04/04/2016** + +- TEST: refactoring to pass planemo test using conda dependencies + + **Version 2.0.4 - 10/02/2016** - BUGFIX: better management of errors. Datasets remained green although the process failed @@ -17,3 +22,10 @@ - IMPROVEMENT: parameter labels have changed to facilitate their reading. + +Test Status +----------- + +Planemo test using conda: passed + +
--- a/abims_xcms_group.xml Mon Feb 22 17:03:29 2016 -0500 +++ b/abims_xcms_group.xml Thu Apr 07 16:01:08 2016 -0400 @@ -1,20 +1,16 @@ -<tool id="abims_xcms_group" name="xcms.group" version="2.0.4"> +<tool id="abims_xcms_group" name="xcms.group" version="2.0.5"> <description>Group peaks together across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time.</description> - <requirements> - <requirement type="package" version="3.1.2">R</requirement> - <requirement type="binary">Rscript</requirement> - <requirement type="package" version="1.44.0">xcms</requirement> - <requirement type="package" version="2.2.0">xcms_w4m_script</requirement> - </requirements> + <macros> + <import>macros.xml</import> + </macros> - <stdio> - <exit_code range="1:" level="fatal" /> - </stdio> + <expand macro="requirements"/> + <expand macro="stdio"/> <command><![CDATA[ - xcms.r + @COMMAND_XCMS_SCRIPT@ xfunction group image $image @@ -41,12 +37,7 @@ rtCheck $methods.rtCheck kNN $methods.kNN #end if - ; - return=\$?; - mv log.txt $log; - cat $log; - sh -c "exit \$return" - + @COMMAND_LOG_EXIT@ ]]></command> <inputs> @@ -109,41 +100,47 @@ <tests> <test> <param name="image" value="xset.RData"/> - <param name="methods.method" value="density"/> - <param name="methods.bw" value="5"/> - <param name="methods.minfrac" value="0.3"/> - <param name="methods.mzwid" value="0.01"/> - <param name="methods.density_options.option" value="show"/> - <param name="methods.density_options.max" value="50"/> - <!--<output name="xsetRData" file="xset.group.RData" />--> - <!--<output name="rplotsPdf" file="xset.group.Rplots.pdf" />--> + <param name="methods|method" value="density"/> + <param name="methods|bw" value="5"/> + <param name="methods|minfrac" value="0.3"/> + <param name="methods|mzwid" value="0.01"/> + <param name="methods|density_options|option" value="show"/> + <param name="methods|density_options|max" value="50"/> <output name="log"> <assert_contents> - 
<has_text text="object with 9 samples" /> - <has_text text="Time range: 0.7-1140 seconds (0-19 minutes)" /> - <has_text text="Mass range: 50.0019-999.9863 m/z" /> - <has_text text="Peaks: 135846 (about 15094 per sample)" /> - <has_text text="Peak Groups: 6642" /> + <has_text text="object with 4 samples" /> + <has_text text="Time range: 0.7-1139.7 seconds (0-19 minutes)" /> + <has_text text="Mass range: 50.0021-999.9863 m/z" /> + <has_text text="Peaks: 59359 (about 14840 per sample)" /> + <has_text text="Peak Groups: 48998" /> <has_text text="Sample classes: bio, blank" /> </assert_contents> </output> - + </test> + <test> + <param name="image" value="xset.group.retcor.RData"/> + <param name="methods|method" value="density"/> + <param name="methods|bw" value="5"/> + <param name="methods|minfrac" value="0.3"/> + <param name="methods|mzwid" value="0.01"/> + <param name="methods|density_options|option" value="show"/> + <param name="methods|density_options|max" value="50"/> + <output name="log"> + <assert_contents> + <has_text text="object with 4 samples" /> + <has_text text="Time range: 0.2-1140.1 seconds (0-19 minutes)" /> + <has_text text="Mass range: 50.0021-999.9863 m/z" /> + <has_text text="Peaks: 59359 (about 14840 per sample)" /> + <has_text text="Peak Groups: 48958" /> + <has_text text="Sample classes: bio, blank" /> + </assert_contents> + </output> </test> </tests> <help><![CDATA[ -.. class:: infomark - -**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu - -**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@univ-nantes.fr - part of Workflow4Metabolomics.org [W4M] - - | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool. 
- - - ---------------------------------------------------- +@HELP_AUTHORS@ ========== Xcms.Group @@ -277,6 +274,11 @@ Changelog/News -------------- +**Version 2.0.5 - 04/04/2016** + +- TEST: refactoring to pass planemo test using conda dependencies + + **Version 2.0.4 - 10/02/2016** - BUGFIX: better management of errors. Datasets remained green although the process failed @@ -296,10 +298,7 @@ ]]></help> - <citations> - <citation type="doi">10.1021/ac051437y</citation> - <citation type="doi">10.1093/bioinformatics/btu813</citation> - </citations> + <expand macro="citation" /> </tool>
# lib.r version="2.0.1"
#Authors ABiMS TEAM
#Lib.r for Galaxy Workflow4Metabo
#version 2.2
#Based on lib.r 2.1
#Modifications made by Guitton Yann
#correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
#Note if scanrange is used a warning is prompted in R console but do not stop PDF generation


##
## Internal helper: group the sample indices of an xcmsSet by class.
## The first phenoData column is used as the class (phenoData may have more
## than one column). Returns list(labels = <character>, indices = <list of
## integer vectors>), one entry per class that actually has samples.
##
.getClassIndices <- function(xcmsSet) {
    # as.factor() copes with a character column; droplevels() removes levels
    # without samples, which would otherwise yield empty index vectors.
    groups <- droplevels(as.factor(xcmsSet@phenoData[, 1]))
    labels <- levels(groups)
    list(labels = labels,
         indices = lapply(labels, function(label) which(groups == label)))
}

##
## Internal helper: write the chromatogram overlay PDF shared by getBPCs()
## and getTICs().
##   chroms    list of two-column matrices (retention time in seconds, intensity)
##   files     file paths, same order as chroms (used for the legend)
##   classes   result of .getClassIndices()
##   pdfname   output PDF path
##   titleText first part of the plot title (e.g. "Total Ion Chromatograms \n")
##   tag       prefix of the second title line (e.g. "TICs_")
##   ylab      y axis label
## For every pair of classes one page is drawn: first class upwards, second
## class mirrored downwards. With a single class, one plain overlay is drawn.
##
.plotChromatogramsByClass <- function(chroms, files, classes, pdfname, titleText, tag, ylab) {
    pdf(pdfname, w=16, h=10)
    # close the device even if plotting fails
    on.exit(dev.off(), add = TRUE)

    cols <- rainbow(length(chroms))
    # search for max x and max y over all chromatograms
    xlim <- range(sapply(chroms, function(x) range(x[,1])))
    ylim <- range(sapply(chroms, function(x) range(x[,2])))
    ylim <- c(-ylim[2], ylim[2])

    # draw the chromatograms of one class; direction is 1 (up) or -1 (mirrored)
    drawClass <- function(idx, direction) {
        for (i in idx) {
            points(chroms[[i]][,1]/60, direction * chroms[[i]][,2], col = cols[i], type = "l")
        }
    }

    labels <- classes$labels
    indices <- classes$indices
    nclass <- length(labels)

    if (nclass == 1) {
        plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste(titleText, tag, labels[1], sep=""), xlab = "Retention Time (min)", ylab = ylab)
        drawClass(indices[[1]], 1)
        legend("topright", paste(basename(files[indices[[1]]])), col = cols[indices[[1]]], lty = 1)
    } else if (nclass >= 2) {
        for (k in 1:(nclass-1)) {
            for (l in (k+1):nclass) {
                plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste(titleText, tag, labels[k], " vs ", labels[l], sep=""), xlab = "Retention Time (min)", ylab = ylab)
                drawClass(indices[[k]], 1)
                drawClass(indices[[l]], -1)
                shown <- c(indices[[k]], indices[[l]])
                # all traces are solid lines, so the legend uses lty = 1 and
                # only the colour identifies the file
                legend("topright", paste(basename(files[shown])), col = cols[shown], lty = 1)
            }
        }
    }
    invisible(NULL)
}


#@author Y. Guitton
## Base peak chromatogram of one raw file.
##   file   path of the raw data file
##   rtcor  optional corrected retention times; raw scan times are used when NULL
##   ...    forwarded to profRange() (e.g. mass / time / scan ranges)
## Returns a two-column matrix: retention time, base peak intensity.
getBPC <- function(file,rtcor=NULL, ...) {
    object <- xcmsRaw(file)
    sel <- profRange(object, ...)
    cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE]))
    #plotChrom(xcmsRaw(file), base=T)
}

#@author Y. Guitton
## Overlay the base peak chromatograms of all samples of an xcmsSet in a PDF.
##   xcmsSet    xcmsSet object (required)
##   pdfname    output PDF path
##   rt         "raw" (default) or "corrected" retention times
##   scanrange  unused, kept for interface compatibility
## Returns (invisibly) the list of BPC matrices, one per sample.
getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) {
    cat("Creating BIC pdf...\n")

    # resolve the default c("raw","corrected") to a single value
    rt <- match.arg(rt)

    if (is.null(xcmsSet)) {
        cat("Enter an xcmsSet \n")
        stop("getBPCs requires an xcmsSet object")
    }
    files <- filepaths(xcmsSet)
    classes <- .getClassIndices(xcmsSet)

    # each raw file is read once
    #errors if scanrange used in xcmsSetgeneration
    TIC <- lapply(seq_along(files), function(j) {
        rtcor <- if (rt == "corrected") xcmsSet@rt$corrected[[j]] else NULL
        getBPC(files[j], rtcor=rtcor)
    })

    .plotChromatogramsByClass(TIC, files, classes, pdfname, "Base Peak Chromatograms \n", "BPCs_", "BPC")

    invisible(TIC)
}



#@author Y. Guitton
## Total ion chromatogram of one raw file.
##   file   path of the raw data file
##   rtcor  optional corrected retention times; raw scan times are used when NULL
## Returns a two-column matrix: retention time, total intensity.
getTIC <- function(file,rtcor=NULL) {
    object <- xcmsRaw(file)
    cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity)
}

##
## overlay TIC from all files in current folder or from xcmsSet, create pdf
##
#@author Y. Guitton
##   xcmsSet  xcmsSet object; when NULL the raw files are looked up in `files`
##   files    files and/or directories to scan when xcmsSet is NULL
##            (defaults to the working directory)
##   pdfname  output PDF path
##   rt       "raw" (default) or "corrected" retention times
## Returns (invisibly) the list of TIC matrices, one per file.
getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) {
    cat("Creating TIC pdf...\n")

    # resolve the default c("raw","corrected") to a single value
    rt <- match.arg(rt)

    if (is.null(xcmsSet)) {
        filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
        filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
        if (is.null(files))
            files <- getwd()
        info <- file.info(files)
        listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
        files <- c(files[!info$isdir], listed)
        # no class information without an xcmsSet: one group with every file
        classes <- list(labels = "all", indices = list(seq_along(files)))
    } else {
        files <- filepaths(xcmsSet)
        classes <- .getClassIndices(xcmsSet)
    }

    TIC <- lapply(seq_along(files), function(i) {
        rtcor <- if (!is.null(xcmsSet) && rt == "corrected") xcmsSet@rt$corrected[[i]] else NULL
        getTIC(files[i], rtcor=rtcor)
    })

    .plotChromatogramsByClass(TIC, files, classes, pdfname, "Total Ion Chromatograms \n", "TICs_", "TIC")

    invisible(TIC)
}



##
## Get the polarities from all the samples of a condition
#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
##   xcmsSet               xcmsSet object (required)
##   sampleMetadataOutput  path of the tab-separated file to write
## Writes one row per sample (name after make.names(), class, and polarity for
## non-netCDF files). Stops when make.names() makes two sample names collide.
## Returns list(sampleNamesOrigin, sampleNamesMakeNames).
getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") {
    cat("Creating the sampleMetadata file...\n")

    if (is.null(xcmsSet)) {
        stop("getSampleMetadata requires an xcmsSet object")
    }

    #Create the sampleMetada dataframe (from the argument, not from a global)
    sampleMetadata=xcmsSet@phenoData
    sampleNamesOrigin=rownames(sampleMetadata)
    sampleNamesMakeNames=make.names(sampleNamesOrigin)

    if (any(duplicated(sampleNamesMakeNames))) {
        write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
        for (sampleName in sampleNamesOrigin) {
            write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
        }
        stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
    }

    if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
        cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
        for (sampleName in sampleNamesOrigin) {
            cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n"))
        }
    }

    sampleMetadata$sampleMetadata=sampleNamesMakeNames
    sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns
    rownames(sampleMetadata)=NULL

    #For each sample file, the following actions are done
    for (file in xcmsSet@filepaths){
        #Polarity is only looked up for files that are not in the CDF format
        if (!mzR:::netCDFIsFile(file)){

            # If the column isn't exist, with add one filled with NA
            if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA

            #Create a simple xcmsRaw object for each sample
            raw=xcmsRaw(file)
            #Verify if all the scans have the same polarity
            uniq_list=unique(raw@polarity)
            if (length(uniq_list)>1){
                polarity="mixed"
            } else {
                polarity=as.character(uniq_list)
            }
            #Transforms the character to obtain only the sample name
            samplename=tools::file_path_sans_ext(basename(file))

            #Set the polarity attribute; the table holds make.names()'d names,
            #so match the renamed form as well as the raw one
            sampleMetadata$polarity[sampleMetadata$sampleMetadata %in% c(samplename, make.names(samplename))]=polarity

            #Delete xcmsRaw object because it creates a bug for the fillpeaks step
            rm(raw)
        }

    }

    write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput)

    return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames))

}


##
## This function check if xcms will found all the files
##
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
##   directory  directory (relative to the working directory) holding the raw files
## Stops with the list of offending paths when a raw file present on the
## filesystem is not matched by the file lookup reproduced here.
checkFilesCompatibilityWithXcms <- function(directory) {
    cat("Checking files filenames compatibilities with xmcs...\n")
    # WHAT XCMS WILL FIND
    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
    filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
    info <- file.info(directory)
    listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE)
    files <- c(directory[!info$isdir], listed)
    files_abs <- file.path(getwd(), files)
    exists <- file.exists(files_abs)
    files[exists] <- files_abs[exists]
    files[exists] <- sub("//","/",files[exists])

    # WHAT IS ON THE FILESYSTEM
    # the directory is shell-quoted so names with spaces or metacharacters work
    filesystem_filepaths=system(paste("find \"$PWD\"/",shQuote(directory)," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
    filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]

    # COMPARISON
    notImported <- !(filesystem_filepaths %in% files)
    if (any(notImported)) {
        write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
        write(filesystem_filepaths[notImported],stderr())
        stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")

    }
}



##
## This function check if XML contains special caracters. It also checks integrity and completness.
##
#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
##   directory  directory searched recursively for *.*ml* files
## Runs `xmllint --nonet --noout` on every file found and stops, listing the
## files, when xmllint reports a non-zero status for at least one of them.
checkXmlStructure <- function (directory) {
    cat("Checking XML structure...\n")

    # NOTE(review): $'\n' is not POSIX sh syntax; behaviour depends on the
    # shell used by system() - kept as in the original.
    cmd=paste("IFS=$'\n'; for xml in $(find",shQuote(directory),"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo \"$xml\";fi; done;")
    capture=system(cmd,intern=TRUE)

    if (length(capture)>0){
        #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
        write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
        write(capture, stderr())
        stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
    }

}


##
## This function check if XML contain special characters
##
#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
##   directory  directory searched recursively for *.*ml* files
## Files in which grep finds a character outside the range ' ' to '~' are
## rewritten in place by perl, which removes the non-ASCII characters.
## Returns TRUE when at least one file was rewritten, FALSE otherwise.
deleteXmlBadCharacters<- function (directory) {
    cat("Checking Non ASCII characters in the XML...\n")

    processed=F
    l=system( paste("find",shQuote(directory), "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE)
    for (i in l){
        # grep exits with 1 when nothing matches, which system() reports as a warning
        cmd=paste("LC_ALL=C grep '[^ -~]'",shQuote(i))
        capture=suppressWarnings(system(cmd,intern=TRUE))
        if (length(capture)>0){
            cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",shQuote(i))
            print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
            system(cmd,intern=TRUE)
            processed=T
        }
    }
    if (processed) cat("\n\n")
    return(processed)
}


##
## This function will compute MD5 checksum to check the data integrity
##
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
##   directory  files and/or directories; directories are searched recursively
##              for raw data files (CDF, NC, XML, mzXML, mzData, mzML)
## Returns a one-column matrix of MD5 checksums with the file paths as row names.
getMd5sum <- function (directory) {
    cat("Compute md5 checksum...\n")
    # WHAT XCMS WILL FIND
    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
    filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
    info <- file.info(directory)
    listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE)
    files <- c(directory[!info$isdir], listed)
    files <- files[file.exists(files)]

    return(as.matrix(tools::md5sum(files)))
}
<?xml version="1.0"?>
<!-- macros.xml: requirements, stdio handling, command tokens, help header and
     citations expanded by the xcms.group tool wrapper (abims_xcms_group.xml). -->
<macros>
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="3.1.2">R</requirement>
            <requirement type="package" version="0.4_1">r-snow</requirement>
            <requirement type="package" version="1.44.0">bioconductor-xcms</requirement>
            <requirement type="package" version="1.1_4">r-batch</requirement>
        </requirements>
    </xml>
    <xml name="stdio">
        <stdio>
            <!-- "1:" = every exit code from 1 upwards is fatal, as in the
                 wrapper before the macros were introduced; a bare "1" would
                 let any other non-zero exit code pass as success. -->
            <exit_code range="1:" level="fatal" />
        </stdio>
    </xml>

    <!-- Paths are single-quoted so that directories containing spaces work. -->
    <token name="@COMMAND_XCMS_SCRIPT@">
        LANG=C Rscript '$__tool_directory__/xcms.r'
    </token>

    <!-- Appended after the script arguments: remember the script's exit
         status, publish log.txt as the log dataset, echo it, then exit with
         the remembered status. -->
    <token name="@COMMAND_LOG_EXIT@">
        ;
        return=\$?;
        mv log.txt '$log';
        cat '$log';
        sh -c "exit \$return"
    </token>

    <token name="@HELP_AUTHORS@">
.. class:: infomark

**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu

.. class:: infomark

**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M]

    | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool.

---------------------------------------------------

    </token>


    <xml name="citation">
        <citations>
            <citation type="doi">10.1021/ac051437y</citation>
            <citation type="doi">10.1093/bioinformatics/btu813</citation>
        </citations>
    </xml>
</macros>
--- a/planemo.sh Mon Feb 22 17:03:29 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -planemo shed_init -f --name=xcms_group --owner=lecorguille --description="[W4M][GC-MS] XCMS R Package - Preprocessing - Group peaks from different samples together" --homepage_url="http://workflow4metabolomics.org" --long_description="Part of the W4M project: http://workflow4metabolomics.org\n\nXCMS: http://www.bioconductor.org/packages/release/bioc/html/xcms.html\n\nGroup peaks together across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time.\n\nBEWARE: this tool don't come with its script. You will need to install the dedicated package_xcms_w4m_script too" --category="Metabolomics"
# planemo_test.sh - commands used to run the tool's tests with planemo and
# conda-resolved dependencies. Run from the repository root.

# Set up the conda installation used by planemo.
planemo conda_init
# Install the conda packages required by the tool(s) found in the current directory.
planemo conda_install .
# Run the tool tests, installing Galaxy and resolving dependencies through conda.
# NOTE(review): the three commands run unconditionally; a failure of an
# earlier step does not stop the later ones - confirm this is intended.
planemo test --install_galaxy --conda_dependency_resolution

# Result recorded by the author for this changeset:
#All 2 test(s) executed passed.
#abims_xcms_group[0]: passed
#abims_xcms_group[1]: passed
--- a/repository_dependencies.xml Mon Feb 22 17:03:29 2016 -0500 +++ b/repository_dependencies.xml Thu Apr 07 16:01:08 2016 -0400 @@ -1,4 +1,5 @@ <?xml version="1.0"?> <repositories> - <repository changeset_revision="bff835d58914" name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="5527da68412e" name="no_unzip_datatype" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="bff835d58914" name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </repositories>
--- a/test-data/log.txt Mon Feb 22 17:03:29 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ - PACKAGE INFO -parallel 3.1.2 -BiocGenerics 0.14.0 -Biobase 2.28.0 -Rcpp 0.12.0 -mzR 2.2.2 -xcms 1.44.0 -snow 0.3.13 -batch 1.1.4 - - - ARGUMENTS INFO -xfunction group -image test-data/xset.RData -xsetRdataOutput test-data/xset.group.RData -rplotspdf xset.group.Rplots.pdf -method density -sleep 0.001 -minfrac 0.5 -bw 30 -mzwid 0.25 - - - INFILE PROCESSING INFO - - - ARGUMENTS PROCESSING INFO - - - MAIN PROCESSING INFO -112 175 237 300 362 425 487 550 612 675 737 800 862 925 987 -null device - 1 - - - XSET OBJECT INFO -An "xcmsSet" object with 9 samples - -Time range: 0.7-1140 seconds (0-19 minutes) -Mass range: 50.0019-999.9863 m/z -Peaks: 135846 (about 15094 per sample) -Peak Groups: 6642 -Sample classes: bio, blank - -Profile settings: method = bin - step = 0.01 - -Memory usage: 15 MB - - - DONE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/xset.log.txt Thu Apr 07 16:01:08 2016 -0400 @@ -0,0 +1,54 @@ + PACKAGE INFO +parallel 3.2.2 +BiocGenerics 0.16.1 +Biobase 2.30.0 +Rcpp 0.12.2 +mzR 2.4.1 +xcms 1.44.0 +snow 0.4.1 +batch 1.1.4 + + + ARGUMENTS INFO +xfunction group +image /tmp/tmp2L11lo/files/000/dataset_1.dat +xsetRdataOutput /tmp/tmp2L11lo/files/000/dataset_2.dat +rplotspdf /tmp/tmp2L11lo/files/000/dataset_3.dat +method density +sleep 0.001 +minfrac 0.3 +bw 5 +mzwid 0.01 +max 50 + + + INFILE PROCESSING INFO + + + ARGUMENTS PROCESSING INFO + + + MAIN PROCESSING INFO +52 55 57 60 62 65 67 70 72 75 77 80 82 85 87 90 92 95 97 100 102 105 107 110 112 115 117 120 122 125 127 130 132 135 137 140 142 145 147 150 152 155 157 160 162 165 167 170 172 175 177 180 182 185 187 190 192 195 197 200 202 205 207 210 212 215 217 220 222 225 227 230 232 235 237 240 242 245 247 250 252 255 257 260 262 265 267 270 272 275 277 280 282 285 287 290 292 295 297 300 302 305 307 310 312 315 317 320 322 325 327 330 332 335 337 340 342 345 347 350 352 355 357 360 362 365 367 370 372 375 377 380 382 385 387 390 392 395 397 400 402 405 407 410 412 415 417 420 422 425 427 430 432 435 437 440 442 445 447 450 452 455 457 460 462 465 467 470 472 475 477 480 482 485 487 490 492 495 497 500 502 505 507 510 512 515 517 520 522 525 527 530 532 535 537 540 542 545 547 550 552 555 557 560 562 565 567 570 572 575 577 580 582 585 587 590 592 595 597 600 602 605 607 610 612 615 617 620 622 625 627 630 632 635 637 640 642 645 647 650 652 655 657 660 662 665 667 670 672 675 677 680 682 685 687 690 692 695 697 700 702 705 707 710 712 715 717 720 722 725 727 730 732 735 737 740 742 745 747 750 752 755 757 760 762 765 767 770 772 775 777 780 782 785 787 790 792 795 797 800 802 805 807 810 812 815 817 820 822 825 827 830 832 835 837 840 842 845 847 850 852 855 857 860 862 865 867 870 872 875 877 880 882 885 887 890 892 895 897 900 902 905 907 910 912 915 917 920 922 925 927 930 932 935 937 
940 942 945 947 950 952 955 957 960 962 965 967 970 972 975 977 980 982 985 987 990 992 995 997 + + +null device + 1 + + + XSET OBJECT INFO +An "xcmsSet" object with 4 samples + +Time range: 0.7-1139.7 seconds (0-19 minutes) +Mass range: 50.0021-999.9863 m/z +Peaks: 59359 (about 14840 per sample) +Peak Groups: 48998 +Sample classes: bio, blank + +Profile settings: method = bin + step = 0.01 + +Memory usage: 12 MB + + + DONE
--- a/tool_dependencies.xml Mon Feb 22 17:03:29 2016 -0500 +++ b/tool_dependencies.xml Thu Apr 07 16:01:08 2016 -0400 @@ -3,10 +3,7 @@ <package name="R" version="3.1.2"> <repository changeset_revision="1ca39eb16186" name="package_r_3_1_2" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> - <package name="xcms" version="1.44.0"> - <repository changeset_revision="8ea252413ed6" name="package_r_xcms_1_44_0" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> - <package name="xcms_w4m_script" version="2.2.0"> - <repository changeset_revision="f54521c7a50c" name="package_xcms_w4m_script_2_2_0" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <package name="bioconductor-xcms" version="1.44.0"> + <repository changeset_revision="0c38f7d43e08" name="package_bioconductor_xcms_1_44_0" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>
#!/usr/bin/env Rscript
# xcms.r version="2.2.0"
#Authors ABIMS TEAM
#BPC Addition from Y.guitton
#
# Generic command line driver for xcms functions: it loads the RData image of
# the previous step (if any), calls the xcms function named by the "xfunction"
# argument with the remaining arguments, and saves the resulting xset.


# ----- LOG FILE -----
# Standard output is diverted to log.txt. stderr is deliberately left alone so
# that error messages still reach the caller.
log_file=file("log.txt", open = "wt")
sink(log_file, type = "output")


# ----- PACKAGE -----
cat("\tPACKAGE INFO\n")
#pkgs=c("xcms","batch")
pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch")
for(pkg in pkgs) {
    suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
    cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="")
}
# source a file located next to this script (path taken from Rscript's --file= argument)
source_local <- function(fname){
    argv <- commandArgs(trailingOnly = FALSE)
    base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
    source(paste(base_dir, fname, sep="/"))
}
cat("\n\n");


# ----- ARGUMENTS -----
cat("\tARGUMENTS INFO\n")
listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
write.table(as.matrix(listArguments), col.names=F, quote=F, sep='\t')

cat("\n\n");


# ----- PROCESSING INFILE -----
cat("\tINFILE PROCESSING INFO\n")

#image is an .RData file necessary to use xset variable given by previous tools
if (!is.null(listArguments[["image"]])){
    load(listArguments[["image"]]); listArguments[["image"]]=NULL
}

#Import the different functions
source_local("lib.r")

cat("\n\n")


# ----- ARGUMENTS PROCESSING -----
cat("\tARGUMENTS PROCESSING INFO\n")

# Save arguments to generate a report
if (!exists("listOFlistArguments")) listOFlistArguments=list()
listOFlistArguments[[paste(format(Sys.time(), "%y%m%d-%H:%M:%S_"),listArguments[["xfunction"]],sep="")]] = listArguments


#saving the commun parameters
thefunction = listArguments[["xfunction"]]; listArguments[["xfunction"]]=NULL #delete from the list of arguments

xsetRdataOutput = paste(thefunction,"RData",sep=".")
if (!is.null(listArguments[["xsetRdataOutput"]])){
    xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL
}

rplotspdf = "Rplots.pdf"
if (!is.null(listArguments[["rplotspdf"]])){
    rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL
}

sampleMetadataOutput = "sampleMetadata.tsv"
if (!is.null(listArguments[["sampleMetadataOutput"]])){
    sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL
}


if (thefunction %in% c("xcmsSet","retcor")) {
    ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL
    bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL
}

#necessary to unzip .zip file uploaded to Galaxy
#thanks to .zip file it's possible to upload many file as the same time conserving the tree hierarchy of directories

if (!is.null(listArguments[["zipfile"]])){
    zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL
}

if (!is.null(listArguments[["library"]])){
    directory=listArguments[["library"]]; listArguments[["library"]]=NULL
    if(!file.exists(directory)){
        error_message=paste("Cannot access the directory:",directory,". Please verify if the directory exists or not.")
        print(error_message)
        stop(error_message)
    }
}

# We unzip automatically the chromatograms from the zip files.
if (thefunction %in% c("xcmsSet","retcor","fillPeaks")) {
    if(exists("zipfile") && (zipfile!="")) {
        if(!file.exists(zipfile)){
            error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
            print(error_message)
            stop(error_message)
        }

        #unzip
        suppressWarnings(unzip(zipfile, unzip="unzip"))

        #get the directory name: first path component of every zip entry,
        #keeping only real directories (isdir may be NA for missing paths)
        filesInZip=unzip(zipfile, list=T);
        directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])));
        directories=directories[!(directories %in% c("__MACOSX")) & (file.info(directories)$isdir %in% TRUE)]
        directory = "."
        if (length(directories) == 1) directory = directories

        cat("files_root_directory\t",directory,"\n")

        # checksums of the raw files as extracted
        md5sumList=list("origin"=getMd5sum(directory))

        # Check and fix if there are non ASCII characters. If so, they will be removed from the *mzXML mzML files.
        # Remove because can create issue with some clean files
        #@TODO: fix me
        #if (deleteXmlBadCharacters(directory)) {
        #    md5sumList=list("removalBadCharacters"=getMd5sum(directory))
        #}

    }
}

#addition of the directory to the list of arguments in the first position
if (thefunction == "xcmsSet") {
    # 'directory' only exists when a zipfile or a library argument was given
    if (!exists("directory", inherits = FALSE)) {
        stop("xcmsSet needs its input files: provide either a 'zipfile' or a 'library' argument")
    }
    checkXmlStructure(directory)
    checkFilesCompatibilityWithXcms(directory)
    listArguments=append(directory, listArguments)
}


#addition of xset object to the list of arguments in the first position
if (exists("xset")){
    listArguments=append(list(xset), listArguments)
}

cat("\n\n")


# ----- MAIN PROCESSING INFO -----
cat("\tMAIN PROCESSING INFO\n")


#Verification of a group step before doing the fillpeaks job.

if (thefunction == "fillPeaks") {
    res=try(is.null(groupnames(xset)))
    # inherits() stays correct whatever the length of class(res)
    if (inherits(res, "try-error")){
        error<-geterrmessage()
        write(error, stderr())
        stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step")
    }

}

#change the default display settings
#dev.new(file="Rplots.pdf", width=16, height=12)
pdf(file=rplotspdf, width=16, height=12)
if (thefunction == "group") {
    par(mfrow=c(2,2))
}
#else if (thefunction == "retcor") {
#try to change the legend display
#    par(xpd=NA)
#    par(xpd=T, mar=par()$mar+c(0,0,0,4))
#}


#execution of the function "thefunction" with the parameters given in "listArguments"
xset = do.call(thefunction, listArguments)


cat("\n\n")

dev.off() #dev.new(file="Rplots.pdf", width=16, height=12)

if (thefunction == "xcmsSet") {

    #transform the files absolute pathways into relative pathways
    #(fixed=TRUE: the working directory is a literal string, not a regex)
    xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths, fixed=TRUE)

    if(exists("zipfile") && (zipfile!="")) {

        #Modify the samples names (erase the path and everything after the first dot)
        for(i in seq_along(sampnames(xset))){

            sample_name=unlist(strsplit(sampnames(xset)[i], "/"))
            sample_name=sample_name[length(sample_name)]
            sample_name= unlist(strsplit(sample_name,"[.]"))[1]
            sampnames(xset)[i]=sample_name

        }

    }

}

# -- TIC --
if (thefunction == "xcmsSet") {
    sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput)
    getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw")
    getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf)
} else if (thefunction == "retcor") {
    getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected")
    getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf)
}

cat("\n\n")


# ----- EXPORT -----

cat("\tXSET OBJECT INFO\n")
print(xset)
#delete the parameters to avoid the passage to the next tool in .RData image


#saving R data in .Rdata file to save the variables used in the present tool
#(only the objects that actually exist in this run are saved)
objects2save = c("xset","zipfile","listOFlistArguments","md5sumList","sampleNamesList")
save(list=objects2save[objects2save %in% ls()], file=xsetRdataOutput)

cat("\n\n")


cat("\tDONE\n")
