Mercurial > repos > lecorguille > ipo
changeset 0:b2032600d98f draft
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
author | lecorguille |
---|---|
date | Tue, 24 May 2016 19:49:14 -0400 |
parents | |
children | 508ab373b524 |
files | README.rst ipo.r ipo4xcmsSet.xml lib.r macros.xml planemo_test.sh repository_dependencies.xml test-data/faahKO_IPO_parameters4xcmsSet.tsv test-data/faahKO_reduce.zip test-data/sacuri_2files.zip test-data/sacuri_2files_IPO_parameters4xcmsSet.tsv |
diffstat | 11 files changed, 550 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Tue May 24 19:49:14 2016 -0400 @@ -0,0 +1,14 @@ + +Changelog/News +-------------- + +**Version X.X.X - XX/XX/XXXX** + +- NEW: + +Test Status +----------- + +Planemo test using conda: failed on the faahKO_reduce.zip + +Planemo shed_test : unbuild
#!/usr/bin/env Rscript
# ipo.r - command-line entry point of the "IPO for xcmsSet" Galaxy tool.
# Authors Gildas Le Corguille and Yann Guitton
#
# Steps: open the log, load the packages, parse the command line, unzip the
# input archive (or use the given library directory), validate the input
# files, then run ipo4xcmsSet() from lib.r.


# ----- LOG FILE -----
# Everything written on standard output goes to log.txt. The original script
# called sink() twice with the same effect; a single call is enough.
log_file <- file("log.txt", open = "wt")
sink(log_file, type = "output")


# ----- PACKAGE -----
cat("\tPACKAGE INFO\n")
#pkgs=c("xcms","batch")
pkgs <- c("parallel", "BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms",
          "rsm", "igraph", "CAMERA", "IPO", "batch")
for (pkg in pkgs) {
  # stopifnot() aborts the script as soon as one package cannot be loaded.
  suppressWarnings(suppressPackageStartupMessages(
    stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE,
                      character.only = TRUE))
  ))
  cat(pkg, "\t", as.character(packageVersion(pkg)), "\n", sep = "")
}

# Source a file located in the same directory as this script; the script
# location is read from the "--file=" argument set by Rscript.
source_local <- function(fname) {
  argv <- commandArgs(trailingOnly = FALSE)
  base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
  source(paste(base_dir, fname, sep = "/"))
}
cat("\n\n")


# ----- ARGUMENTS -----
cat("\tARGUMENTS INFO\n")
# interpretation of arguments given in command line as an R list of objects
listArguments <- parseCommandArgs(evaluate = FALSE)
write.table(as.matrix(listArguments), col.names = FALSE, quote = FALSE,
            sep = "\t")

cat("\n\n")


# ----- ARGUMENTS PROCESSING -----
cat("\tINFILE PROCESSING INFO\n")

# Import the different functions
source_local("lib.r")

cat("\n\n")


# ----- PROCESSING INFILE -----
cat("\tARGUMENTS PROCESSING INFO\n")

# Arguments consumed here are removed from listArguments so that only the
# peak-picking parameters are handed over to ipo4xcmsSet().
parametersOutput <- "parametersOutput.tsv"
if (!is.null(listArguments[["parametersOutput"]])) {
  parametersOutput <- listArguments[["parametersOutput"]]
  listArguments[["parametersOutput"]] <- NULL
}

samplebyclass <- 2
if (!is.null(listArguments[["samplebyclass"]])) {
  samplebyclass <- listArguments[["samplebyclass"]]
  listArguments[["samplebyclass"]] <- NULL
}

# necessary to unzip .zip file uploaded to Galaxy
# thanks to .zip file it's possible to upload many file as the same time
# conserving the tree hierarchy of directories

# Both inputs are optional: they start as NULL so that their absence can be
# tested explicitly instead of relying on exists().
zipfile <- NULL
if (!is.null(listArguments[["zipfile"]])) {
  zipfile <- listArguments[["zipfile"]]
  listArguments[["zipfile"]] <- NULL
}

directory <- NULL
if (!is.null(listArguments[["library"]])) {
  directory <- listArguments[["library"]]
  listArguments[["library"]] <- NULL
  if (!file.exists(directory)) {
    error_message <- paste("Cannot access the directory:", directory, ". Please verify if the directory exists or not.")
    print(error_message)
    stop(error_message)
  }
}

# We unzip automatically the chromatograms from the zip files.
if (!is.null(zipfile) && (zipfile != "")) {
  if (!file.exists(zipfile)) {
    error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!")
    print(error_message)
    stop(error_message)
  }

  # unzip with the external "unzip" program
  suppressWarnings(unzip(zipfile, unzip = "unzip"))

  # get the directory name: first path component of every archive entry
  filesInZip <- unzip(zipfile, list = TRUE)
  directories <- unique(vapply(strsplit(filesInZip$Name, "/"),
                               function(x) x[1], character(1)))
  # "%in% TRUE" maps an NA from file.info() (entry missing on disk) to FALSE
  # so that no NA can slip into the directory list.
  is_dir <- file.info(directories)$isdir %in% TRUE
  directories <- directories[!(directories %in% c("__MACOSX")) & is_dir]
  # A single top-level directory becomes the root; otherwise the files are
  # looked up from the current directory.
  directory <- "."
  if (length(directories) == 1) directory <- directories

  cat("files_root_directory\t", directory, "\n")
}

# Fail with an explicit message instead of "object 'directory' not found"
# when neither a zip file nor a library directory was supplied.
if (is.null(directory)) {
  error_message <- "No input provided: please give either a 'zipfile' or a 'library' argument."
  print(error_message)
  stop(error_message)
}

# validation of the input files before the (long) optimisation
checkXmlStructure(directory)
checkFilesCompatibilityWithXcms(directory)

cat("\n\n")


# ----- MAIN PROCESSING INFO -----
cat("\tMAIN PROCESSING INFO\n")

ipo4xcmsSet(directory, parametersOutput, listArguments, samplebyclass)

cat("\n\n")


# ----- EXPORT -----
cat("\tEXPORTING INFO\n")

cat("\n\n")

cat("\tDONE\n")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ipo4xcmsSet.xml Tue May 24 19:49:14 2016 -0400 @@ -0,0 +1,239 @@ +<tool id="ipo4xcmsSet" name="IPO for xcmsSet" version="2.0.8"> + + <description>Filtration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis </description> + + <macros> + <import>macros.xml</import> + </macros> + + <expand macro="requirements"/> + <expand macro="stdio"/> + + <command><![CDATA[ + @COMMAND_SCRIPT@ + + #if $inputs.input == "zip_file": + zipfile $inputs.zip_file + #end if + + parametersOutput $parametersOutput + + samplebyclass $samplebyclass + + ## profmethod $profmethod + nSlaves \${GALAXY_SLOTS:-1} method $methods.method + #if $methods.method == "centWave": + ppm $methods.ppm + min_peakwidth "c($methods.min_peakwidth)" + max_peakwidth "c($methods.max_peakwidth)" + #elif $methods.method == "matchedFilter": + step $methods.step + fwhm $methods.fwhm + #end if + + @COMMAND_LOG_EXIT@ 2> /tmp/log.err + ]]></command> + + <inputs> + + <conditional name="inputs"> + <param name="input" type="select" label="Choose your inputs method" > + <option value="zip_file" selected="true">Zip file from your history containing your chromatograms</option> + </param> + <when value="zip_file"> + <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file" /> + </when> + </conditional> + + <param name="samplebyclass" type="integer" value="2" label="Number of samples used per class to estimate the best parameters" help="Set to 0 to use the whole dataset. 
To save time, reduce this number" /> + + <conditional name="methods"> + <param name="method" type="select" label="Extraction method for peaks detection" help="[method] See the help section below"> + <option value="centWave" >centWave</option> + <option value="matchedFilter" selected="true">matchedFilter</option> + </param> + + <!-- centWave Filter options --> + <when value="centWave"> + <param name="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="[ppm]" optional="false" /> + <param name="min_peakwidth" type="text" value="15,25" label="Min peak width range in seconds" help="[peakwidth]" optional="true" /> + <param name="max_peakwidth" type="text" value="45,55" label="Max peak width range in seconds" help="[peakwidth]" optional="true" /> + </when> + + <!-- matched Filter options --> + <when value="matchedFilter"> + <param name="step" type="float" value="0.01" label="Step size to use for profile generation" help="[step] The peak detection algorithm creates extracted ion base peak chromatograms (EIBPC) on a fixed step size" optional="true" /> + <param name="fwhm" type="integer" value="30" label="Full width at half maximum of matched filtration gaussian model peak" help="[fwhm] Only used to calculate the actual sigma" optional="true" /> + </when> + </conditional> + + + </inputs> + + <outputs> + <data name="parametersOutput" format="tabular" label="IPO_parameters4xcmsSet.tsv" /> + <data name="log" format="txt" label="ipo4xcmsSet.log.txt" /> + </outputs> + + <tests> + <test> + <param name="inputs|input" value="zip_file" /> + <param name="inputs|zip_file" value="faahKO_reduce.zip" ftype="zip" /> + <param name="methods|method" value="centWave" /> + <param name="methods|ppm" value="25" /> + <param name="methods|min_peakwidth" value="20,30" /> + <param name="methods|max_peakwidth" value="45,55" /> + <output name="parametersOutput" file="faahKO_IPO_parameters4xcmsSet.tsv" /> + </test> + <test> + <param 
name="inputs|input" value="zip_file" /> + <param name="inputs|zip_file" value="sacuri_2files.zip" ftype="zip" /> + <param name="methods|method" value="centWave" /> + <param name="methods|ppm" value="25" /> + <param name="methods|min_peakwidth" value="20,30" /> + <param name="methods|max_peakwidth" value="45,55" /> + <output name="parametersOutput" file="sacuri_2files_IPO_parameters4xcmsSet.tsv" /> + </test> + </tests> + + <help><![CDATA[ + +@HELP_AUTHORS@ + +=============== +IPO.ipo4xcmsSet +=============== + +----------- +Description +----------- + +A Tool for automated Optimization of XCMS Parameters + + +----------------- +Workflow position +----------------- + +**Upstream tools** + +========================= ================= ======= ========= +Name output file format parameter +========================= ================= ======= ========= +NA NA zip NA +========================= ================= ======= ========= + + +**Downstream tools** + ++---------------------------+----------------------+-----------------+ +| Name | Output file | Format | ++===========================+======================+=================+ +|xcms.xcmsSet | parametersOutput.tsv | Tabular | ++---------------------------+--------------------+-------------------+ + + + +----------- +Input files +----------- + ++---------------------------+------------+ +| Parameter : num + label | Format | ++===========================+============+ +| 1 : Choose your inputs | zip | ++---------------------------+------------+ + +**Choose your inputs** + +You have two methods for your inputs: + + | Zip file (recommended): You can put a zip file containing your inputs: myinputs.zip (containing all your conditions as sub-directories). + | library folder: You must specify the name of your "library" (folder) created within your space project (for example: /projet/externe/institut/login/galaxylibrary/yourlibrary). Your library must contain all your conditions as sub-directories. 
+ +Steps for creating the zip file +------------------------------- + +**Step1: Creating your directory and hierarchize the subdirectories** + + +VERY IMPORTANT: If you zip your files under Windows, you must use the 7Zip software (http://www.7-zip.org/), otherwise your zip will not be well unzipped on the platform W4M (zip corrupted bug). + +Your zip should contain all your conditions as sub-directories. For example, two conditions (mutant and wild): +arabidopsis/wild/01.raw +arabidopsis/mutant/01.raw + +**Step2: Creating a zip file** + +Create your zip file (e.g.: arabidopsis.zip). + +**Step 3 : Uploading it to our Galaxy server** + +If your zip file is less than 2Gb, you can use the Get Data tool to upload it. + +Otherwise if your zip file is larger than 2Gb, please refer to the HOWTO on workflow4metabolomics.org (http://application.sb-roscoff.fr/download/w4m/howto/galaxy_upload_up_2Go.pdf). + +For more information, don't hesitate to send us an email at supportATworkflow4metabolomics.org. + +Advice for converting your files for the XCMS input +---------------------------------------------------- + +We recommend you to convert your raw files to **mzXML** in centroid mode (smaller files) and the files will be compatible with the xcms centWave method. + +**We recommend you the following parameters:** + +Use Filtering: **True** + +Use Peak Picking: **True** + +Peak Picking -Apply to MS Levels: **All Levels (1-)** : Centroid Mode + +Use zlib: **64** + +Binary Encoding: **64** + +m/z Encoding: **64** + +Intensity Encoding: **64** + + +---------- +Parameters +---------- + +Extraction method for peaks detection +------------------------------------- + +**Matched Filter** + + | One parameter to consider is the Gaussian model peak width used for matched filtration, an integral part of the peak detection algorithm. + | For a discussion of how model peak width affects the signal to noise ratio, see Danielsson et al. (2002). 
+ + +**cent Wave** + + | This algorithm is most suitable for high resolution LC/{TOF,OrbiTrap,FTICR}-MS data in centroid mode. + | Due to the fact that peak centroids are used, a binning step is not necessary. + | The method is capable of detecting close-by-peaks and also overlapping peaks. Some efforts are made to detect the exact peak boundaries to get precise peak integrals. + + +------------ +Output files +------------ + +IPO_parameters4xcmsSet.tsv + + | Optimal parameters for xcmsSet + + +--------------------------------------------------- + +Changelog/News +-------------- + + + + ]]></help> + + <expand macro="citation" /> +</tool>
## lib.r - helper functions sourced by ipo.r

##
## This function launches the IPO functions to get the best parameters for
## xcmsSet. A sample among the whole dataset is used to save time.
##
## directory        root directory of the raw files; the class of a file is
##                  the name of the directory that contains it
## parametersOutput path of the tab-separated file receiving the best
##                  parameters (a relative path is resolved from `directory`,
##                  because the file is written while it is the working dir)
## listArguments    named list of command-line arguments; "method" selects
##                  the IPO starting parameters, and "ppm", "min_peakwidth",
##                  "max_peakwidth" and "nSlaves" override them when present
## samplebyclass    number of files randomly drawn per class; 0 (or less)
##                  keeps every file
##
ipo4xcmsSet <- function(directory, parametersOutput, listArguments, samplebyclass=4) {
  # Work from the data directory, but give the caller its working directory
  # back when the function returns or fails.
  previous_wd <- setwd(directory)
  on.exit(setwd(previous_wd), add = TRUE)

  files <- list.files(".", recursive = TRUE)   # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
  files_classes <- basename(dirname(files))    # "KO", "KO", "WT", "WT"

  mzmlfile <- files
  if (samplebyclass > 0) {
    # random selection of N files for IPO in each class; a class with fewer
    # than N files is kept entirely
    classes <- unique(files_classes)
    mzmlfile <- unlist(lapply(classes, function(class_i) {
      files_class_i <- files[files_classes == class_i]
      if (samplebyclass > length(files_class_i)) {
        files_class_i
      } else {
        sample(files_class_i, samplebyclass)
      }
    }))
  }
  #TODO: else, must we keep the RData to been use directly by group?

  cat("\t\tSamples used:\n")
  print(mzmlfile)

  # load default parameters of IPO
  paramsPP <- getDefaultXcmsSetStartingParams(listArguments[["method"]])

  # user defined new parameters. Only the arguments actually supplied are
  # copied: assigning NULL to a list element would delete the IPO default.
  # NOTE(review): the Galaxy wrapper also sends "step" and "fwhm" for
  # matchedFilter; they are not applied here, as in the original code.
  for (param_name in c("ppm", "min_peakwidth", "max_peakwidth", "nSlaves")) {
    user_value <- listArguments[[param_name]]
    if (!is.null(user_value)) {
      paramsPP[[param_name]] <- user_value
    }
  }

  #paramsPP$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement
  resultPPpos <- optimizeXcmsSet(mzmlfile, paramsPP, subdir = "IPO_results")   # some images generated by IPO
  write.table(resultPPpos$best_settings$parameters, file = parametersOutput,
              sep = "\t", row.names = FALSE)   # can be read by user
}


##
## This function checks if xcms will find all the files.
## It compares the files R lists under `directory` with the files the
## `find` command sees, and stops when some of the latter are missing from
## the former.
##
## directory  file(s) or directory(ies) to check, relative to the working
##            directory or absolute
##
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
checkFilesCompatibilityWithXcms <- function(directory) {
  cat("Checking files filenames compatibilities with xmcs...\n")
  # WHAT XCMS WILL FIND
  filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
  filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
  info <- file.info(directory)
  listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
  files <- c(directory[!info$isdir], listed)
  files_abs <- file.path(getwd(), files)
  exists <- file.exists(files_abs)
  files[exists] <- files_abs[exists]
  files[exists] <- sub("//", "/", files[exists])

  # WHAT IS ON THE FILESYSTEM
  # The search roots are built from getwd(), like files_abs above, so both
  # listings use the same prefix. An absolute directory is used as it is.
  # Roots are shell-quoted so that spaces in a path cannot split the command.
  is_absolute <- substr(directory, 1, 1) == "/"
  search_roots <- ifelse(is_absolute, directory, file.path(getwd(), directory))
  find_cmd <- paste("find", paste(shQuote(search_roots), collapse = " "),
                    "-not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"")
  filesystem_filepaths <- system(find_cmd, intern = TRUE)
  filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl = TRUE)]

  # COMPARISON
  not_imported <- filesystem_filepaths[!(filesystem_filepaths %in% files)]
  if (length(not_imported) > 0) {
    write("\n\nERROR: List of the files which will not be imported by xcmsSet", stderr())
    write(not_imported, stderr())
    stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
  }
}


##
## This function checks the XML files found under `directory` with xmllint
## and stops when at least one of them is reported as incorrect.
##
## directory  directory(ies) searched recursively for files whose name
##            matches '*.*ml*' (case-insensitive)
##
#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
checkXmlStructure <- function (directory) {
  cat("Checking XML structure...\n")

  # The files are listed with find, then checked one by one from R. This
  # avoids the bash-only IFS=$'\n' loop and keeps every path quoted.
  find_cmd <- paste("find", paste(shQuote(directory), collapse = " "),
                    "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'")
  xml_files <- system(find_cmd, intern = TRUE)

  # A non-zero exit status of xmllint flags the file; this includes the
  # status returned when xmllint itself cannot be run.
  is_incorrect <- vapply(xml_files, function(xml) {
    status <- suppressWarnings(
      system2("xmllint", c("--nonet", "--noout", shQuote(xml)),
              stdout = FALSE, stderr = FALSE)
    )
    status != 0
  }, logical(1), USE.NAMES = FALSE)
  capture <- xml_files[is_incorrect]

  if (length(capture) > 0) {
    #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
    write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
    write(capture, stderr())
    stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
  }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue May 24 19:49:14 2016 -0400 @@ -0,0 +1,50 @@ +<?xml version="1.0"?> +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="3.1.2">R</requirement> + <requirement type="package" version="1.7.5">r-ipo</requirement> + <requirement type="package" version="1.1_4">r-batch</requirement> + </requirements> + </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1" level="fatal" /> + </stdio> + </xml> + + <token name="@COMMAND_SCRIPT@"> + LANG=C Rscript $__tool_directory__/ipo.r + </token> + + <token name="@COMMAND_LOG_EXIT@"> + ; + return=\$?; + mv log.txt $log; + cat $log; + sh -c "exit \$return" + </token> + + <token name="@HELP_AUTHORS@"> +.. class:: infomark + +**Authors** Gunnar Libiseller, Michaela Dvorzak, Ulrike Kleb, Edgar Gander, Tobias Eisenberg, Frank Madeo, Steffen Neumann, Gert Trausinger, Frank Sinner, Thomas Pieber and Christoph Magnes + +.. class:: infomark + +**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M] + + | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool. + +--------------------------------------------------- + + </token> + + + <xml name="citation"> + <citations> + <citation type="doi">10.1186/s12859-015-0562-8</citation> + <citation type="doi">10.1093/bioinformatics/btu813</citation> + </citations> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/planemo_test.sh Tue May 24 19:49:14 2016 -0400 @@ -0,0 +1,9 @@ +conda create -n r-ipo --use-local r-ipo r-batch +. ~/miniconda2/bin/activate r-ipo +planemo test --install_galaxy --galaxy_branch "dev" --update_test_data + + +# -- Use of conda dependencies +planemo conda_init --conda_prefix /tmp/mc +planemo conda_install --conda_prefix /tmp/mc . +planemo test --install_galaxy --conda_prefix /tmp/mc --conda_dependency_resolution --galaxy_branch "dev" --update_test_data
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Tue May 24 19:49:14 2016 -0400 @@ -0,0 +1,4 @@ +<?xml version="1.0"?> +<repositories> + <repository changeset_revision="5527da68412e" name="no_unzip_datatype" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> +</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/faahKO_IPO_parameters4xcmsSet.tsv Tue May 24 19:49:14 2016 -0400 @@ -0,0 +1,2 @@ +"min_peakwidth" "max_peakwidth" "mzdiff" "ppm" "snthresh" "noise" "prefilter" "value_of_prefilter" "mzCenterFun" "integrate" "fitgauss" "verbose.columns" "nSlaves" +6.6 56.5 0.01605 25 10 0 3 100 "wMean" 1 FALSE FALSE 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sacuri_2files_IPO_parameters4xcmsSet.tsv Tue May 24 19:49:14 2016 -0400 @@ -0,0 +1,2 @@ +"min_peakwidth" "max_peakwidth" "mzdiff" "ppm" "snthresh" "noise" "prefilter" "value_of_prefilter" "mzCenterFun" "integrate" "fitgauss" "verbose.columns" "nSlaves" +7.5 42.5 0.01715 25 10 0 3 100 "wMean" 1 FALSE FALSE 1