changeset 0:b2032600d98f draft

planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
author lecorguille
date Tue, 24 May 2016 19:49:14 -0400
parents
children 508ab373b524
files README.rst ipo.r ipo4xcmsSet.xml lib.r macros.xml planemo_test.sh repository_dependencies.xml test-data/faahKO_IPO_parameters4xcmsSet.tsv test-data/faahKO_reduce.zip test-data/sacuri_2files.zip test-data/sacuri_2files_IPO_parameters4xcmsSet.tsv
diffstat 11 files changed, 550 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Tue May 24 19:49:14 2016 -0400
@@ -0,0 +1,14 @@
+
+Changelog/News
+--------------
+
+**Version X.X.X - XX/XX/XXXX**
+
+- NEW: 
+
+Test Status
+-----------
+
+Planemo test using conda: failed on the faahKO_reduce.zip
+
+Planemo shed_test : unbuild
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ipo.r	Tue May 24 19:49:14 2016 -0400
@@ -0,0 +1,134 @@
+#!/usr/bin/env Rscript
+#Authors Gildas Le Corguille and Yann Guitton
+
+
+# ----- LOG FILE -----
+# Open log.txt for writing (text mode) and divert R's standard output to it,
+# so everything emitted by cat()/print() below ends up in that file.
+log_file=file("log.txt", open = "wt")
+sink(log_file)
+# NOTE(review): "output" is already the stream diverted by the call above, so
+# this second call diverts the same stream again; messages/stderr are not
+# redirected here -- confirm whether type = "message" was intended.
+sink(log_file, type = "output")
+
+
+# ----- PACKAGE -----
+# Load every required package (aborting if one is missing) and log its version.
+cat("\tPACKAGE INFO\n")
+pkgs <- c(
+  "parallel", "BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms",
+  "rsm", "igraph", "CAMERA", "IPO", "batch"
+)
+for (pkg in pkgs) {
+  # library() returns FALSE (instead of failing) because of logical.return,
+  # stopifnot() turns that FALSE into an error.
+  suppressWarnings(
+    suppressPackageStartupMessages(
+      stopifnot(
+        library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)
+      )
+    )
+  )
+  cat(paste0(pkg, "\t", as.character(packageVersion(pkg)), "\n"))
+}
+
+# Source a file located next to the running script.
+# The script location is read from the "--file=" entry of commandArgs();
+# substring(..., 8) strips that 7-character prefix.
+source_local <- function(fname) {
+  argv <- commandArgs(trailingOnly = FALSE)
+  script_arg <- argv[grep("--file=", argv)]
+  base_dir <- dirname(substring(script_arg, 8))
+  source(paste(base_dir, fname, sep="/"))
+}
+cat("\n\n")
+
+
+
+
+
+# ----- ARGUMENTS -----
+# Read the command line as a named list (values are not assigned as variables)
+# and dump it, one argument per row, into the log.
+cat("\tARGUMENTS INFO\n")
+listArguments <- parseCommandArgs(evaluate = FALSE)
+write.table(
+  as.matrix(listArguments),
+  col.names = FALSE,
+  quote = FALSE,
+  sep = "\t"
+)
+
+# ----- FUNCTIONS IMPORT -----
+# The helper functions live in lib.r, next to this script.
+cat("\n\n", "\tINFILE PROCESSING INFO\n", sep = "")
+source_local("lib.r")
+
+# ----- ARGUMENTS PROCESSING -----
+cat("\n\n", "\tARGUMENTS PROCESSING INFO\n", sep = "")
+
+
+# Each script-level option is taken out of listArguments once it has been read,
+# so that the list handed over later no longer contains it.
+
+# Path of the file receiving the optimised parameters.
+parametersOutput <- "parametersOutput.tsv"
+if (!is.null(listArguments[["parametersOutput"]])) {
+  parametersOutput <- listArguments[["parametersOutput"]]
+  listArguments[["parametersOutput"]] <- NULL
+}
+
+# Number of samples drawn per class.
+samplebyclass <- 2
+if (!is.null(listArguments[["samplebyclass"]])) {
+  samplebyclass <- listArguments[["samplebyclass"]]
+  listArguments[["samplebyclass"]] <- NULL
+}
+
+# A .zip archive lets the user upload many files at once while keeping the
+# directory tree; it is unzipped further down.
+if (!is.null(listArguments[["zipfile"]])) {
+  zipfile <- listArguments[["zipfile"]]
+  listArguments[["zipfile"]] <- NULL
+}
+
+# Alternative input: an already existing directory ("library").
+if (!is.null(listArguments[["library"]])) {
+  directory <- listArguments[["library"]]
+  listArguments[["library"]] <- NULL
+  if (!file.exists(directory)) {
+    error_message <- paste("Cannot access the directory:",directory,". Please verify if the directory exists or not.")
+    print(error_message)
+    stop(error_message)
+  }
+}
+
+# Unzip the chromatograms when a zip file was given and work out the root
+# directory that holds them.
+if (exists("zipfile") && (zipfile != "")) {
+    if (!file.exists(zipfile)) {
+        error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
+        print(error_message)
+        stop(error_message)
+    }
+
+    # Extract with the external "unzip" program
+    suppressWarnings(unzip(zipfile, unzip = "unzip"))
+
+    # First path component of every archive entry = candidate root directories
+    filesInZip <- unzip(zipfile, list = TRUE)
+    directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1])))
+
+    # Keep real directories only. file.info() yields NA for entries that do not
+    # exist on disk (e.g. failed extraction); "%in% TRUE" drops those instead of
+    # letting an NA slip through as the directory name.
+    is_dir <- file.info(directories)$isdir
+    directories <- directories[!(directories %in% c("__MACOSX")) & (is_dir %in% TRUE)]
+
+    # A single top-level directory is used as root, otherwise the current one
+    directory <- "."
+    if (length(directories) == 1) directory <- directories
+
+    cat("files_root_directory\t",directory,"\n")
+}
+
+# Neither a zip file nor a library directory was supplied: fail with an explicit
+# message rather than with "object 'directory' not found".
+if (!exists("directory", inherits = FALSE)) {
+    error_message <- "No input to process: please provide either a 'zipfile' or a 'library' argument."
+    print(error_message)
+    stop(error_message)
+}
+
+# Validate the input files before starting the (long) optimisation
+checkXmlStructure(directory)
+checkFilesCompatibilityWithXcms(directory)
+
+cat("\n\n")
+
+cat("\n\n")
+
+
+
+
+
+
+# ----- MAIN PROCESSING INFO -----
+cat("\tMAIN PROCESSING INFO\n")
+
+# Run the parameter optimisation; the result is written to parametersOutput
+ipo4xcmsSet(
+  directory = directory,
+  parametersOutput = parametersOutput,
+  listArguments = listArguments,
+  samplebyclass = samplebyclass
+)
+
+# ----- EXPORT -----
+# Nothing is exported here: only the closing sections of the log are printed.
+cat(
+  "\n\n",
+  "\tEXPORTING INFO\n",
+  "\n\n",
+  "\tDONE\n",
+  sep = ""
+)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ipo4xcmsSet.xml	Tue May 24 19:49:14 2016 -0400
@@ -0,0 +1,239 @@
+<tool id="ipo4xcmsSet" name="IPO for xcmsSet" version="2.0.8">
+    
+    <description>Filtration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis </description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    
+    <command><![CDATA[
+        @COMMAND_SCRIPT@
+
+        #if $inputs.input == "zip_file":
+            zipfile $inputs.zip_file
+        #end if
+
+        parametersOutput $parametersOutput
+
+        samplebyclass $samplebyclass
+
+        ## profmethod $profmethod 
+        nSlaves \${GALAXY_SLOTS:-1} method $methods.method 
+        #if $methods.method == "centWave":
+            ppm $methods.ppm
+            min_peakwidth "c($methods.min_peakwidth)"
+            max_peakwidth "c($methods.max_peakwidth)"
+        #elif $methods.method == "matchedFilter":
+            step $methods.step
+            fwhm $methods.fwhm
+        #end if
+
+        @COMMAND_LOG_EXIT@ 2> /tmp/log.err
+    ]]></command>
+    
+    <inputs>
+
+        <conditional name="inputs">
+            <param name="input" type="select" label="Choose your inputs method" >
+                <option value="zip_file" selected="true">Zip file from your history containing your chromatograms</option>
+            </param>
+            <when value="zip_file">
+                <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file" />
+             </when>
+        </conditional>
+        
+        <param name="samplebyclass" type="integer" value="2" label="Number of samples used per class to estimate the best parameters" help="Set to 0 to use the whole dataset. To save time, reduce this number" />
+
+        <conditional name="methods">
+            <param name="method" type="select" label="Extraction method for peaks detection" help="[method] See the help section below">
+                <option value="centWave" >centWave</option>
+                <option value="matchedFilter" selected="true">matchedFilter</option>
+            </param>
+
+            <!-- centWave Filter options -->
+            <when value="centWave">
+                <param name="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="[ppm]" optional="false" />
+                <param name="min_peakwidth" type="text" value="15,25" label="Min peak width range in seconds" help="[peakwidth]" optional="true" />
+                <param name="max_peakwidth" type="text" value="45,55" label="Max peak width range in seconds" help="[peakwidth]" optional="true" />
+            </when>
+
+        <!-- matched Filter options -->
+            <when value="matchedFilter">
+                <param name="step" type="float" value="0.01" label="Step size to use for profile generation" help="[step] The peak detection algorithm creates extracted ion base peak chromatograms (EIBPC) on a fixed step size" optional="true" />
+                <param name="fwhm" type="integer" value="30" label="Full width at half maximum of matched filtration gaussian model peak" help="[fwhm] Only used to calculate the actual sigma" optional="true" />
+            </when>
+        </conditional>       
+
+
+    </inputs>
+    
+    <outputs>
+        <data name="parametersOutput" format="tabular" label="IPO_parameters4xcmsSet.tsv" />
+        <data name="log" format="txt" label="ipo4xcmsSet.log.txt" />
+    </outputs>
+    
+    <tests>
+        <test>
+            <param name="inputs|input" value="zip_file" />
+            <param name="inputs|zip_file" value="faahKO_reduce.zip"  ftype="zip" />
+            <param name="methods|method" value="centWave" />
+            <param name="methods|ppm" value="25" />
+            <param name="methods|min_peakwidth" value="20,30" />
+            <param name="methods|max_peakwidth" value="45,55" />
+            <output name="parametersOutput" file="faahKO_IPO_parameters4xcmsSet.tsv" />
+        </test>
+        <test>
+            <param name="inputs|input" value="zip_file" />
+            <param name="inputs|zip_file" value="sacuri_2files.zip"  ftype="zip" />
+            <param name="methods|method" value="centWave" />
+            <param name="methods|ppm" value="25" />
+            <param name="methods|min_peakwidth" value="20,30" />
+            <param name="methods|max_peakwidth" value="45,55" />
+            <output name="parametersOutput" file="sacuri_2files_IPO_parameters4xcmsSet.tsv" />
+        </test>
+    </tests>
+    
+    <help><![CDATA[
+
+@HELP_AUTHORS@
+
+===============
+IPO.ipo4xcmsSet
+===============
+
+-----------
+Description
+-----------
+
+A Tool for automated Optimization of XCMS Parameters
+
+
+-----------------
+Workflow position
+-----------------
+
+**Upstream tools**
+
+========================= ================= ======= =========
+Name                      output file       format  parameter
+========================= ================= ======= =========
+NA                        NA                zip     NA       
+========================= ================= ======= =========
+
+
+**Downstream tools**
+
++---------------------------+----------------------+-----------------+
+| Name                      | Output file          | Format          |
++===========================+======================+=================+
+|xcms.xcmsSet               | parametersOutput.tsv | Tabular         |
++---------------------------+----------------------+-----------------+
+
+
+
+-----------
+Input files
+-----------
+
++---------------------------+------------+
+| Parameter : num + label   |   Format   |
++===========================+============+
+| 1 : Choose your inputs    |   zip      |
++---------------------------+------------+
+
+**Choose your inputs**
+
+You have two methods for your inputs:
+
+    | Zip file (recommended): You can put a zip file containing your inputs: myinputs.zip (containing all your conditions as sub-directories).
+    | library folder: You must specify the name of your "library" (folder) created within your space project (for example: /projet/externe/institut/login/galaxylibrary/yourlibrary). Your library must contain all your conditions as sub-directories.
+
+Steps for creating the zip file
+-------------------------------
+
+**Step 1: Create your directory and organize the subdirectories**
+
+
+VERY IMPORTANT: If you zip your files under Windows, you must use the 7-Zip software (http://www.7-zip.org/), otherwise your zip will not be unzipped correctly on the W4M platform (zip corrupted bug).
+
+Your zip should contain all your conditions as sub-directories. For example, two conditions (mutant and wild):
+arabidopsis/wild/01.raw
+arabidopsis/mutant/01.raw
+
+**Step2: Creating a zip file**
+
+Create your zip file (e.g.: arabidopsis.zip).
+
+**Step 3 : Uploading it to our Galaxy server**
+
+If your zip file is smaller than 2 GB, you can use the Get Data tool to upload it.
+
+Otherwise if your zip file is larger than 2Gb, please refer to the HOWTO on workflow4metabolomics.org (http://application.sb-roscoff.fr/download/w4m/howto/galaxy_upload_up_2Go.pdf).
+
+For more information, don't hesitate to send us an email at supportATworkflow4metabolomics.org.
+
+Advices for converting your files for the XCMS input
+----------------------------------------------------
+
+We recommend converting your raw files to **mzXML** in centroid mode (smaller files); the files will then be compatible with the xcms centWave method.
+
+**We recommend you the following parameters:**
+
+Use Filtering: **True**
+
+Use Peak Picking: **True**
+
+Peak Picking - Apply to MS Levels: **All Levels (1-)** : Centroid Mode
+
+Use zlib: **64**
+
+Binary Encoding: **64**
+
+m/z Encoding: **64**
+
+Intensity Encoding: **64**
+
+
+----------
+Parameters
+----------
+
+Extraction method for peaks detection
+-------------------------------------
+
+**Matched Filter**
+
+    | One parameter to consider is the Gaussian model peak width used for matched filtration, an integral part of the peak detection algorithm.
+    | For a discussion of how model peak width affects the signal to noise ratio, see Danielsson et al. (2002).
+
+
+**cent Wave**
+
+    | This algorithm is most suitable for high resolution LC/{TOF,OrbiTrap,FTICR}-MS data in centroid mode.
+    | Due to the fact that peak centroids are used, a binning step is not necessary.
+    | The method is capable of detecting close-by-peaks and also overlapping peaks. Some efforts are made to detect the exact peak boundaries to get precise peak integrals.
+
+
+------------
+Output files
+------------
+
+IPO_parameters4xcmsSet.tsv
+
+    | Optimal parameters for xcmsSet
+
+
+---------------------------------------------------
+
+Changelog/News
+--------------
+
+
+
+    ]]></help>
+
+    <expand macro="citation" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib.r	Tue May 24 19:49:14 2016 -0400
@@ -0,0 +1,96 @@
+
+##
+## Launch the IPO functions to get the best parameters for xcmsSet.
+## To save time, only a sample of the whole dataset can be used.
+##
+## directory        : root directory of the data files; it becomes the working
+##                    directory for the duration of the call
+## parametersOutput : path of the tab-separated file receiving the best settings
+## listArguments    : named list of command line arguments; "method" selects the
+##                    IPO default parameters, the other recognised entries
+##                    (ppm, min_peakwidth, max_peakwidth, step, fwhm, nSlaves)
+##                    override those defaults
+## samplebyclass    : number of files randomly drawn per class (sub-directory);
+##                    0 (or less) keeps every file
+##
+ipo4xcmsSet = function(directory, parametersOutput, listArguments, samplebyclass=4) {
+    # Work inside the data directory, but give the caller its directory back
+    previous_wd <- setwd(directory)
+    on.exit(setwd(previous_wd), add = TRUE)
+
+    files <- list.files(".", recursive = TRUE)   # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
+    files_classes <- basename(dirname(files))    # "KO", "KO", "WT", "WT"
+
+    mzmlfile <- files
+    if (samplebyclass > 0) {
+        # Random selection of N files for IPO in each class; a class with fewer
+        # than N files is kept entirely.
+        mzmlfile <- NULL
+        for (class_i in unique(files_classes)) {
+            files_class_i <- files[files_classes == class_i]
+            if (samplebyclass > length(files_class_i)) {
+                mzmlfile <- c(mzmlfile, files_class_i)
+            } else {
+                mzmlfile <- c(mzmlfile, sample(files_class_i, samplebyclass))
+            }
+        }
+    }
+    #TODO: else, must we keep the RData to been use directly by group?
+
+    cat("\t\tSamples used:\n")
+    print(mzmlfile)
+
+    # Default starting parameters of IPO for the chosen peak picking method
+    paramsPP <- getDefaultXcmsSetStartingParams(listArguments[["method"]])
+
+    # User defined parameters replace the defaults. "step" and "fwhm" are the
+    # matchedFilter counterparts of the centWave options: they are given on the
+    # command line as well and must not be dropped silently.
+    user_parameters <- c("ppm", "min_peakwidth", "max_peakwidth", "step", "fwhm", "nSlaves")
+    for (parameter in user_parameters) {
+        value <- listArguments[[parameter]]
+        if (!is.null(value)) {
+            paramsPP[[parameter]] <- value
+        }
+    }
+
+    #paramsPP$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement
+    resultPPpos <- optimizeXcmsSet(mzmlfile, paramsPP, subdir="IPO_results") #some images generated by IPO
+    write.table(resultPPpos$best_settings$parameters, file=parametersOutput, sep="\t", row.names=FALSE) #can be read by user
+
+}
+
+
+
+
+##
+## This function checks that xcms will find all the files.
+##
+## It compares the data files xcms is expected to pick up in "directory" with
+## the data files actually present on the filesystem, and stops with the list
+## of the files that would be left out (written on stderr).
+##
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
+checkFilesCompatibilityWithXcms <- function(directory) {
+  cat("Checking files filenames compatibilities with xmcs...\n")
+  # WHAT XCMS WILL FIND
+  filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
+  filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
+  info <- file.info(directory)
+  listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
+  files <- c(directory[!info$isdir], listed)
+  # Switch to absolute paths (when they exist) to compare with the find output
+  files_abs <- file.path(getwd(), files)
+  exists <- file.exists(files_abs)
+  files[exists] <- files_abs[exists]
+  files[exists] <- sub("//","/",files[exists])
+
+  # WHAT IS ON THE FILESYSTEM
+  # $PWD and the directory are quoted so that paths containing spaces or shell
+  # metacharacters are handed to find as one single argument.
+  find_cmd <- paste0("find \"$PWD\"/", shQuote(directory), " -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"")
+  filesystem_filepaths <- system(find_cmd, intern = TRUE)
+  filesystem_filepaths <- filesystem_filepaths[grepl(filepattern, filesystem_filepaths, perl = TRUE)]
+
+  # COMPARISON
+  not_imported <- filesystem_filepaths[!(filesystem_filepaths %in% files)]
+  if (length(not_imported) > 0) {
+    write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
+    write(not_imported, stderr())
+    stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
+  }
+}
+
+
+
+##
+## This function checks whether the XML files contain special characters.
+## It also checks their integrity and completeness.
+##
+## Every file of "directory" matching '*.*ml*' (hidden files and conda-env
+## paths excluded) is parsed with xmllint; the files xmllint rejects are
+## written on stderr and the function stops.
+##
+#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
+checkXmlStructure <- function (directory) {
+  cat("Checking XML structure...\n")
+
+  # One xmllint run per file, driven by find itself: no word splitting of the
+  # file list is involved, so the command neither needs the bash-only
+  # IFS=$'\n' syntax (system() runs /bin/sh) nor breaks on names with spaces.
+  # The path of a file is printed only when xmllint exits with a non-zero code.
+  check_one <- "xmllint --nonet --noout \"$1\" > /dev/null 2>&1 || printf \"%s\\n\" \"$1\""
+  cmd <- paste(
+    "find", shQuote(directory),
+    "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'",
+    "-exec sh -c", shQuote(check_one), "sh {} \\;"
+  )
+  capture <- system(cmd, intern = TRUE)
+
+  if (length(capture) > 0) {
+    write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
+    write(capture, stderr())
+    stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue May 24 19:49:14 2016 -0400
@@ -0,0 +1,50 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3.1.2">R</requirement>
+	    <requirement type="package" version="1.7.5">r-ipo</requirement>
+	    <requirement type="package" version="1.1_4">r-batch</requirement>
+        </requirements>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1" level="fatal" />
+        </stdio>
+    </xml>
+
+    <token name="@COMMAND_SCRIPT@">
+        LANG=C Rscript $__tool_directory__/ipo.r
+    </token>
+
+    <token name="@COMMAND_LOG_EXIT@">
+        ;
+        return=\$?;
+        mv log.txt $log;
+        cat $log;
+        sh -c "exit \$return"
+    </token>
+
+    <token name="@HELP_AUTHORS@">
+.. class:: infomark
+
+**Authors**  Gunnar Libiseller, Michaela Dvorzak, Ulrike Kleb, Edgar Gander, Tobias Eisenberg, Frank Madeo, Steffen Neumann, Gert Trausinger, Frank Sinner, Thomas Pieber and Christoph Magnes 
+
+.. class:: infomark
+
+**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M]
+
+ | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool.
+
+---------------------------------------------------
+
+    </token>
+
+
+    <xml name="citation">
+        <citations>
+            <citation type="doi">10.1186/s12859-015-0562-8</citation>
+            <citation type="doi">10.1093/bioinformatics/btu813</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/planemo_test.sh	Tue May 24 19:49:14 2016 -0400
@@ -0,0 +1,9 @@
+conda create -n r-ipo --use-local r-ipo r-batch
+. ~/miniconda2/bin/activate r-ipo
+planemo test --install_galaxy --galaxy_branch "dev" --update_test_data
+
+
+# -- Use of conda dependencies
+planemo conda_init --conda_prefix /tmp/mc
+planemo conda_install --conda_prefix /tmp/mc .
+planemo test --install_galaxy --conda_prefix /tmp/mc --conda_dependency_resolution --galaxy_branch "dev" --update_test_data
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Tue May 24 19:49:14 2016 -0400
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories>
+    <repository changeset_revision="5527da68412e" name="no_unzip_datatype" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+</repositories>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/faahKO_IPO_parameters4xcmsSet.tsv	Tue May 24 19:49:14 2016 -0400
@@ -0,0 +1,2 @@
+"min_peakwidth"	"max_peakwidth"	"mzdiff"	"ppm"	"snthresh"	"noise"	"prefilter"	"value_of_prefilter"	"mzCenterFun"	"integrate"	"fitgauss"	"verbose.columns"	"nSlaves"
+6.6	56.5	0.01605	25	10	0	3	100	"wMean"	1	FALSE	FALSE	1
Binary file test-data/faahKO_reduce.zip has changed
Binary file test-data/sacuri_2files.zip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sacuri_2files_IPO_parameters4xcmsSet.tsv	Tue May 24 19:49:14 2016 -0400
@@ -0,0 +1,2 @@
+"min_peakwidth"	"max_peakwidth"	"mzdiff"	"ppm"	"snthresh"	"noise"	"prefilter"	"value_of_prefilter"	"mzCenterFun"	"integrate"	"fitgauss"	"verbose.columns"	"nSlaves"
+7.5	42.5	0.01715	25	10	0	3	100	"wMean"	1	FALSE	FALSE	1