Mercurial > repos > lecorguille > xcms_xcmsset

--- a/abims_xcms_xcmsSet.xml	Thu Mar 01 04:14:39 2018 -0500
+++ b/abims_xcms_xcmsSet.xml	Mon Mar 05 04:15:34 2018 -0500
@@ -18,6 +18,17 @@
         #end if

         BPPARAM \${GALAXY_SLOTS:-1}
+
+        #if $filterSection.filterAcquisitionNum != "":
+            filterAcquisitionNum "c($filterSection.filterAcquisitionNum)"
+        #end if
+        #if $filterSection.filterRt != "":
+            filterRt "c($filterSection.filterRt)"
+        #end if
+        #if $filterSection.filterMz != "":
+            filterMz "c($filterSection.filterMz)"
+        #end if
+
         method $methods.method

         #if $methods.method == "CentWave":
@@ -64,12 +75,20 @@

         <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" label="File(s) from your history containing your chromatograms" help="Single file mode for the format: mzxml, mzml, mzdata and netcdf. Zip file mode for the format: no_unzip.zip, zip. See the help section below." />

-        <!--@TODO <param argument="scanrange" type="text" value="" label="scanrange" help="scan range to process, for example (16,365)" >
-        Should be replaced by MSnBase::filterAcquisition
-        -->
+        <section name="filterSection" title="Spectra Filters" expanded="False"> <!-- optional 'min,max' filters; empty values are skipped by the command template -->
+            <param argument="filterAcquisitionNum" type="text" value="" optional="true" label="Filter on Acquisition Numbers" help="min,max">
+                <expand macro="input_validator_range_integer"/>
+            </param>
+            <param argument="filterRt" type="text" value="" optional="true" label="Filter on Retention Time" help="min,max">
+                <expand macro="input_validator_range_float"/> <!-- retention times are fractional (e.g. 3006.9), so accept floats -->
+            </param>
+            <param argument="filterMz" type="text" value="" optional="true" label="Filter on Mz" help="min,max">
+                <expand macro="input_validator_range_float"/> <!-- m/z bounds are fractional (e.g. 200.1), so accept floats -->
+            </param>
+        </section>

         <conditional name="methods">
-            <param name="method" type="select" label="Extraction method for peaks detection" help="[method] See the help section below">
+            <param name="method" type="select" label="Extraction method for peaks detection" help="See the help section below">
                 <option value="MatchedFilter" selected="true">MatchedFilter - peak detection in chromatographic space</option>
                 <option value="CentWave">CentWave - chromatographic peak detection using the centWave method</option>
                 <option value="MSW">MSW - single-spectrum non-chromatography MS data peak detection</option>
@@ -78,11 +97,15 @@
             <!-- centWave Filter options -->
             <when value="CentWave">
                 <param argument="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="for the initial ROI definition." />
-                <param argument="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="with the expected approximate peak width in chromatographic space." />
+                <param argument="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="with the expected approximate peak width in chromatographic space.">
+                    <expand macro="input_validator_range_float"/>
+                </param>

                 <section name="CentWaveAdv" title="Advanced Options" expanded="False">
                     <param argument="snthresh" type="integer" value="10" label="Signal to Noise ratio cutoff" />
-                    <param argument="prefilter" type="text" value="3,100" label="Prefilter step for for the first analysis step (ROI detection)" help="Separate by coma k, I. Mass traces are only retained if they contain at least ‘k‘ peaks with intensity ‘>= I‘." />
+                    <param argument="prefilter" type="text" value="3,100" label="Prefilter step for for the first analysis step (ROI detection)" help="Separate by coma k, I. Mass traces are only retained if they contain at least ‘k‘ peaks with intensity ‘>= I‘.">
+                        <expand macro="input_validator_range_integer"/>
+                    </param>
                     <param argument="mzCenterFun" type="select" label="Name of the function to calculate the m/z center of the chromatographic peak" >
                         <option value="wMean">intensity weighted mean of the peak's m/z values</option>
                         <option value="mean">mean of the peak's m/z values</option>
@@ -138,7 +161,9 @@
                 <!---@TODO <param argument="winSize_noise" type="integer" value="500" label="The local window size to estimate the noise level" help="[winSize.noise]" />-->
                 <param argument="snthresh" type="integer" value="3" label="Signal to Noise ratio cutoff" help="" />
                 <param argument="verboseColumns" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="verbose Columns" help="whether additional peak meta data columns should be returned" />
-                <param argument="scales" type="text" value="1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64" label="Scales of the Continuous Wavelet Transform (CWT)" help="Scales are linked to the width of the peaks that are to be detected." />
+                <param argument="scales" type="text" value="1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64" label="Scales of the Continuous Wavelet Transform (CWT)" help="Scales are linked to the width of the peaks that are to be detected." >
+                    <expand macro="input_validator_list_integer"/>
+                </param>
                 <param argument="nearbyPeak" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Determine whether to include the nearby small peaks of major peaks" />
                 <!-- peakScaleRange -->
                 <param argument="ampTh" type="float" value="0.01" label="Minimum required relative amplitude of the peak" help="Ratio to the maximum of CWT coefficients" />
@@ -165,6 +190,11 @@

         <test>
             <param name="input" value="faahKO_reduce.zip"  ftype="zip" />
+            <section name="filterSection">
+                <param name="filterAcquisitionNum" value="100,5000" />
+                <param name="filterRt" value="3000,4000" />
+                <param name="filterMz" value="300,400" />
+            </section>
             <conditional name="methods">
                 <param name="method" value="CentWave" />
                 <param name="ppm" value="25" />
@@ -174,9 +204,9 @@
                 <has_text text="ppm: 25" />
                 <has_text text="peakwidth: 20, 50" />
                 <has_text text="object with 4 samples" />
-                <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" />
-                <has_text text="Mass range: 200.1-600 m/z" />
-                <has_text text="Peaks: 9251 (about 2313 per sample)" />
+                <has_text text="Time range: 3006.9-3978.7 seconds (50.1-66.3 minutes)" />
+                <has_text text="Mass range: 300-400 m/z" />
+                <has_text text="Peaks: 1311 (about 328 per sample)" />
                 <has_text text="Peak Groups: 0" />
                 <has_text text="Sample classes: KO, WT" />
             </assert_stdout>
@@ -594,7 +624,7 @@

 - UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlining codes and methods

-- NEW: a bunch of new options: CentWave.mzCenterFun, CentWave.fitgauss, CentWave.verboseColumns, MatchedFilter.sigma
+- NEW: a bunch of new options: Spectra Filters, CentWave.mzCenterFun, CentWave.fitgauss, CentWave.verboseColumns, MatchedFilter.sigma

 - UPDATE: since xcms 3.0.0, some options are no more available: scanrange, profmethod, MatchedFilter.step, MatchedFilter.sigma, MSW.winSize.noise, MSW.SNR.method
--- a/lib.r	Thu Mar 01 04:14:39 2018 -0500
+++ b/lib.r	Mon Mar 05 04:15:34 2018 -0500
@@ -52,7 +52,7 @@

 #@author G. Le Corguille
 # Draw the plotChromPeakDensity 3 per page in a pdf file
-getPlotChromPeakDensity <- function(xdata) {
+getPlotChromPeakDensity <- function(xdata, mzdigit=4) {
     pdf(file="plotChromPeakDensity.pdf", width=16, height=12)

     par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
@@ -62,7 +62,9 @@

     xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
     for (i in 1:nrow(featureDefinitions(xdata))) {
-        plotChromPeakDensity(xdata, mz=c(featureDefinitions(xdata)[i,]$mzmin,featureDefinitions(xdata)[i,]$mzmax), col=group_colors, pch=16, xlim=xlim)
+        mzmin = featureDefinitions(xdata)[i,]$mzmin
+        mzmax = featureDefinitions(xdata)[i,]$mzmax
+        plotChromPeakDensity(xdata, mz=c(mzmin,mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit)))
         legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
     }

@@ -431,7 +433,7 @@
     files[exists] <- sub("//","/",files[exists])

     # WHAT IS ON THE FILESYSTEM
-    filesystem_filepaths <- system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
+    filesystem_filepaths <- system(paste0("find \"$PWD/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T)
     filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]

     # COMPARISON
@@ -461,7 +463,7 @@
 checkXmlStructure <- function (directory) {
     cat("Checking XML structure...\n")

-    cmd <- paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
+    cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
     capture <- system(cmd, intern=TRUE)

     if (length(capture)>0){
@@ -480,7 +482,7 @@
     cat("Checking Non ASCII characters in the XML...\n")

     processed <- F
-    l <- system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE)
+    l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE)
     for (i in l){
         cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="")
         capture <- suppressWarnings(system(cmd, intern=TRUE))
@@ -538,8 +540,8 @@
         singlefile_sampleNames <- args$singlefile_sampleNameNegative
     }
     if (exists("singlefile_galaxyPaths")){
-        singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,","))
-        singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,","))
+        singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|"))
+        singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|"))

         singlefile <- NULL
         for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
--- a/macros.xml	Thu Mar 01 04:14:39 2018 -0500
+++ b/macros.xml	Mon Mar 05 04:15:34 2018 -0500
@@ -6,6 +6,7 @@
             <requirement type="package" version="@WRAPPER_VERSION@">bioconductor-xcms</requirement>
             <requirement type="package" version="1.1_4">r-batch</requirement>
             <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement>
+            <requirement type="package" version="6.0">unzip</requirement>
             <yield />
         </requirements>
     </xml>
@@ -24,13 +25,25 @@
         sh -c "exit \$return"
     </token>

+    <xml name="input_validator_range_integer"> <!-- 'min,max' pair of unsigned integers; the trailing '$' makes the whole value match, since the regex is only applied from the start -->
+        <validator type="regex" message="The format is 'min,max'" >[0-9]+ *, *[0-9]+$</validator>
+    </xml>
+
+    <xml name="input_validator_range_float"> <!-- 'min,max' pair of unsigned decimal numbers, anchored at the end like the integer variant -->
+        <validator type="regex" message="The format is 'min,max'" >[0-9]+\.?[0-9]* *, *[0-9]+\.?[0-9]*$</validator>
+    </xml>
+
+    <xml name="input_validator_list_integer"> <!-- comma/space separated list of unsigned integers; '$' rejects any other trailing character -->
+        <validator type="regex" message="The format is '1,2,4,6'" >[0-9, ]+$</validator>
+    </xml>
+
     <!-- zipfile load for planemo test -->

     <token name="@COMMAND_FILE_LOAD@">
         #if $file_load_section.file_load_conditional.file_load_select == "yes":
             #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"):
-                #set singlefile_galaxyPath = ','.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] )
-                #set singlefile_sampleName = ','.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] )
+                #set singlefile_galaxyPath = '|'.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] )
+                #set singlefile_sampleName = '|'.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] )

                 singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName'
             #else
--- a/xcms_xcmsSet.r	Thu Mar 01 04:14:39 2018 -0500
+++ b/xcms_xcmsSet.r	Mon Mar 05 04:15:34 2018 -0500
@@ -37,6 +37,16 @@
 register(BPPARAM)

 #saving the specific parameters
+if (!is.null(args$filterAcquisitionNum)){
+    filterAcquisitionNumParam <- args$filterAcquisitionNum; args$filterAcquisitionNum <- NULL
+}
+if (!is.null(args$filterRt)){
+    filterRtParam <- args$filterRt; args$filterRt <- NULL
+}
+if (!is.null(args$filterMz)){
+    filterMzParam <- args$filterMz; args$filterMz <- NULL
+}
+
 method <- args$method; args$method <- NULL

 cat("\n\n")
@@ -80,6 +90,17 @@
 cat("\t\t\tLoad Raw Data\n")
 raw_data <- readMSData(files=files, pdata = new("NAnnotatedDataFrame", pd), mode="onDisk")

+cat("\t\t\tApply filter[s] (if asked)\n")
+if (exists("filterAcquisitionNumParam")) {
+    raw_data <- filterAcquisitionNum(raw_data, filterAcquisitionNumParam[1]:filterAcquisitionNumParam[2])
+}
+if (exists("filterRtParam")) {
+    raw_data <- filterRt(raw_data, filterRtParam)
+}
+if (exists("filterMzParam")) {
+    raw_data <- filterMz(raw_data, filterMzParam)
+}
+
 cat("\t\t\tChromatographic peak detection\n")
 findChromPeaksParam <- do.call(paste0(method,"Param"), args)
 print(findChromPeaksParam)
@@ -90,7 +111,7 @@

 # Transform the files absolute pathways into relative pathways
 xdata@processingData@files <- sub(paste(getwd(), "/", sep="") , "", xdata@processingData@files)
-save.image()
+
 # Create a sampleMetada file
 sampleNamesList <- getSampleMetadata(xdata=xdata, sampleMetadataOutput="sampleMetadata.tsv")