Mercurial > repos > lecorguille > xcms_xcmsset
changeset 33:c363b9f1caef draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 7b226c3ba91a3cf654ec1c14b3ef85090968bb0f
author | lecorguille |
---|---|
date | Mon, 05 Mar 2018 04:15:34 -0500 |
parents | 2bf1cb023c94 |
children | efd23113d5f4 |
files | abims_xcms_xcmsSet.xml lib.r macros.xml xcms_xcmsSet.r |
diffstat | 4 files changed, 87 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/abims_xcms_xcmsSet.xml Thu Mar 01 04:14:39 2018 -0500 +++ b/abims_xcms_xcmsSet.xml Mon Mar 05 04:15:34 2018 -0500 @@ -18,6 +18,17 @@ #end if BPPARAM \${GALAXY_SLOTS:-1} + + #if $filterSection.filterAcquisitionNum != "": + filterAcquisitionNum "c($filterSection.filterAcquisitionNum)" + #end if + #if $filterSection.filterRt != "": + filterRt "c($filterSection.filterRt)" + #end if + #if $filterSection.filterMz != "": + filterMz "c($filterSection.filterMz)" + #end if + method $methods.method #if $methods.method == "CentWave": @@ -64,12 +75,20 @@ <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" label="File(s) from your history containing your chromatograms" help="Single file mode for the format: mzxml, mzml, mzdata and netcdf. Zip file mode for the format: no_unzip.zip, zip. See the help section below." /> - <!--@TODO <param argument="scanrange" type="text" value="" label="scanrange" help="scan range to process, for example (16,365)" > - Should be replaced by MSnBase::filterAcquisition - --> + <section name="filterSection" title="Spectra Filters" expanded="False"> + <param argument="filterAcquisitionNum" type="text" value="" optional="true" label="Filter on Acquisition Numbers" help="min,max"> + <expand macro="input_validator_range_integer"/> + </param> + <param argument="filterRt" type="text" value="" optional="true" label="Filter on Retention Time" help="min,max"> + <expand macro="input_validator_range_integer"/> + </param> + <param argument="filterMz" type="text" value="" optional="true" label="Filter on Mz" help="min,max"> + <expand macro="input_validator_range_integer"/> + </param> + </section> <conditional name="methods"> - <param name="method" type="select" label="Extraction method for peaks detection" help="[method] See the help section below"> + <param name="method" type="select" label="Extraction method for peaks detection" help="See the help section below"> <option value="MatchedFilter" selected="true">MatchedFilter - peak detection in chromatographic space</option> <option value="CentWave">CentWave - chromatographic peak detection using the centWave method</option> <option value="MSW">MSW - single-spectrum non-chromatography MS data peak detection</option> @@ -78,11 +97,15 @@ <!-- centWave Filter options --> <when value="CentWave"> <param argument="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="for the initial ROI definition." /> - <param argument="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="with the expected approximate peak width in chromatographic space." /> + <param argument="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="with the expected approximate peak width in chromatographic space."> + <expand macro="input_validator_range_float"/> + </param> <section name="CentWaveAdv" title="Advanced Options" expanded="False"> <param argument="snthresh" type="integer" value="10" label="Signal to Noise ratio cutoff" /> - <param argument="prefilter" type="text" value="3,100" label="Prefilter step for for the first analysis step (ROI detection)" help="Separate by coma k, I. Mass traces are only retained if they contain at least ‘k‘ peaks with intensity ‘>= I‘." /> + <param argument="prefilter" type="text" value="3,100" label="Prefilter step for for the first analysis step (ROI detection)" help="Separate by coma k, I. Mass traces are only retained if they contain at least ‘k‘ peaks with intensity ‘>= I‘."> + <expand macro="input_validator_range_integer"/> + </param> <param argument="mzCenterFun" type="select" label="Name of the function to calculate the m/z center of the chromatographic peak" > <option value="wMean">intensity weighted mean of the peak's m/z values</option> <option value="mean">mean of the peak's m/z values</option> @@ -138,7 +161,9 @@ <!---@TODO <param argument="winSize_noise" type="integer" value="500" label="The local window size to estimate the noise level" help="[winSize.noise]" />--> <param argument="snthresh" type="integer" value="3" label="Signal to Noise ratio cutoff" help="" /> <param argument="verboseColumns" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="verbose Columns" help="whether additional peak meta data columns should be returned" /> - <param argument="scales" type="text" value="1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64" label="Scales of the Continuous Wavelet Transform (CWT)" help="Scales are linked to the width of the peaks that are to be detected." /> + <param argument="scales" type="text" value="1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64" label="Scales of the Continuous Wavelet Transform (CWT)" help="Scales are linked to the width of the peaks that are to be detected." > + <expand macro="input_validator_list_integer"/> + </param> <param argument="nearbyPeak" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Determine whether to include the nearby small peaks of major peaks" /> <!-- peakScaleRange --> <param argument="ampTh" type="float" value="0.01" label="Minimum required relative amplitude of the peak" help="Ratio to the maximum of CWT coefficients" /> @@ -165,6 +190,11 @@ <test> <param name="input" value="faahKO_reduce.zip" ftype="zip" /> + <section name="filterSection"> + <param name="filterAcquisitionNum" value="100,5000" /> + <param name="filterRt" value="3000,4000" /> + <param name="filterMz" value="300,400" /> + </section> <conditional name="methods"> <param name="method" value="CentWave" /> <param name="ppm" value="25" /> @@ -174,9 +204,9 @@ <has_text text="ppm: 25" /> <has_text text="peakwidth: 20, 50" /> <has_text text="object with 4 samples" /> - <has_text text="Time range: 2506.1-4477.9 seconds (41.8-74.6 minutes)" /> - <has_text text="Mass range: 200.1-600 m/z" /> - <has_text text="Peaks: 9251 (about 2313 per sample)" /> + <has_text text="Time range: 3006.9-3978.7 seconds (50.1-66.3 minutes)" /> + <has_text text="Mass range: 300-400 m/z" /> + <has_text text="Peaks: 1311 (about 328 per sample)" /> <has_text text="Peak Groups: 0" /> <has_text text="Sample classes: KO, WT" /> </assert_stdout> @@ -594,7 +624,7 @@ - UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. So refactoring of a lot of underlining codes and methods -- NEW: a bunch of new options: CentWave.mzCenterFun, CentWave.fitgauss, CentWave.verboseColumns, MatchedFilter.sigma +- NEW: a bunch of new options: Spectra Filters, CentWave.mzCenterFun, CentWave.fitgauss, CentWave.verboseColumns, MatchedFilter.sigma - UPDATE: since xcms 3.0.0, some options are no more available: scanrange, profmethod, MatchedFilter.step, MatchedFilter.sigma, MSW.winSize.noise, MSW.SNR.method
--- a/lib.r Thu Mar 01 04:14:39 2018 -0500 +++ b/lib.r Mon Mar 05 04:15:34 2018 -0500 @@ -52,7 +52,7 @@ #@author G. Le Corguille # Draw the plotChromPeakDensity 3 per page in a pdf file -getPlotChromPeakDensity <- function(xdata) { +getPlotChromPeakDensity <- function(xdata, mzdigit=4) { pdf(file="plotChromPeakDensity.pdf", width=16, height=12) par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) @@ -62,7 +62,9 @@ xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) for (i in 1:nrow(featureDefinitions(xdata))) { - plotChromPeakDensity(xdata, mz=c(featureDefinitions(xdata)[i,]$mzmin,featureDefinitions(xdata)[i,]$mzmax), col=group_colors, pch=16, xlim=xlim) + mzmin = featureDefinitions(xdata)[i,]$mzmin + mzmax = featureDefinitions(xdata)[i,]$mzmax + plotChromPeakDensity(xdata, mz=c(mzmin,mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit))) legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) } @@ -431,7 +433,7 @@ files[exists] <- sub("//","/",files[exists]) # WHAT IS ON THE FILESYSTEM - filesystem_filepaths <- system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) + filesystem_filepaths <- system(paste0("find \"$PWD/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T) filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] # COMPARISON @@ -461,7 +463,7 @@ checkXmlStructure <- function (directory) { cat("Checking XML structure...\n") - cmd <- paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") + cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") capture <- system(cmd, intern=TRUE) if (length(capture)>0){ @@ -480,7 +482,7 @@ cat("Checking Non ASCII characters in the XML...\n") processed <- F - l <- system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE) + l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE) for (i in l){ cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="") capture <- suppressWarnings(system(cmd, intern=TRUE)) @@ -538,8 +540,8 @@ singlefile_sampleNames <- args$singlefile_sampleNameNegative } if (exists("singlefile_galaxyPaths")){ - singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,",")) - singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,",")) + singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) + singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) singlefile <- NULL for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
--- a/macros.xml Thu Mar 01 04:14:39 2018 -0500 +++ b/macros.xml Mon Mar 05 04:15:34 2018 -0500 @@ -6,6 +6,7 @@ <requirement type="package" version="@WRAPPER_VERSION@">bioconductor-xcms</requirement> <requirement type="package" version="1.1_4">r-batch</requirement> <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement> + <requirement type="package" version="6.0">unzip</requirement> <yield /> </requirements> </xml> @@ -24,13 +25,25 @@ sh -c "exit \$return" </token> + <xml name="input_validator_range_integer"> + <validator type="regex" message="The format is 'min,max'" >[0-9]+ *, *[0-9]+</validator> + </xml> + + <xml name="input_validator_range_float"> + <validator type="regex" message="The format is 'min,max'" >[0-9]+\.?[0-9]* *, *[0-9]+\.?[0-9]*</validator> + </xml> + + <xml name="input_validator_list_integer"> + <validator type="regex" message="The format is '1,2,4,6'" >[0-9, ]+</validator> + </xml> + <!-- zipfile load for planemo test --> <token name="@COMMAND_FILE_LOAD@"> #if $file_load_section.file_load_conditional.file_load_select == "yes": #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"): - #set singlefile_galaxyPath = ','.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] ) - #set singlefile_sampleName = ','.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] ) + #set singlefile_galaxyPath = '|'.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] ) + #set singlefile_sampleName = '|'.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] ) singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName' #else
--- a/xcms_xcmsSet.r Thu Mar 01 04:14:39 2018 -0500 +++ b/xcms_xcmsSet.r Mon Mar 05 04:15:34 2018 -0500 @@ -37,6 +37,16 @@ register(BPPARAM) #saving the specific parameters +if (!is.null(args$filterAcquisitionNum)){ + filterAcquisitionNumParam <- args$filterAcquisitionNum; args$filterAcquisitionNum <- NULL +} +if (!is.null(args$filterRt)){ + filterRtParam <- args$filterRt; args$filterRt <- NULL +} +if (!is.null(args$filterMz)){ + filterMzParam <- args$filterMz; args$filterMz <- NULL +} + method <- args$method; args$method <- NULL cat("\n\n") @@ -80,6 +90,17 @@ cat("\t\t\tLoad Raw Data\n") raw_data <- readMSData(files=files, pdata = new("NAnnotatedDataFrame", pd), mode="onDisk") +cat("\t\t\tApply filter[s] (if asked)\n") +if (exists("filterAcquisitionNumParam")) { + raw_data <- filterAcquisitionNum(raw_data, filterAcquisitionNumParam[1]:filterAcquisitionNumParam[2]) +} +if (exists("filterRtParam")) { + raw_data <- filterRt(raw_data, filterRtParam) +} +if (exists("filterMzParam")) { + raw_data <- filterMz(raw_data, filterMzParam) +} + cat("\t\t\tChromatographic peak detection\n") findChromPeaksParam <- do.call(paste0(method,"Param"), args) print(findChromPeaksParam) @@ -90,7 +111,7 @@ # Transform the files absolute pathways into relative pathways xdata@processingData@files <- sub(paste(getwd(), "/", sep="") , "", xdata@processingData@files) -save.image() + # Create a sampleMetada file sampleNamesList <- getSampleMetadata(xdata=xdata, sampleMetadataOutput="sampleMetadata.tsv")