view preprocessing.xml @ 10:aa479a0cfb43 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit c8d3adac445b4e08e2724e22d7201bfc38bbf40f"
author galaxyp
date Sun, 29 Aug 2021 07:20:07 +0000
parents e0bbaf9f7da0
children 274e81434593
line wrap: on
line source

<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.0">
    <description>
        mass spectrometry imaging preprocessing
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="2.3">r-gridextra</requirement>
        <requirement type="package" version="3.3.5">r-ggplot2</requirement>
    </expand>
    <command detect_errors="exit_code">
    <![CDATA[

        @INPUT_LINKING@
        ## print the generated R script to stdout, then run it
        cat '${cardinal_preprocessing}' &&
        Rscript '${cardinal_preprocessing}' &&

        ## collect the imzML/ibd pair in the extra files directory of the output dataset;
        ## the R script only calls writeImzML when at least one m/z feature is left, so
        ## each mv is piped into true to keep the chain going when the files are missing
        mkdir '$outfile_imzml.files_path' &&
            mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
            mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
        echo "imzML file:" > '$outfile_imzml' &&
        ls -l "$outfile_imzml.files_path" >> '$outfile_imzml'

    ]]>
    </command>
    <configfiles>
        <configfile name="cardinal_preprocessing"><![CDATA[

################################# load libraries and read file #################

## set CPU: use the slots allocated by Galaxy (GALAXY_SLOTS), default = 1

galaxy_slots = suppressWarnings(as.numeric(Sys.getenv("GALAXY_SLOTS")))
if (is.na(galaxy_slots) || galaxy_slots < 1)
    {
        number_cpu = 1 ## default = 1 when GALAXY_SLOTS is unset, empty or not a positive number
    }else{
        number_cpu = galaxy_slots ##cpu set by Galaxy
    }

library(Cardinal)
library(gridExtra)
library(ggplot2)


@READING_MSIDATA_FULLY_COMPATIBLE@


## remove duplicated coordinates, otherwise peak picking and log2 transformation will fail

## set variable to False
#set $used_peak_picking = False
#set $used_peak_alignment = False
#set $continuous_format = False


if (ncol(msidata) > 0 && nrow(msidata) > 0){

    ## start QC report

    pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
    plot(0,type='n',axes=FALSE,ann=FALSE)
    title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))

    ######################### preparations for QC report #################

    ## first column of the QC table: m/z range, number of features and number of spectra of the input
    maxfeatures =nrow(msidata)
    pixelcount = ncol(msidata)
    minmz = round(min(mz(msidata)), digits=2)
    maxmz = round(max(mz(msidata)), digits=2)
    QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, pixelcount))
    vectorofactions = "inputdata"
    ## Choose random spectra for QC plots; files with fewer than 4 spectra use
    ## all of them, because sample() fails when asked for more items than exist
    random_spectra = sample(pixels(msidata), min(4, pixelcount), replace=FALSE)
    par(oma=c(0,0,2,0))
    print(plot(msidata, pixel=random_spectra, col="black"))
    title("Input spectra", outer=TRUE, line=0)

    ############################### Preprocessing steps ###########################
    ###############################################################################

    #for $method in $methods:

    ############################### Normalization ###########################

        #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization':
            print('Normalization')
            ## intensity normalization with the method selected in the tool form,
            ## applied through process() with the configured number of workers

            msidata = process(
                normalize(msidata, method="$method.methods_conditional.methods_for_normalization.normalization_method"),
                BPPARAM=MulticoreParam(workers=number_cpu))


            ############################### QC ###########################

            ## QC table column for this step: m/z range, number of features, number of spectra
            mz_limits = round(range(mz(msidata)), digits=2)
            minmz = mz_limits[1]
            maxmz = mz_limits[2]
            maxfeatures = nrow(msidata)
            pixelcount = ncol(msidata)
            normalized = c(minmz, maxmz, maxfeatures, pixelcount)
            QC_numbers = cbind(QC_numbers, normalized)
            vectorofactions = c(vectorofactions, "normalized")
            ## plot the same randomly chosen spectra as for the input data
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after normalization", outer=TRUE, line=0)

    ############################### Baseline reduction ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction':
            print('Baseline_reduction')
            ## median baseline reduction with the block count and spar value from the tool form

            msidata = process(
                reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline, spar=$method.methods_conditional.spar_baseline),
                BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            ## QC table column for this step: m/z range, number of features, number of spectra
            mz_limits = round(range(mz(msidata)), digits=2)
            minmz = mz_limits[1]
            maxmz = mz_limits[2]
            maxfeatures = nrow(msidata)
            pixelcount = ncol(msidata)
            baseline = c(minmz, maxmz, maxfeatures, pixelcount)
            QC_numbers = cbind(QC_numbers, baseline)
            vectorofactions = c(vectorofactions, "baseline red.")
            ## plot the same randomly chosen spectra as for the input data
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after baseline reduction", outer=TRUE, line=0)

    ############################### Smoothing ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing':
            print('Smoothing')
            ## smooth the spectra with the method chosen in the tool form;
            ## in every branch smoothSignal is followed directly by process()

            #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian':
                print('gaussian smoothing')

                msidata = process(
                    smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian),
                    BPPARAM=MulticoreParam(workers=number_cpu))

            #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay':
                print('sgolay smoothing')

                msidata = process(
                    smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters),
                    BPPARAM=MulticoreParam(workers=number_cpu))

                ## if selected in the tool form, set negative intensities to zero
                #if $method.methods_conditional.methods_for_smoothing.replace_negatives:
                    ## convert the spectra into an R matrix, fix the values and write them back
                    spectra_df = as.matrix(spectra(msidata))
                    spectra_df[spectra_df < 0] = 0
                    spectra(msidata) = spectra_df
                #end if

            #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma':
                print('moving average smoothing')

                msidata = process(
                    smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter),
                    BPPARAM=MulticoreParam(workers=number_cpu))

            #end if

            ############################### QC ###########################

            ## QC table column for this step: m/z range, number of features, number of spectra
            mz_limits = round(range(mz(msidata)), digits=2)
            minmz = mz_limits[1]
            maxmz = mz_limits[2]
            maxfeatures = nrow(msidata)
            pixelcount = ncol(msidata)
            smoothed = c(minmz, maxmz, maxfeatures, pixelcount)
            QC_numbers = cbind(QC_numbers, smoothed)
            vectorofactions = c(vectorofactions, "smoothed")
            ## plot the same randomly chosen spectra as for the input data
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after smoothing", outer=TRUE, line=0)


    ############################### Mz alignment ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_alignment':
            print('m/z alignment')
            ## align the m/z axis, either to m/z values from a tabular file
            ## or without passing a reference

            #if str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_table':

                ## read the selected column of the reference table
                reference_table = read.delim("$method.methods_conditional.mzalign_ref_type.mz_tabular", header = $method.methods_conditional.mzalign_ref_type.feature_header, stringsAsFactors = FALSE)
                reference_mz = reference_table[,$method.methods_conditional.mzalign_ref_type.feature_column]

                msidata = mzAlign(msidata,
                    ref=reference_mz,
                    tolerance = $method.methods_conditional.alignment_tol,
                    units = "$method.methods_conditional.alignment_units",
                    quantile = $method.methods_conditional.quantile,
                    span = $method.methods_conditional.span)


            #elif str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_noref':

                msidata = mzAlign(msidata,
                    tolerance = $method.methods_conditional.alignment_tol,
                    units = "$method.methods_conditional.alignment_units",
                    quantile = $method.methods_conditional.quantile,
                    span = $method.methods_conditional.span)

            #end if

            msidata = process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            ## QC table column for this step: m/z range, number of features, number of spectra
            mz_limits = round(range(mz(msidata)), digits=2)
            minmz = mz_limits[1]
            maxmz = mz_limits[2]
            maxfeatures = nrow(msidata)
            pixelcount = ncol(msidata)
            mz_aligned = c(minmz, maxmz, maxfeatures, pixelcount)
            QC_numbers = cbind(QC_numbers, mz_aligned)
            vectorofactions = c(vectorofactions, "mz aligned")
            ## plot the same randomly chosen spectra as for the input data
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after m/z alignment", outer=TRUE, line=0)
            
          
    ############################### Mz recalibration ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_recalibration':
            print('m/z recalibration')
            ## recalibrate the m/z axis against the m/z values of a tabular file

            ## read the selected column of the reference table
            reference_table = read.delim("$method.methods_conditional.mz_tabular", header = $method.methods_conditional.feature_header, stringsAsFactors = FALSE)
            reference_mz = reference_table[,$method.methods_conditional.feature_column]

            msidata = mzAlign(msidata,
                ref=reference_mz,
                tolerance = $method.methods_conditional.alignment_tol,
                units = "$method.methods_conditional.alignment_units",
                quantile = $method.methods_conditional.quantile,
                span = $method.methods_conditional.span)

            msidata = process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ## remove the reference peaks data to allow proper peak alignment afterwards
            metadata(featureData(msidata))['reference peaks'] <- NULL

            ############################### QC ###########################

            ## QC table column for this step: m/z range, number of features, number of spectra
            mz_limits = round(range(mz(msidata)), digits=2)
            minmz = mz_limits[1]
            maxmz = mz_limits[2]
            maxfeatures = nrow(msidata)
            pixelcount = ncol(msidata)
            mz_recal = c(minmz, maxmz, maxfeatures, pixelcount)
            QC_numbers = cbind(QC_numbers, mz_recal)
            vectorofactions = c(vectorofactions, "mz recalibrated")
            ## plot the same randomly chosen spectra as for the input data
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after m/z recalibration", outer=TRUE, line=0)


    ############################### Peak picking ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking':
            #set $used_peak_picking = True
            print('Peak_picking')
            ## pick peaks with the method chosen in the tool form; only the
            ## adaptive method additionally receives a spar value

            #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
                print('adaptive peakpicking')

                msidata = peakPick(msidata,
                    window = $method.methods_conditional.window_picking,
                    blocks = $method.methods_conditional.blocks_picking,
                    method='$method.methods_conditional.methods_for_picking.picking_method',
                    SNR=$method.methods_conditional.SNR_picking_method,
                    spar=$method.methods_conditional.methods_for_picking.spar_picking)

            #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'mad':
                print('mad peakpicking')

                msidata = peakPick(msidata,
                    window = $method.methods_conditional.window_picking,
                    blocks = $method.methods_conditional.blocks_picking,
                    method='$method.methods_conditional.methods_for_picking.picking_method',
                    SNR=$method.methods_conditional.SNR_picking_method)

            #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple':
                print('simple peakpicking')

                msidata = peakPick(msidata,
                    window = $method.methods_conditional.window_picking,
                    blocks = $method.methods_conditional.blocks_picking,
                    method='$method.methods_conditional.methods_for_picking.picking_method',
                    SNR=$method.methods_conditional.SNR_picking_method)

            #end if
            msidata = process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))


            ############################### QC ###########################

            ## QC table column for this step: m/z range, number of features, number of spectra
            mz_limits = round(range(mz(msidata)), digits=2)
            minmz = mz_limits[1]
            maxmz = mz_limits[2]
            maxfeatures = nrow(msidata)
            pixelcount = ncol(msidata)
            picked = c(minmz, maxmz, maxfeatures, pixelcount)
            QC_numbers = cbind(QC_numbers, picked)
            vectorofactions = c(vectorofactions, "picked")
            ## plot the same randomly chosen spectra as for the input data
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after peak picking", outer=TRUE, line=0)

    ############################### Peak alignment ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment':
            #set $used_peak_alignment = True
            print('Peak_alignment')
            ## align peaks, either to m/z values from a tabular file or without a reference

            #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table':

                align_reference_table = read.delim("$method.methods_conditional.align_ref_type.mz_tabular", header = $method.methods_conditional.align_ref_type.feature_header, stringsAsFactors = FALSE)

                align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.feature_column]

                ## keep only reference values inside the m/z range of the data (limits included)
                mz_values = mz(msidata)
                inside_mz_range = align_reference_column>=min(mz_values) & align_reference_column<=max(mz_values)
                align_peak_reference = as.numeric(align_reference_column[inside_mz_range])
                ## use a single 0 as reference when no value is inside the range
                if (length(align_peak_reference) == 0){
                    align_peak_reference = 0
                }

                msidata = peakAlign(msidata,
                    tolerance =$method.methods_conditional.value_diffalignment,
                    units = "$method.methods_conditional.units_diffalignment",
                    ref=align_peak_reference)


            #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref':

                msidata = peakAlign(msidata,
                    tolerance =$method.methods_conditional.value_diffalignment,
                    units = "$method.methods_conditional.units_diffalignment")

            #end if

            msidata = process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))


            ############################### QC ###########################

            ## QC table column for this step: m/z range, number of features, number of spectra
            mz_limits = round(range(mz(msidata)), digits=2)
            minmz = mz_limits[1]
            maxmz = mz_limits[2]
            maxfeatures = nrow(msidata)
            pixelcount = ncol(msidata)
            aligned = c(minmz, maxmz, maxfeatures, pixelcount)
            QC_numbers = cbind(QC_numbers, aligned)
            vectorofactions = c(vectorofactions, "aligned")
            ## plot the same randomly chosen spectra as for the input data
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after alignment", outer=TRUE, line=0)

    ############################### Peak filtering ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering':
            print('Peak_filtering')
            ## filter peaks with the minimum frequency given in the tool form

            msidata = process(
                peakFilter(msidata, freq.min = $method.methods_conditional.frequ_filtering),
                BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            ## QC table column for this step: m/z range, number of features, number of spectra
            mz_limits = round(range(mz(msidata)), digits=2)
            minmz = mz_limits[1]
            maxmz = mz_limits[2]
            maxfeatures = nrow(msidata)
            pixelcount = ncol(msidata)
            filtered = c(minmz, maxmz, maxfeatures, pixelcount)
            QC_numbers = cbind(QC_numbers, filtered)
            vectorofactions = c(vectorofactions, "filtered")
            ## plot the same randomly chosen spectra as for the input data
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after filtering", outer=TRUE, line=0)

    ############################### Peak binning ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_binning':
            print('Peak_binning')

            ## read the reference m/z values and keep those strictly inside the m/z range of the data
            reference_table = read.delim("$method.methods_conditional.mz_tabular", header = $method.methods_conditional.feature_header, stringsAsFactors = FALSE)
            reference_column = reference_table[,$method.methods_conditional.feature_column]
            mz_values = mz(msidata)
            inside_mz_range = reference_column>min(mz_values) & reference_column<max(mz_values)
            peak_reference = reference_column[inside_mz_range]

            msidata = peakBin(msidata,
                ref = peak_reference,
                tolerance = $method.methods_conditional.peakbin_tol,
                units = "$method.methods_conditional.peakbin_units",
                type="$method.methods_conditional.peaks_type")
            msidata = process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            ## QC table column for this step: m/z range, number of features, number of spectra
            mz_limits = round(range(mz(msidata)), digits=2)
            minmz = mz_limits[1]
            maxmz = mz_limits[2]
            maxfeatures = nrow(msidata)
            pixelcount = ncol(msidata)
            peak_binned = c(minmz, maxmz, maxfeatures, pixelcount)
            QC_numbers = cbind(QC_numbers, peak_binned)
            vectorofactions = c(vectorofactions, "peak binned")
            ## plot the same randomly chosen spectra as for the input data
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after peak binning", outer=TRUE, line=0)


    ############################### Mass binning ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Mass_binning':
            print('mass binning')
            ## bin the m/z axis; the three branches differ only in how the bin positions are defined

            #if str($method.methods_conditional.mz_range.features_filtering) == 'change_mz_range':
                ## bin between the minimum and maximum m/z given in the tool form

                msidata = mzBin(msidata,resolution=$method.methods_conditional.bin_width, from=$method.methods_conditional.mz_range.min_mz, to=$method.methods_conditional.mz_range.max_mz, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun")

            #elif str($method.methods_conditional.mz_range.features_filtering) == 'none':
                ## no explicit m/z range is passed to mzBin
                ## NOTE(review): fun is passed unquoted here and in the next branch but quoted above - confirm that mzBin accepts both forms

                msidata = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun)

            #elif str($method.methods_conditional.mz_range.features_filtering) == 'bin_to_reference':
                ## bin to the m/z values read from the selected column of the reference table

                bin_reference_mz = read.delim("$method.methods_conditional.mz_range.mz_tabular", header = $method.methods_conditional.mz_range.feature_header, stringsAsFactors = FALSE)
                bin_reference_mz = bin_reference_mz[,$method.methods_conditional.mz_range.feature_column]

                msidata = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun,
                    ref=bin_reference_mz)

            #end if

            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ## optional: replace NA with 0
            #if $method.methods_conditional.replace_NA_bin:
                ## count and replace NAs
                print(paste0("Number of NA that were set to zero after binning:",sum(is.na(spectra(msidata)))))
                spectra(msidata)[is.na(spectra(msidata))] = 0
            #end if

            ############################### QC ###########################

            ## QC table column for this step: m/z range, number of features, number of spectra
            maxfeatures =nrow(msidata)
            pixelcount = ncol(msidata)
            minmz = round(min(mz(msidata)), digits=2)
            maxmz = round(max(mz(msidata)), digits=2)
            reduced = c(minmz, maxmz,maxfeatures, pixelcount)
            QC_numbers= cbind(QC_numbers, reduced)
            vectorofactions = append(vectorofactions, "reduced")
            ## plot the same randomly chosen spectra as for the input data
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after m/z binning", outer=TRUE, line=0)

        ############################### Transformation ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation':
            print('Transformation')
            ## log2 or square root transformation of the intensities

            ## processed data is converted to the continuous class before the spectra are replaced;
            ## is() is used instead of comparing class() so the test does not depend on the
            ## exact form of the class attribute
            if (is(msidata, "MSProcessedImagingExperiment")){
                msidata = as(msidata, "MSContinuousImagingExperiment")
            }

            #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2':
                print('log2 transformation')

                ## replace 0 with NA to prevent Inf
                spectra_df = as.matrix(spectra(msidata)) ## convert into R matrix
                ## count the zeros themselves, so NAs that were already present are not reported as converted
                zero_positions = which(spectra_df == 0)
                spectra_df[zero_positions] = NA
                print(paste0("Number of 0 which were converted into NA:",length(zero_positions)))
                spectra(msidata) = spectra_df
                ## log transformation
                spectra(msidata) = log2(spectra(msidata))
                ## optional: replace NA with 0
                #if $method.methods_conditional.transf_conditional.replace_NA_trans:
                    spectra(msidata)[is.na(spectra(msidata))] = 0
                #end if

            #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt':
                print('squareroot transformation')

                spectra(msidata) = sqrt(spectra(msidata))

           #end if

            ############################### QC ###########################

            ## QC table column for this step: m/z range, number of features, number of spectra
            maxfeatures =nrow(msidata)
            pixelcount = ncol(msidata)
            minmz = round(min(mz(msidata)), digits=2)
            maxmz = round(max(mz(msidata)), digits=2)
            transformed = c(minmz, maxmz,maxfeatures, pixelcount)
            QC_numbers= cbind(QC_numbers, transformed)
            vectorofactions = append(vectorofactions, "transformed")
            ## plot the same randomly chosen spectra as for the input data
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after transformation", outer=TRUE, line=0)

            #end if
    #end for

    ############# Outputs: RData, imzml and QC report #############
    ################################################################################

    ## save msidata as imzML file, will only work if there is at least 1 m/z left

        if (nrow(msidata) > 0){
            ## make sure that coordinates are integers
            ## (the backslash escapes the dollar sign for the Cheetah template)
            coord(msidata)\$y = as.integer(coord(msidata)\$y)
            coord(msidata)\$x = as.integer(coord(msidata)\$x)
            ## only continuous files can currently be exported
            msidata = as(msidata, "MSContinuousImagingExperiment")
            ## written with the base name "out"; the tool command moves out.imzML and out.ibd afterwards
            writeImzML(msidata, "out")
        }

    ## last page of the QC report: empty plot plus the transposed QC table,
    ## i.e. one table row per entry collected in QC_numbers
    plot(0,type='n',axes=FALSE,ann=FALSE)
    rownames(QC_numbers) = c("min m/z", "max mz", "# features", "# spectra")
    grid.table(t(QC_numbers))
    ## close the PDF device opened at the start of the QC report
    dev.off()

## reached when msidata has no spectra or no m/z features; no report and no imzML are written
}else{
    print("inputfile has no intensities > 0")
}

    ]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="reading_msidata"/>
        <repeat name="methods" title="Preprocessing" min="1" max="50">
            <conditional name="methods_conditional">
                <param name="preprocessing_method" type="select" label="Preprocessing methods">
                    <option value="Normalization" selected="True">Intensity Normalization</option>
                    <option value="Baseline_reduction">Baseline Reduction</option>
                    <option value="Smoothing">Peak smoothing</option>
                    <option value="mz_alignment">m/z alignment</option>
                    <option value="mz_recalibration">m/z recalibration</option>
                    <option value="Peak_picking">Peak picking</option>
                    <option value="Peak_alignment">Peak alignment</option>
                    <option value="Peak_filtering">Peak filtering</option>
                    <option value="Peak_binning">Peak binning to reference peaks</option>
                    <option value="Mass_binning">m/z binning</option>
                    <option value="Transformation">Transformation</option>
                </param>
                <when value="Normalization">
                    <conditional name="methods_for_normalization">
                        <param name="normalization_method" type="select" label="Normalization method">
                            <option value="tic" selected="True">TIC</option>
                            <option value="rms">RMS</option>
                        </param>
                        <when value="tic"/>
                        <when value="rms"/>
                    </conditional>
                </when>
                <when value="Baseline_reduction">
                    <param name="blocks_baseline" type="integer" value="500"
                        label="Blocks"/>
                    <param name="spar_baseline" type="float" value="1.0" label="Spar value" 
                           help="Smoothing parameter for the spline smoothing 
                                  applied to the spectrum in order to decide the cutoffs 
                              for throwing away false noise spikes that might occur inside peaks"/>
                </when>
                <when value="Smoothing">
                    <conditional name="methods_for_smoothing">
                        <param name="smoothing_method" type="select" label="Smoothing method">
                            <option value="gaussian" selected="True">gaussian</option>
                            <option value="sgolay">Savitzky-Golay</option>
                            <option value="ma">moving average</option>
                        </param>
                        <when value="gaussian">
                            <param name="sd_gaussian" type="float" value="2"
                                   label="The standard deviation for the Gaussian kernel. Default = window/4"/>
                        </when>
                        <when value="sgolay">
                            <param name="order_of_filters" type="integer" value="3"
                                   label="The order of the smoothing filter, must be smaller than window size"/>
                            <param name="replace_negatives" type="boolean" label="Replace negative intensities with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Savitzky-Golay smoothing can introduce negative intensity values; it is recommended to replace them with zero."/>
                        </when>
                        <when value="ma">
                            <param name="coefficients_ma_filter" type="float" value="1"
                                   label="The coefficients for the moving average filter"/>
                        </when>
                    </conditional>
                    <param name="window_smoothing" type="float" value="8"
                                label="Window size"/>
                </when>
                <when value="mz_alignment">
                    <param name="alignment_tol" type="text" value="NA"
                           label="tolerance" help="The tolerance to be used when matching the peaks in the unaligned spectra to the reference spectrum. If this is NA, then automatically guess a tolerance from the data.">
                       <sanitizer>
                            <valid initial="string.digits">
                                <add value="N" />
                                <add value="A" />
                            </valid>
                        </sanitizer>
                    </param>
                    <param name="alignment_units" type="select" display="radio" optional="False" label="The units to use for the tolerance.">
                            <option value="ppm" selected="True">ppm</option>
                            <option value="mz">m/z</option>
                    </param>
                    <conditional name="mzalign_ref_type">
                        <param name="align_reference_datatype" type="select" label="Choose reference">
                            <option value="align_noref" selected="True">use mean spectrum as reference</option>
                            <option value="align_table" >m/z values from tabular file as reference</option>
                        </param>
                        <when value="align_noref"/>
                        <when value="align_table">
                            <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/>
                        </when>
                    </conditional>
                    <param name="quantile" type="float" value="0.2"
                        label="quantile" help="The top quantile of reference points (peaks detected via local maxima) to use from the reference spectrum."/>
                    <param name="span" type="float" value="0.75"
                        label="span" help="The smoothing parameter for the local polynomial regression used to determine the warping function."/>
                </when>
	        <when value="mz_recalibration">
		    <param name="alignment_tol" type="text" value="NA"
		           label="tolerance" help="The tolerance to be used when matching the peaks in the unaligned spectra to the reference spectrum. If this is NA, then automatically guess a tolerance from the data.">
		       <sanitizer>
		            <valid initial="string.digits">
		                <add value="N" />
		                <add value="A" />
		            </valid>
		        </sanitizer>
		    </param>
		    <param name="alignment_units" type="select" display="radio" optional="False" label="The units to use for the tolerance.">
		            <option value="ppm" selected="True">ppm</option>
		            <option value="mz">m/z</option>
		    </param>
		    <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/>
		    <param name="quantile" type="float" value="0.2"
		        label="quantile" help="The top quantile of reference points (peaks detected via local maxima) to use from the reference spectrum."/>
		    <param name="span" type="float" value="0.75"
		        label="span" help="The smoothing parameter for the local polynomial regression used to determine the warping function."/>
		</when>
                <when value="Peak_picking">
                    <!-- Options offered for every peak picking method. -->
                    <param name="SNR_picking_method" type="float" value="6"
                        label="Signal to noise ratio"
                        help="The minimal signal to noise ratio for peaks to be considered as a valid peak."/>
                    <param name="blocks_picking" type="integer" value="100"
                        label="Number of blocks"
                        help="Number of blocks in which to divide mass spectrum to calculate noise"/>
                    <param name="window_picking" type="float" value="5"
                        label="Window size"
                        help="Window width for seeking local maxima"/>
                    <!-- Method selector; only "adaptive" exposes an extra option. -->
                    <conditional name="methods_for_picking">
                        <param name="picking_method" type="select" label="Peak picking method">
                            <option value="adaptive" selected="True">adaptive</option>
                            <option value="mad">mad</option>
                            <option value="simple">simple</option>
                        </param>
                        <when value="adaptive">
                            <!-- Help text is kept on a single line so that no line breaks or
                                 indentation whitespace end up inside the attribute value. -->
                            <param name="spar_picking" type="float" value="1.0"
                                label="Spar value"
                                help="Smoothing parameter for the spline smoothing applied to the spectrum in order to decide the cutoffs for throwing away false noise spikes that might occur inside peaks"/>
                        </when>
                        <when value="mad"/>
                        <when value="simple"/>
                    </conditional>
                </when>
                <when value="Peak_alignment">
                    <!-- Alignment tolerance: numeric value plus its unit. -->
                    <param name="value_diffalignment" type="float" value="200" label="tolerance"
                        help="Peaks that differ less than this value will be aligned together"/>
                    <param name="units_diffalignment" type="select" display="radio" optional="False" label="units">
                        <option value="ppm" selected="True">ppm</option>
                        <option value="mz">m/z</option>
                    </param>
                    <!-- Reference choice: none, or m/z values read from a tabular file. -->
                    <conditional name="align_ref_type">
                        <param name="align_reference_datatype" type="select" label="Choose reference">
                            <option value="align_noref" selected="True">no reference</option>
                            <option value="align_table">m/z values from tabular file as reference</option>
                        </param>
                        <when value="align_noref"/>
                        <when value="align_table">
                            <expand macro="reading_1_column_mz_tabular"
                                label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/>
                        </when>
                    </conditional>
                </when>
                <when value="Peak_filtering">
                    <!-- Frequency threshold, restricted to the range 0..1 by min/max. -->
                    <param name="frequ_filtering" type="float" value="0.01" min="0" max="1"
                        label="Minimum frequency"
                        help="Peaks that occur in the dataset in lesser proportion than this will be dropped (0.01 --> filtering for 1% of spectra)"/>
                </when>
                <when value="Peak_binning">
                    <!-- Reference m/z list the peaks are binned to. -->
                    <expand macro="reading_1_column_mz_tabular" label="A reference to which the peaks are binned." help="Tabular file with m/z features to extract from input file"/>
                    <!-- Free-text tolerance: either the literal NA or a number.
                         The whitelist must include "." so that decimal tolerances
                         (e.g. 0.5) are not mangled by the sanitizer. -->
                    <param name="peakbin_tol" value="NA" type="text" label="The tolerance to be used when matching the m/z features in the dataset to the reference. If this is NA, then automatically guess a resolution from the data." >
                        <sanitizer>
                            <valid initial="string.digits">
                                <add value="N" />
                                <add value="A" />
                                <add value="." />
                            </valid>
                        </sanitizer>
                    </param>
                    <param name="peakbin_units" display="radio" type="select" label="The units to use for the tolerance.">
                        <option value="mz" >mz</option>
                        <option value="ppm" selected="True" >ppm</option>
                    </param>
                    <!-- Which measure of a peak is reported as its intensity. -->
                    <param name="peaks_type" type="select" display="radio"
                           label="Should the peak height or area under the curve be taken as the intensity value?">
                            <option value="height" selected="True">height</option>
                            <option value="area">area</option>
                    </param>
                </when>
                <when value="Mass_binning">
                    <!-- Bin size and the unit it is expressed in. -->
                    <param name="bin_width" type="float" value="1"
                        label="The width of a bin in m/z or ppm"
                        help="Width must be greater than range of m/z values divided by number of m/z features"/>
                    <param name="bin_units" type="select" display="radio" label="Unit for bin">
                        <option value="mz" selected="True">mz</option>
                        <option value="ppm">ppm</option>
                    </param>
                    <!-- How intensities falling into the same bin are combined. -->
                    <param name="bin_fun" type="select" display="radio"
                        label="Calculate sum or mean intensity for ions of the same bin">
                        <option value="mean" selected="True">mean</option>
                        <option value="sum">sum</option>
                    </param>
                    <param name="replace_NA_bin" type="boolean" checked="True" truevalue="TRUE" falsevalue="FALSE"
                        label="Replace NA with 0"
                        help="Binning can introduce NAs, should they be replaced with 0"/>
                    <!-- Optional restriction of the m/z axis: explicit range or reference list. -->
                    <conditional name="mz_range">
                        <param name="features_filtering" type="select" label="Select m/z options">
                            <option value="none" selected="True">none</option>
                            <option value="change_mz_range">change m/z range</option>
                            <option value="bin_to_reference">bin m/z to reference</option>
                        </param>
                        <when value="none"/>
                        <when value="change_mz_range">
                            <param name="min_mz" type="float" value="1" label="Minimum value for m/z"/>
                            <param name="max_mz" type="float" value="10000" label="Maximum value for m/z"/>
                        </when>
                        <when value="bin_to_reference">
                            <expand macro="reading_1_column_mz_tabular"
                                label="Tabular file with m/z features as reference for binning. Only the m/z values from the tabular file will be kept."/>
                        </when>
                    </conditional>
                </when>
                <when value="Transformation">
                    <!-- Transformation selector; only log2 carries an extra option. -->
                    <conditional name="transf_conditional">
                        <param name="trans_type" type="select" label="Intensity transformations"
                            help="logarithm base 2 (log2) or squareroot (sqrt)">
                            <option value="log2" selected="True">log2</option>
                            <option value="sqrt">sqrt</option>
                        </param>
                        <when value="log2">
                            <param name="replace_NA_trans" type="boolean" checked="True" truevalue="TRUE" falsevalue="FALSE"
                                label="Replace NA with 0"
                                help="0 values are set to NA before log2 transformation, after transformation they can be set back to 0"/>
                        </when>
                        <when value="sqrt"/>
                    </conditional>
                </when>
            </conditional>
        </repeat>
    </inputs>
    <outputs>
        <!-- imzML dataset; the command section moves out.imzML / out.ibd into its files_path. -->
        <data name="outfile_imzml" format="imzml" label="${tool.name} on ${on_string}: imzML"/>
        <!-- QC report, collected from Preprocessing.pdf in the job working directory. -->
        <data name="QC_overview" format="pdf" from_work_dir="Preprocessing.pdf" label="${tool.name} on ${on_string}: QC"/>
    </outputs>
    <tests>
        <test>
            <!-- Input from the infile_imzml macro. Steps, in order: TIC normalization,
                 sgolay smoothing, adaptive peak picking, peak alignment,
                 peak filtering, sqrt transformation. -->
            <expand macro="infile_imzml"/>
            <!-- step 1: normalization -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Normalization"/>
                    <conditional name="methods_for_normalization">
                        <param name="normalization_method" value="tic"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- step 2: smoothing -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Smoothing"/>
                    <conditional name="methods_for_smoothing">
                        <param name="smoothing_method" value="sgolay"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- step 3: peak picking -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_picking"/>
                    <param name="SNR_picking_method" value="3"/>
                    <param name="blocks_picking" value="3"/>
                    <param name="window_picking" value="3"/>
                    <conditional name="methods_for_picking">
                        <param name="picking_method" value="adaptive"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- step 4: peak alignment with default settings -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_alignment"/>
                </conditional>
            </repeat>
            <!-- step 5: peak filtering -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_filtering"/>
                    <param name="frequ_filtering" value="0.3"/>
                </conditional>
            </repeat>
            <!-- step 6: transformation -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Transformation"/>
                    <conditional name="transf_conditional">
                        <param name="trans_type" value="sqrt"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- expected outputs -->
            <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/>
            <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size">
                <extra_files type="file" name="imzml" file="preprocessing_results1.imzml" lines_diff="6"/>
                <extra_files type="file" name="ibd" file="preprocessing_results1.ibd" compare="sim_size"/>
            </output>
        </test>
        <test>
            <!-- RData input. Steps, in order: adaptive peak picking, peak alignment. -->
            <param name="infile" value="3_files_combined.RData" ftype="rdata"/>
            <!-- step 1: peak picking -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_picking"/>
                    <param name="SNR_picking_method" value="2"/>
                    <param name="blocks_picking" value="3"/>
                    <param name="window_picking" value="5"/>
                    <conditional name="methods_for_picking">
                        <param name="picking_method" value="adaptive"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- step 2: peak alignment with default settings -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_alignment"/>
                </conditional>
            </repeat>
            <!-- expected outputs -->
            <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
            <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size">
                <extra_files type="file" name="imzml" file="preprocessing_results2.imzml" lines_diff="6"/>
                <extra_files type="file" name="ibd" file="preprocessing_results2.ibd" compare="sim_size"/>
            </output>
        </test>
        <test>
            <!-- Input from the infile_analyze75 macro. Steps, in order:
                 RMS normalization, mad peak picking, peak alignment. -->
            <expand macro="infile_analyze75"/>
            <!-- step 1: normalization -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Normalization"/>
                    <conditional name="methods_for_normalization">
                        <param name="normalization_method" value="rms"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- step 2: peak picking -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_picking"/>
                    <param name="SNR_picking_method" value="2"/>
                    <param name="blocks_picking" value="20"/>
                    <param name="window_picking" value="5"/>
                    <conditional name="methods_for_picking">
                        <param name="picking_method" value="mad"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- step 3: peak alignment with default settings -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_alignment"/>
                </conditional>
            </repeat>
            <!-- expected outputs -->
            <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/>
            <output name="outfile_imzml" ftype="imzml" file="preprocessing_results3.imzml.txt" compare="sim_size">
                <extra_files type="file" name="imzml" file="preprocessing_results3.imzml" lines_diff="6"/>
                <extra_files type="file" name="ibd" file="preprocessing_results3.ibd" compare="sim_size"/>
            </output>
        </test>
        <test>
            <!-- Input from the infile_analyze75 macro. Steps, in order: TIC normalization,
                 m/z alignment, m/z recalibration against a calibrant table, m/z binning. -->
            <expand macro="infile_analyze75"/>
            <!-- step 1: normalization; the method is nested in its conditional,
                 matching the other normalization tests in this file -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Normalization"/>
                    <conditional name="methods_for_normalization">
                        <param name="normalization_method" value="tic"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- step 2: m/z alignment with default settings -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="mz_alignment"/>
                </conditional>
            </repeat>
            <!-- step 3: m/z recalibration with a 2 ppm tolerance and a tabular reference -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="mz_recalibration"/>
                    <param name="alignment_tol" value="2"/>
                    <param name="alignment_units" value="ppm"/>
                    <param name="mz_tabular" value="inputcalibrantfile2.txt" ftype="tabular"/>
                    <param name="feature_column" value="1"/>
                    <param name="feature_header" value="TRUE"/>
                </conditional>
            </repeat>
            <!-- step 4: m/z binning -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Mass_binning"/>
                    <param name="bin_width" value="0.1"/>
                    <param name="bin_units" value="mz"/>
                </conditional>
            </repeat>
            <!-- expected outputs -->
            <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/>
            <output name="outfile_imzml" ftype="imzml" file="preprocessing_results4.imzml.txt" compare="sim_size">
                <extra_files type="file" file="preprocessing_results4.imzml" name="imzml" lines_diff="6"/>
                <extra_files type="file" file="preprocessing_results4.ibd" name="ibd" compare="sim_size"/>
            </output>
        </test>
        <test>
            <!-- Input from the processed_infile_imzml macro, read with 200 ppm accuracy.
                 Steps, in order: log2 transformation, baseline reduction. -->
            <expand macro="processed_infile_imzml"/>
            <conditional name="processed_cond">
                <param name="processed_file" value="processed"/>
                <param name="accuracy" value="200"/>
                <param name="units" value="ppm"/>
            </conditional>
            <!-- step 1: transformation -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Transformation"/>
                    <conditional name="transf_conditional">
                        <param name="trans_type" value="log2"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- step 2: baseline reduction with default settings -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Baseline_reduction"/>
                </conditional>
            </repeat>
            <!-- expected outputs; the PDF size check uses an explicit delta -->
            <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size" delta="13000"/>
            <output name="outfile_imzml" ftype="imzml" file="preprocessing_results5.imzml.txt" compare="sim_size">
                <extra_files type="file" name="imzml" file="preprocessing_results5.imzml" lines_diff="6"/>
                <extra_files type="file" name="ibd" file="preprocessing_results5.ibd" compare="sim_size"/>
            </output>
        </test>
    </tests>
    <help>
        <![CDATA[

@CARDINAL_DESCRIPTION@

-----

This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. 

@MSIDATA_INPUT_DESCRIPTION@
- Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed after the data is read by the tool.
@MZ_TABULAR_INPUT_DESCRIPTION@

**Options**

- Normalization: normalization of intensities to total ion current (TIC) or to root-mean-square (RMS)
- Baseline reduction: removes background intensity generated by chemical noise (common in MALDI datasets)
- Smoothing: Smoothing of the peaks reduces noise and improves peak detection
- m/z alignment: removes small m/z shifts between spectra 
- Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards)
- Peak alignment: only possible after peak picking. m/z inaccuracies are removed by aligning matching peaks to a common m/z value. If no reference is given, the peaks are aligned to the local maxima of the mean spectrum of the current dataset. Alternatively, a tabular file with m/z values can be used as an external reference; in that case only the m/z values from the reference will be kept
- Peak filtering: removes peaks that occur only in a small proportion of pixels. If you are not sure which cutoff to choose, run the quality control tool first and decide based on its "number of peaks per m/z" plot
- Peak binning: extracts peaks intensities, either peak height or area under curve (from a profile dataset) for a list of m/z (reference) values
- m/z binning: generates new m/z bins
- Transformation: log2 or square root transformation of all intensities. With log2 transformation, zero intensities become NA, which can lead to compatibility problems; the "Replace NA with 0" option sets them back to 0 after the transformation.
                    

**Output**

- MSI data as continuous imzML file
- pdf with key values and four random mass spectra after each processing step

        ]]>
    </help>
    <expand macro="citations"/>
</tool>