diff msi_preprocessing.xml @ 2:55e081b9f6ac draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 06c2b45d8644b1d7fc01622a5c59dcbf8886d0f1
author galaxyp
date Mon, 23 Apr 2018 17:17:08 -0400
parents 641316f29395
children c17eb2cc0048
line wrap: on
line diff
--- a/msi_preprocessing.xml	Fri Nov 24 18:07:36 2017 -0500
+++ b/msi_preprocessing.xml	Mon Apr 23 17:17:08 2018 -0400
@@ -1,9 +1,11 @@
-<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.7.0.1">
+<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.7.0.2">
     <description>
         mass spectrometry imaging preprocessing
     </description>
     <requirements>
         <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
+        <requirement type="package" version="2.2.1">r-gridextra</requirement>
+        <requirement type="package" version="0.20-35">r-lattice</requirement>
     </requirements>
     <command detect_errors="exit_code">
     <![CDATA[
@@ -26,6 +28,8 @@
     <configfiles>
         <configfile name="cardinal_preprocessing"><![CDATA[
 library(Cardinal)
+library(gridExtra)
+library(lattice)
 
 #if $infile.ext == 'imzml'
     msidata <- readMSIData('infile.imzML')
@@ -36,19 +40,111 @@
 #end if
 
 
-maxpixel = length(pixels(msidata)) 
-pixelnumber = c(1:maxpixel)
+##################################### Preparations for QC report #######################################################################################
+
+#if $outputs.outputs_select == "quality_control":
+
+    ### QC table, column "rawdata": number of m/z features, median count of non-zero intensities
+    ### per spectrum, median intensity and mean TIC (na.rm=TRUE skips missing intensities)
+    maxfeatures = length(features(msidata))
+    medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+    medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+    TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+    QC_numbers= data.frame(rawdata = c(maxfeatures, medianpeaks, medint, TICs))
+    vectorofactions = "rawdata"
+
+    ### Read tabular file with calibrant masses (empty cells and the string NA are read as NA):
+    calibrant_list = read.delim("$outputs.calibrant_file", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE)
+    calibrant_masses = calibrant_list[,$outputs.calibrants_column]
+
+    ### Valid calibrants lie strictly inside the m/z range of the data;
+    ### which() also drops NA entries so that no NA mass reaches image() below
+    inputcalibrants = calibrant_masses[which(calibrant_masses>min(mz(msidata)) & calibrant_masses<max(mz(msidata)))]
+    number_calibrants_in = length(calibrant_masses)
+    number_calibrants_valid = length(inputcalibrants)
+
+
+    ### Quality control report
 
+    ### Open the PDF; an empty plot carries the title, the file name and the number of valid calibrants
+    pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
+    plot(0,type='n',axes=FALSE,ann=FALSE)
+    title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))
+    title(main=paste0("\n\n\n\n Number valid masses in ", "$outputs.calibrant_file.display_name",": ", number_calibrants_valid, "/", number_calibrants_in))
+
+    ### One image of the unprocessed data per valid calibrant, stored under the name rawdata_mass;
+    ### the images are fetched again by name when the report pages are assembled
+    for (calibrant in inputcalibrants)
+    {
+        currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
+                       par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
+                       scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="raw")
+
+        assign(paste("rawdata",calibrant, sep="_"), currentimage)
+    }
+
+
+
+
+
+
+#end if
+
+################################################### Preprocessing steps #######################################################################################
 #for $method in $methods:
     #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization':
         print('Normalization')
         ##normalization
-        msidata <- normalize(msidata, pixel=pixelnumber, method="tic")
+
+        msidata <- normalize(msidata, method="tic")
+
+        #if $outputs.outputs_select == "quality_control":
+            ### QC table column "normalized": number of features, median count of non-zero intensities
+            ### per spectrum, median intensity, mean TIC (na.rm=TRUE keeps them defined with missing intensities)
+            maxfeatures = length(features(msidata))
+            medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+            medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+            TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+            normalized = c(maxfeatures, medianpeaks, medint, TICs)
+            QC_numbers= cbind(QC_numbers, normalized)
+            ### preparation for QC plots
+            vectorofactions = append(vectorofactions, "normalized")
+
+            ### one image per calibrant, stored as normalized_mass and fetched by name for the report
+            for (calibrant in inputcalibrants)
+            {
+                currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
+                               par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
+                               scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="normalized")
+
+                assign(paste("normalized",calibrant, sep="_"), currentimage)
+            }
+        #end if
 
     #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction':
         print('Baseline_reduction')
         ##baseline reduction
-        msidata = reduceBaseline(msidata, pixel=pixelnumber, method="median", blocks=$method.methods_conditional.blocks_baseline)
+        msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline)
+
+        #if $outputs.outputs_select == "quality_control":
+            ### QC table column "baseline": features, median non-zero intensities per spectrum, median intensity, mean TIC (na.rm=TRUE skips missing intensities)
+            maxfeatures = length(features(msidata))
+            medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+            medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+            TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+            baseline= c(maxfeatures, medianpeaks, medint, TICs)
+            QC_numbers= cbind(QC_numbers, baseline)
+            ### preparation for QC plots: one image per calibrant, stored as baseline_rem_mass
+            vectorofactions = append(vectorofactions, "baseline_rem")
+
+            for (calibrant in inputcalibrants)
+            {
+                currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
+                               par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
+                               scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="baseline removed")
+                assign(paste("baseline_rem",calibrant, sep="_"), currentimage)
+            }
+        #end if
 
     #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing':
         print('Smoothing')
@@ -65,6 +161,25 @@
             msidata <- smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
         #end if
 
+        #if $outputs.outputs_select == "quality_control":
+            ### QC table column "smoothed": features, median non-zero intensities per spectrum, median intensity, mean TIC (na.rm=TRUE skips missing intensities)
+            maxfeatures = length(features(msidata))
+            medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+            medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+            TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+            smoothed= c(maxfeatures, medianpeaks, medint, TICs)
+            QC_numbers= cbind(QC_numbers, smoothed)
+            ### preparation for QC plots: one image per calibrant, stored as smoothed_mass
+            vectorofactions = append(vectorofactions, "smoothed")
+
+            for (calibrant in inputcalibrants)
+            {
+                currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
+                               par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
+                               scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="smoothed")
+                assign(paste("smoothed",calibrant, sep="_"), currentimage)
+            }
+        #end if
 
     #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking':
         print('Peak_picking')
@@ -72,62 +187,231 @@
 
         #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
             print('adaptive peakpicking')
-            msidata = peakPick(msidata, pixel=pixelnumber, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, spar=$method.methods_conditional.methods_for_picking.spar_picking)
+            msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, spar=$method.methods_conditional.methods_for_picking.spar_picking)
 
         #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'limpic':
             print('limpic peakpicking')
-            msidata = peakPick(msidata, pixel=pixelnumber, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, thresh=$method.methods_conditional.methods_for_picking.tresh_picking)
+            msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, thresh=$method.methods_conditional.methods_for_picking.tresh_picking)
 
         #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple':
             print('simple peakpicking')
-            msidata = peakPick(msidata, pixel=pixelnumber, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method)
+            msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method)
         #end if
 
+        #if $outputs.outputs_select == "quality_control":
+            ### QC table column "picked": features, median non-zero intensities per spectrum, median intensity, mean TIC (na.rm=TRUE skips missing intensities)
+            maxfeatures = length(features(msidata))
+            medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+            medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+            TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+            picked= c(maxfeatures, medianpeaks, medint, TICs)
+            QC_numbers= cbind(QC_numbers, picked)
+            ### preparation for QC plots: one image per calibrant, stored as picked_mass
+            vectorofactions = append(vectorofactions, "picked")
 
+            for (calibrant in inputcalibrants)
+            {
+                currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
+                               par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
+                               scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="picked")
+                assign(paste("picked",calibrant, sep="_"), currentimage)
+            }
+        #end if
 
     #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment':
         print('Peak_alignment')
         ## Peakalignment
 
+        #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref':
+            ### no external reference chosen: the dataset itself is handed to peakAlign as ref
+            align_peak_reference = msidata
+
+        #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table':
+            ### reference masses come from one column of a tabular file (empty cells are read as NA)
+            align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE)
+            align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.align_mass_column]
+
+            ### keep reference masses inside the m/z range of the data; which() also drops NA entries
+            align_peak_reference = align_reference_column[which(align_reference_column>=min(mz(msidata)) & align_reference_column<=max(mz(msidata)))]
+            ### if no reference mass is left (length == 0), 0 is used as reference
+            if (length(align_peak_reference) == 0)
+            {
+                align_peak_reference = 0
+            }
+
+        #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_msidata_ref':
+                  ### reference is an object stored in an RData file
+                  loadRData <- function(fileName){
+                  ## load into a private environment and return the first stored object, so that files with several objects or an object called fileName still work
+                  loaded_env = new.env()
+                  get(load(fileName, envir = loaded_env)[1], envir = loaded_env)
+                }
+                align_peak_reference = loadRData('$method.methods_conditional.align_ref_type.align_peaks_msidata')
+        #end if
+
         #if str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'diff':
-            print('diff peaklignment')
-            msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',diff.max =$method.methods_conditional.methods_for_alignment.value_diffalignment, units = "$method.methods_conditional.methods_for_alignment.units_diffalignment")
+            print('diff peakalignment')
+            msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',diff.max =$method.methods_conditional.methods_for_alignment.value_diffalignment, units = "$method.methods_conditional.methods_for_alignment.units_diffalignment", ref=align_peak_reference)
 
        #elif str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'DP':
-            print('DPpeaklignment')
-        msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment )
+            print('DPpeakalignment')
+        msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment, ref=align_peak_reference)
        #end if
 
+        #if $outputs.outputs_select == "quality_control":
+            ### QC table column "aligned": features, median non-zero intensities per spectrum, median intensity, mean TIC (na.rm=TRUE skips missing intensities)
+            maxfeatures = length(features(msidata))
+            medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+            medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+            TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+            aligned= c(maxfeatures, medianpeaks, medint, TICs)
+            QC_numbers= cbind(QC_numbers, aligned)
+            ### preparation for QC plots: one image per calibrant, stored as aligned_mass
+            vectorofactions = append(vectorofactions, "aligned")
+
+            for (calibrant in inputcalibrants)
+            {
+                currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
+                               par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
+                               scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="aligned")
+                assign(paste("aligned",calibrant, sep="_"), currentimage)
+            }
+        #end if
+
     #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering':
         print('Peak_filtering')
         msidata = peakFilter(msidata, method='freq', freq.min = $method.methods_conditional.frequ_filtering)
+
+        #if $outputs.outputs_select == "quality_control":
+            ### QC table column "filtered": features, median non-zero intensities per spectrum, median intensity, mean TIC (na.rm=TRUE skips missing intensities)
+            maxfeatures = length(features(msidata))
+            medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+            medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+            TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+            filtered= c(maxfeatures, medianpeaks, medint, TICs)
+            QC_numbers= cbind(QC_numbers, filtered)
+            ### preparation for QC plots: one image per calibrant, stored as filtered_mass
+            vectorofactions = append(vectorofactions, "filtered")
+
+            for (calibrant in inputcalibrants)
+            {
+                currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
+                               par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
+                               scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="filtered")
+                assign(paste("filtered",calibrant, sep="_"), currentimage)
+            }
+        #end if
+
+    #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction':
+        print('Data_reduction')
+
+        #if str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'bin':
+            print('bin reduction')
+            msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun)
+
+        #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample':
+            print('resample reduction')
+            msidata = reduceDimension(msidata, method="resample", step=$method.methods_conditional.methods_for_reduction.resample_step)
+
+        #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'peaks':
+            print('peaks reduction')
+
+            #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table':
+                reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE)
+                reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.mass_column]
+                ### keep reference masses strictly inside the m/z range of the data; which() also drops NA entries created by empty cells
+                peak_reference = reference_column[which(reference_column>min(mz(msidata)) & reference_column<max(mz(msidata)))]
+            #elif str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'msidata_ref':
+                      loadRData <- function(fileName){
+                      ## load into a private environment and return the first stored object, so that files with several objects or an object called fileName still work
+                      loaded_env = new.env()
+                      get(load(fileName, envir = loaded_env)[1], envir = loaded_env)
+                    }
+                    peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata')
+            #end if
+            msidata = reduceDimension(msidata, method="peaks", ref=peak_reference, type="$method.methods_conditional.methods_for_reduction.peaks_type")
+        #end if
+
+        #if $outputs.outputs_select == "quality_control":
+            ### QC table column "reduced": features, median non-zero intensities per spectrum, median intensity, mean TIC (na.rm=TRUE skips missing intensities)
+            maxfeatures = length(features(msidata))
+            medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+            medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+            TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+            reduced= c(maxfeatures, medianpeaks, medint, TICs)
+            QC_numbers= cbind(QC_numbers, reduced)
+            ### preparation for QC plots: one image per calibrant, stored as reduced_mass
+            vectorofactions = append(vectorofactions, "reduced")
+
+            for (calibrant in inputcalibrants)
+            {
+                currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
+                               par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
+                               scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="reduced")
+                assign(paste("reduced",calibrant, sep="_"), currentimage)
+            }
+        #end if
     #end if
 #end for
 
+####################################################### Outputs: RData, tabular and QC report ######################################################
 
-## save as as (.RData)
+
+## save as (.RData)
 save(msidata, file="$msidata_preprocessed")
 
-if (length(features(msidata))> 0)
-{
+
+#if $output_matrix:
+
+    ### Optional tabular output: intensity matrix with the m/z values as row names
+    if (length(features(msidata))> 0)
+    {
+        ## save as intensity matrix
+        spectramatrix = spectra(msidata)
+        rownames(spectramatrix) = mz(msidata)
+        ### the pixels row is bound on top and left out again when writing;
+        ### drop=FALSE keeps a single remaining feature a one-row matrix instead of a plain vector
+        newmatrix = rbind(pixels(msidata), spectramatrix)
+        write.table(newmatrix[2:nrow(newmatrix), , drop=FALSE], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+    }else{
+        ### no feature left: write one row holding the row names of coord(msidata)
+        print("file has no features left")
+        write.table(matrix(rownames(coord(msidata)), ncol=ncol(msidata), nrow=1), file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
+    }
+
+#end if
+
 
-## save as intensity matrix
-spectramatrix = spectra(msidata)
-rownames(spectramatrix) = mz(msidata)
-newmatrix = rbind(pixels(msidata), spectramatrix)
-write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+#if $outputs.outputs_select == "quality_control":
+    ### QC table drawn into the open PDF device, one row per processing stage; the TIC entry is mean(colSums(...)), hence the label mean TIC
+    rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "mean TIC")
+    grid.table(t(QC_numbers))
 
-}else{
-  print("file has no features left")
-write.table(matrix(rownames(coord(msidata)), ncol=ncol(msidata), nrow=1), file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
-}
+    for (calibrant in inputcalibrants)
+    {
+        ### fetch the image stored for this calibrant after every recorded stage,
+        ### in the order of vectorofactions (names have the form stage_mass)
+        imagelist = lapply(vectorofactions, function(action)
+        {
+            get(paste(action, calibrant, sep="_"))
+        })
+
+        ### arrange all stages of this calibrant with grid.arrange
+        do.call(grid.arrange, imagelist)
+
+    }
+
+
+    dev.off()
+
+#end if
 
 
     ]]></configfile>
     </configfiles>
     <inputs>
         <param name="infile" type="data" format="imzml,rdata,analyze75"
-            label="MSI rawdata as imzml or Cardinal MSImageSet saved as RData"
+            label="MSI rawdata as imzml, analyze7.5 or Cardinal MSImageSet saved as RData"
             help="load imzml and ibd file by uploading composite datatype imzml"/>
         <repeat name="methods" title="Preprocessing" min="1" max="50">
             <conditional name="methods_conditional">
@@ -138,11 +422,13 @@
                     <option value="Peak_picking">Peak picking</option>
                     <option value="Peak_alignment">Peak alignment</option>
                     <option value="Peak_filtering">Peak filtering</option>
+                    <option value="Data_reduction">Data reduction</option>
                 </param>
+
                 <when value="Normalization"/>
                 <when value="Baseline_reduction">
                     <param name="blocks_baseline" type="integer" value="50"
-                        label="blocks"/>
+                        label="Blocks"/>
                 </when>
                 <when value="Smoothing">
                     <conditional name="methods_for_smoothing">
@@ -171,25 +457,25 @@
                     <param name="SNR_picking_method" type="integer" value="3"
                         label="Signal to noise ratio"
                         help="The minimal signal to noise ratio for peaks to be considered as a valid peak."/>
-                    <param name="blocks_picking" type="integer" value="100" label = "number of blocks"
+                    <param name="blocks_picking" type="integer" value="100" label = "Number of blocks"
                         help="Number of blocks in which to divide mass spectrum to calculate noise"/>
-                    <param name="window_picking" type="integer" value="5" label= "window size" help="Window width for seeking local maxima"/>
+                    <param name="window_picking" type="integer" value="5" label= "Window size" help="Window width for seeking local maxima"/>
                     <conditional name="methods_for_picking">
-                        <param name="picking_method" type="select" label="Peak picking method" help = "only simple works for processed imzML files">
+                        <param name="picking_method" type="select" label="Peak picking method" help="only simple works for processed imzML files">
                             <option value="adaptive" selected="True">adaptive</option>
                             <option value="limpic">limpic</option>
                             <option value="simple">simple</option>
                         </param>
                         <when value="adaptive">
                             <param name="spar_picking" type="float" value="1.0"
-                                label="spar value" 
+                                label="Spar value" 
                                 help = "Smoothing parameter for the spline smoothing 
                                   applied to the spectrum in order to decide the cutoffs 
                                   for throwing away false noise spikes that might occur inside peaks"/>
                         </when>
                         <when value="limpic">
                             <param name="tresh_picking" type="float" value="0.75"
-                                label="thresh value" help = "The thresholding quantile to use when comparing slopes in order to throw away peaks that are too flat"/>
+                                label="thresh value" help="The thresholding quantile to use when comparing slopes in order to throw away peaks that are too flat"/>
                         </when> 
                         <when value="simple"/>
                     </conditional>
@@ -204,27 +490,111 @@
                             <param name="value_diffalignment" type="integer" value="200"
                                    label="diff.max" help="Peaks that differ less than this value will be aligned together"/>
                             <param name="units_diffalignment" type="select" display = "radio" optional = "False"
-                                   label="units" help= "The coefficients for the moving average filter">
+                                   label="units" help="Unit of the diff.max value: parts per million (ppm) or Dalton (Da)">
                                     <option value="ppm" selected="True">ppm</option>
                                     <option value="Da">Da</option>
                             </param>
                         </when>
                         <when value="DP">
                             <param name="gap_DPalignment" type="integer" value="0"
-                                   label="gap" help = "The gap penalty for the dynamic programming sequence alignment"/>
+                                   label="Gap" help="The gap penalty for the dynamic programming sequence alignment"/>
+                        </when>
+                    </conditional>
+                    <conditional name="align_ref_type">
+                        <param name="align_reference_datatype" type="select" label="Choose reference">
+                            <option value="align_noref" selected="True">no reference</option>
+                            <option value="align_table" >tabular file as reference</option>
+                            <option value="align_msidata_ref">msidata file as reference</option>
+                        </param>
+                        <when value="align_noref"/>
+                        <when value="align_table">
+                            <param name="align_peaks_table" type="data" format="tabular" 
+                                label="Reference mz values to use for alignment - only these will be kept" help="One column with mz values (without empty cells or letters)"/>
+                            <param name="align_mass_column" data_ref="align_peaks_table" label="Column with reference mz" type="data_column"/>
+                        </when>
+                        <when value="align_msidata_ref">
+                            <param name="align_peaks_msidata" type="data" format="rdata" label="Picked and aligned Cardinal MSImageSet saved as RData"/>
                         </when>
                     </conditional>
                 </when>
                 <when value="Peak_filtering">
-                    <param name="frequ_filtering" type="float" value="1"
-                        label="freq.min" help = "Peaks that occur in the dataset fewer times than this will be removed"/>
+                    <param name="frequ_filtering" type="integer" value="1000"
+                        label="Freq.min" help="Peaks that occur in the dataset fewer times than this will be removed. Number should be between 1 (no filtering) and number of spectra (pixel)"/>
+                </when>
+                <when value="Data_reduction">
+                    <conditional name="methods_for_reduction">
+                        <param name="reduction_method" type="select" label="Reduction method">
+                            <option value="bin" selected="True">bin</option>
+                            <option value="resample">resample</option>
+                            <option value="peaks">peaks</option>
+                        </param>
+                        <when value="bin">
+                            <param name="bin_width" type="float" value="1"
+                                   label="The width of a bin in mz or ppm" help="Width must be greater than Range of mz values/Number of mz features"/>
+                            <param name="bin_units" type="select" display="radio"
+                                   label="Unit for bin">
+                                    <option value="mz" selected="True">mz</option>
+                                    <option value="ppm">ppm</option>
+                            </param>
+                            <param name="bin_fun" type="select" display="radio"
+                                   label="Calculate sum or mean intensity for ions of the same bin">
+                                    <option value="mean" selected="True">mean</option>
+                                    <option value="sum">sum</option>
+                            </param>
+                        </when>
+                        <when value="resample">
+                            <param name="resample_step" type="float" value="1"
+                                   label="The step size in mz" help="Step size must be greater than Range of mz values/Number of mz features"/>
+                        </when>
+                        <when value="peaks">
+                            <param name="peaks_type" type="select" display="radio"
+                                   label="Should the peak height or area under the curve be taken as the intensity value?">
+                                    <option value="height" selected="True">height</option>
+                                    <option value="area">area</option>
+                            </param>                            
+                            <conditional name="ref_type">
+                                <param name="reference_datatype" type="select" label="Choose reference datatype">
+                                    <option value="table" selected="True">tabular file</option>
+                                    <option value="msidata_ref">msidata file</option>
+                                </param>
+                                <when value="table">
+                                    <param name="peaks_table" type="data" format="tabular" 
+                                        label="Reference mz values to use to reduce the dimension" help="One column with mz values (without empty cells or letters, mz outside mz range are not used for filtering)"/>
+                                    <param name="mass_column" data_ref="peaks_table" label="Column with reference mz" type="data_column"/>
+                                </when>
+                                <when value="msidata_ref">
+                                    <param name="peaks_msidata" type="data" format="rdata" label="Picked and aligned Cardinal MSImageSet saved as RData"/>
+                                </when>
+                            </conditional>
+                        </when>
+                    </conditional>
                 </when>
             </conditional>
         </repeat>
+        <conditional name="outputs">
+            <param name="outputs_select" type="select" label="Quality control output">
+                <option value="quality_control" selected="True">yes</option>
+                <option value="no_quality_control">no</option>
+            </param>
+            <when value="quality_control">
+                <param name="calibrant_file" type="data" format="tabular"
+                 label="Provide a list of masses which will be plotted in the quality control report"
+                 help="Use internal calibrant masses"/>
+                 <param name="calibrants_column" data_ref="calibrant_file" label="Column with masses" type="data_column"/>
+                 <param name="plusminus_dalton" value="0.25" type="float" label="Mass range" help="Plusminus mass window in Dalton (numeric value, inserted unquoted into the R script)"/>
+            </when>
+            <when value="no_quality_control"/>
+        </conditional>
+        <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/>
     </inputs>
     <outputs>
-        <data format="rdata" name="msidata_preprocessed" label="${tool.name} on $infile.display_name"/>
-        <data format="tabular" name="matrixasoutput" label="${tool.name} on $infile.display_name" />
+        <data format="rdata" name="msidata_preprocessed" label="Preprocessed ${on_string}"/>
+        <data format="pdf" name="QC_plots" from_work_dir="Preprocessing.pdf" label = "QC preprocessing report on ${on_string}">
+            <filter>outputs["outputs_select"] == "quality_control"</filter>
+        </data>
+        <data format="tabular" name="matrixasoutput" label="Intensity matrix ${on_string}">
+            <filter>output_matrix</filter>
+        </data>
     </outputs>
     <tests>
         <test>
@@ -239,12 +609,6 @@
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Baseline_reduction" />
-                    <param name="blocks_baseline" value="100" />
-                </conditional>
-            </repeat>
-            <repeat name="methods">
-                <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Smoothing" />
                     <conditional name="methods_for_smoothing">
                         <param name="smoothing_method" value="gaussian" />
@@ -269,22 +633,19 @@
                     </conditional>
                 </conditional>
             </repeat>
-            <!--repeat name="methods">
+            <repeat name="methods">
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Peak_filtering" />
+                    <param name="frequ_filtering" value="2"/>
                 </conditional>
-            </repeat-->
+            </repeat>
+            <param name="outputs_select" value="no_quality_control"/>
+            <param name="output_matrix" value="True"/>
             <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size" />
             <output name="matrixasoutput" file="preprocessing_results1.txt" />
         </test>
         <test>
             <param name="infile" value="example_continous.RData" ftype="rdata"/>
-            <!--repeat name="methods">
-                <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Baseline_reduction" />
-                    <param name="blocks_baseline" value="3" />
-                </conditional>
-            </repeat-->
             <repeat name="methods">
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Peak_picking" />
@@ -299,8 +660,14 @@
                     <param name="methods_for_alignment" value="DP" />
                 </conditional>
             </repeat>
+            <param name="outputs_select" value="quality_control"/>
+            <param name="calibrant_file" ftype="tabular" value="inputcalibrantfile2.tabular"/>
+            <param name="calibrants_column" value="1"/>
+            <param name="plusminus_dalton" value="0.25"/>
+            <param name="output_matrix" value="True"/>
             <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size" />
             <output name="matrixasoutput" file="preprocessing_results2.txt" lines_diff="2" />
+            <output name="QC_plots" file="preprocessing_results2.pdf" compare="sim_size"/>
         </test>
         <test>
             <param name="infile" value="" ftype="analyze75">
@@ -313,12 +680,6 @@
                     <param name="preprocessing_method" value="Normalization" />
                 </conditional>
             </repeat>
-            <!--repeat name="methods">
-                <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Baseline_reduction" />
-                    <param name="blocks_baseline" value="50" />
-                </conditional>
-            </repeat-->
             <repeat name="methods">
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Peak_picking" />
@@ -333,10 +694,55 @@
                     <param name="methods_for_alignment" value="diff" />
                 </conditional>
             </repeat>
+            <param name="outputs_select" value="quality_control"/>
+            <param name="calibrant_file" ftype="tabular" value="inputcalibrantfile2.tabular"/>
+            <param name="calibrants_column" value="1"/>
+            <param name="plusminus_dalton" value="0.25"/>
             <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size" />
-            <output name="matrixasoutput" file="preprocessing_results3.txt" />
+            <output name="QC_plots" file="preprocessing_results3.pdf" compare="sim_size"/>
         </test>
-
+        <test> <!-- Analyze75 composite input: Normalization followed by Data_reduction (bin_width 0.1) -->
+            <param name="infile" value="" ftype="analyze75">
+                <composite_data value="Analyze75.hdr"/>
+                <composite_data value="Analyze75.img"/>
+                <composite_data value="Analyze75.t2m"/>
+            </param>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="preprocessing_method" value="Normalization" />
+                </conditional>
+            </repeat>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="preprocessing_method" value="Data_reduction" />
+                    <param name="bin_width" value="0.1" />
+                </conditional>
+            </repeat>
+            <param name="outputs_select" value="no_quality_control"/> <!-- QC report disabled, so no QC_plots output is compared in this test -->
+            <param name="output_matrix" value="True"/> <!-- requests the tabular intensity matrix compared below -->
+            <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size" />
+            <output name="matrixasoutput" file="preprocessing_results4.txt" />
+        </test>
+        <test> <!-- imzML composite input: a single Data_reduction step (step_width 0.1) with QC report and intensity matrix -->
+            <param name="infile" value="" ftype="imzml">
+                <composite_data value="Example_Continuous.imzML"/>
+                <composite_data value="Example_Continuous.ibd"/>
+            </param>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="preprocessing_method" value="Data_reduction" />
+                    <param name="step_width" value="0.1" />
+                </conditional>
+            </repeat>
+            <param name="outputs_select" value="quality_control"/> <!-- QC report enabled; the three calibrant params that follow belong to this branch -->
+            <param name="calibrant_file" ftype="tabular" value="inputcalibrantfile1.tabular"/>
+            <param name="calibrants_column" value="1"/>
+            <param name="plusminus_dalton" value="0.25"/>
+            <param name="output_matrix" value="True"/> <!-- requests the tabular intensity matrix compared below -->
+            <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size" />
+            <output name="matrixasoutput" file="preprocessing_results5.txt" />
+            <output name="QC_plots" file="preprocessing_results5.pdf" compare="sim_size"/>
+        </test>
     </tests>
     <help>
         <![CDATA[
@@ -348,8 +754,9 @@
 - Normalization: Normalization of intensities to total ion current (TIC)
 - Baseline reduction: Baseline  reduction removes backgroundintensity generated by chemical noise (common in MALDI datasets)
 - Peak picking: relevant peaks are picked while noise-peaks are removed
-- Peak alignment: mz inaccuracies are removed by alignment of same peaks to a common mz value
-- Peak filtering: removes peaks that occur infrequently, such as those which only occur in a small proportion of pixels
+- Peak alignment: after peak picking, mz inaccuracies are removed by alignment of same peaks to a common mz value
+- Peak filtering: after peak picking and alignment, removes peaks that occur infrequently, such as those which only occur in a small proportion of pixels. If not sure which cutoff to choose, run qualitycontrol first and decide according to the zero value plot.
+- Data reduction: binning, resampling or peak filtering to reduce data
         ]]>
     </help>
     <citations>