view maldi_quant_peakdetection.xml @ 2:41c148280a08 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit d2f311f7fff24e54c565127c40414de708e31b3c
author galaxyp
date Thu, 25 Oct 2018 07:23:53 -0400
parents 96264fce1847
children c42549f04fdd
line wrap: on
line source

<tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.2">
    <description>
        Peak detection, binning and filtering for mass-spectrometry imaging data
    </description>
    <macros>
        <import>maldi_macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code">
    <![CDATA[
        ## Stage the input under a fixed file name in the job working directory.
        #if $infile.ext == 'imzml'
            cp '${infile.extra_files_path}/imzml' infile.imzML &&
            cp '${infile.extra_files_path}/ibd' infile.ibd &&
        #elif $infile.ext == 'analyze75'
            cp '${infile.extra_files_path}/hdr' infile.hdr &&
            cp '${infile.extra_files_path}/img' infile.img &&
            cp '${infile.extra_files_path}/t2m' infile.t2m &&
        #else
            ## every other datatype is linked as infile.RData; the R script reads tabular input from its original path
            ln -s '$infile' infile.RData &&
        #end if
        Rscript '${maldi_quant_peak_detection}' &&
        ## -p: do not fail when the directory already exists
        mkdir -p '$outfile_imzml.files_path' &&
        ## "| true" deliberately hides a failing mv: the R script writes no out.imzMl/out.ibd when spectra were averaged.
        ## NOTE(review): do not replace it by "|| true" inside this && chain, that would also hide a failing Rscript call.
        mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
        mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
        echo "imzML file:" > '$outfile_imzml' &&
        ls -l '$outfile_imzml.files_path' >> '$outfile_imzml'
    ]]>
    </command>
    <configfiles>
        <configfile name="maldi_quant_peak_detection"><![CDATA[

@R_IMPORTS@

## ---------------------------------------------------------------------------
## Data import. Afterwards the script expects:
##   peaks            list of MassPeaks objects      (centroided input)
##   maldi_data       list of MassSpectrum objects   (profile input)
##   pixelnames       one "xy_<x>_<y>" name per spectrum
##   coordinates_info x, y and running pixel index (used for the annotation merge;
##                    not available for tabular peak lists)
## ---------------------------------------------------------------------------
#if $restriction_conditional.restriction == 'restrict':

    print('Reading mask region')

    ## Import only the pixels listed in the first two columns of the coordinates file
    coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2]

    ## same object naming as for the unrestricted imzML import below
    #if str($centroids) == "TRUE"
        peaks <- importImzMl('infile.imzML',
                     coordinates = coordinate_matrix, centroided = $centroids)
        pixelnames = paste("xy", coordinates(peaks)[,1],coordinates(peaks)[,2], sep="_")
        coordinates_info = cbind(coordinates(peaks)[,1:2], seq_along(peaks))
    #else
        maldi_data <- importImzMl('infile.imzML',
                     coordinates = coordinate_matrix, centroided = $centroids)
        pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_")
        coordinates_info = cbind(coordinates(maldi_data)[,1:2], seq_along(maldi_data))
    #end if


#else:

    print('Reading entire file')
    ## Import imzML file

    #if $infile.ext == 'imzml'
        print('imzML file')
        #if str($centroids) == "TRUE"
            peaks <- importImzMl('infile.imzML', centroided = $centroids)
            pixelnames = paste("xy", coordinates(peaks)[,1],coordinates(peaks)[,2], sep="_")
            coordinates_info = cbind(coordinates(peaks)[,1:2], seq_along(peaks))
        #else
            maldi_data <- importImzMl('infile.imzML', centroided = $centroids)
            pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_")
            coordinates_info = cbind(coordinates(maldi_data)[,1:2], seq_along(maldi_data))
        #end if


    #elif $infile.ext == 'tabular'
        print('tabular file')
        #set $centroids = "TRUE" ## will be used in some if conditions
        peak_tabular = read.delim("$infile", header = TRUE, stringsAsFactors = FALSE)
        ## split() orders its groups by factor level (alphabetically for plain strings).
        ## Fixing the levels to the order of first appearance keeps peaks and pixelnames aligned.
        pixelnames = unique(peak_tabular\$spectrum)
        peak_list = split(peak_tabular, f = factor(peak_tabular\$spectrum, levels = pixelnames))

        peaks = vector("list", length(peak_list))
        for (spectra in seq_along(peak_list))
        {
            peaks[[spectra]] = createMassPeaks(peak_list[[spectra]]\$mass, peak_list[[spectra]]\$intensity, snr=peak_list[[spectra]]\$snr)
        }

    #else
        print('rdata file')
        ## loads an RData file and returns the object stored in it
        loadRData <- function(fileName){
            load(fileName)
            get(ls()[ls() != "fileName"])
        }
        msidata = loadRData('infile.RData')
        centroided(msidata) = $centroids
        ## change to correct pixelnames

        x_coords = unlist(lapply(strsplit(names(Cardinal::pixels(msidata)), ","), `[[`, 1))
        y_coords = unlist(lapply(strsplit(names(Cardinal::pixels(msidata)), ","), `[[`, 2))
        x_coordinates = gsub("x = ","",x_coords)
        y_coordinates = gsub(" y = ","",y_coords)
        pixelnames = paste0("xy_", x_coordinates, "_", y_coordinates)

        cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2])
        ## built once and for both modes, so annotations also work for centroided data
        coordinates_info = cbind(cardinal_coordinates, seq_len(ncol(msidata)))

        if (centroided(msidata) == FALSE){
            ## create one mass spectrum object per pixel
            cardinal_mzs = Cardinal::mz(msidata)
            maldi_data = vector("list", ncol(msidata))
            for(number_spectra in seq_len(ncol(msidata))){
                maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra])
            }
        }else{
            ## create one mass peaks object per pixel; no signal-to-noise values are available
            peaks = vector("list", ncol(msidata))
            for (spectra in seq_len(ncol(msidata)))
            {
                peaks[[spectra]] = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=rep(NA_real_, nrow(msidata)))
            }
        }
    #end if
#end if

## summarized_spectra is switched to TRUE when peaks are detected on averaged spectra;
## the imzML export at the end of the script is skipped in that case
summarized_spectra = FALSE

## Quality control plots during peak detection are collected in one PDF
pdf("peaks_qc_plot.pdf", fonts = "Times", pointsize = 12)
## empty first page that only carries the title
plot(0,type='n',axes=FALSE,ann=FALSE)

## the name of the input file in the Galaxy history is used as title
#set $filename = $infile.display_name

title(main=paste("$filename"))

## plot input file spectrum (same centroid test as everywhere else in this script)
#if str($centroids) == "TRUE"
    plot(peaks[[1]], main="First spectrum of input file")
#else
    avgSpectra <- averageMassSpectra(maldi_data,method="mean")
    plot(avgSpectra, main="Average spectrum of input file")
#end if


## QC numbers for the input file, computed on whichever object the import created
#if str($centroids) == "TRUE"
    qc_source = peaks
#else
    qc_source = maldi_data
#end if
qc_source_mz = unlist(lapply(qc_source, mass))
qc_source_intensity = unlist(lapply(qc_source, intensity))
pixel_number = length(qc_source)
minmz = round(min(qc_source_mz), digits=4)
maxmz = round(max(qc_source_mz), digits=4)
## number of m/z values per spectrum
mean_features = round(length(qc_source_mz)/length(qc_source), digits=2)
medint = round(median(qc_source_intensity), digits=2)
number_features = length(unique(qc_source_mz))
inputdata = c(minmz, maxmz, number_features, mean_features, medint)
## first column of the QC table; every processing step appends another column
QC_numbers = data.frame(inputdata = inputdata)
vectorofactions = "inputdata"

#if str($tabular_annotation.load_annotation) == 'yes_annotation':

    ## read the annotation file and keep only the x, y and annotation columns chosen by the user
    annotation_table <- read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
    annotation_input <- annotation_table[, c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
    ## default column names, independent of the header of the file
    colnames(annotation_input) <- c("x", "y", "annotation")

    ## attach the annotation to every pixel of the MSI data; pixels without annotation get the label "NA"
    colnames(coordinates_info)[3] <- "pixel_index"
    merged_annotation <- merge(coordinates_info, annotation_input, by = c("x", "y"), all.x = TRUE)
    merged_annotation[is.na(merged_annotation)] <- "NA"
    ## merge() reorders the rows, so restore the pixel order of the spectra
    merged_annotation <- merged_annotation[order(merged_annotation\$pixel_index), ]
    samples <- as.factor(merged_annotation\$annotation)

    ## overview of the annotated pixels for the QC PDF
    annotation_plot <- ggplot(merged_annotation, aes(x = x, y = y, fill = annotation)) +
        geom_tile() +
        coord_fixed() +
        ggtitle("Spatial orientation of annotated data") +
        theme_bw() +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme(text = element_text(family = "ArialMT", face = "bold", size = 12)) +
        theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8)) +
        theme(legend.position = "bottom", legend.direction = "vertical") +
        guides(fill = guide_legend(ncol = 4, byrow = TRUE))

    print(annotation_plot)

#end if

#################### Preprocessing methods #####################################

#for $method in $methods:

    #if str( $method.methods_conditional.method ) == 'Peak_detection':
        print('peak detection')
        ## Peak detection on the profile spectra in maldi_data; the result is stored in peaks

        #if $method.methods_conditional.use_annotations:
            ## spectra with the same annotation are averaged, peaks are picked on the average spectra
            maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean")
            pixelnames = levels(samples)
            ## averaged spectra cannot be exported as imzML
            summarized_spectra = TRUE

        #end if

        peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method",
                  halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr)

        ## QC plot and numbers
        ## plot old spectrum with the noise threshold in blue and picked peaks in green
        noise = estimateNoise(maldi_data[[1]], method= "$method.methods_conditional.peak_method")
        plot(maldi_data[[1]], main="First spectrum with noise line (blue) and picked peaks (green)")
        lines(noise[,1], noise[,2]*$method.methods_conditional.snr, col="blue")
        points(peaks[[1]], col="green", pch=20)
        ## plot new spectrum
        plot(peaks[[1]], main="First spectrum after peak detection")
        peak_mz = unlist(lapply(peaks,mass))
        minmz = round(min(peak_mz), digits=4)
        maxmz = round(max(peak_mz), digits=4)
        mean_features = round(length(peak_mz)/length(peaks), digits=2)
        medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
        number_features = length(unique(peak_mz))
        peaks_picked = c(minmz, maxmz,number_features,mean_features,  medint)
        QC_numbers= cbind(QC_numbers, peaks_picked)
        vectorofactions = append(vectorofactions, "peaks_picked")

        ## the intensity matrix is only written when at least one spectrum still has peaks
        if (any(!vapply(peaks, isEmpty, logical(1)))){
            ## profile imzML: the spectra are handed to intensityMatrix as well; otherwise only the peaks are used
            #if $infile.ext == 'imzml' and str($centroids) == "FALSE"
                featureMatrix <- intensityMatrix(peaks, maldi_data)
            #else
                featureMatrix <- intensityMatrix(peaks)
            #end if
            featureMatrix2 =cbind(pixelnames, featureMatrix)
            colnames(featureMatrix2)[1] = c("mz")
            ## transpose: one row per m/z feature, one column per spectrum
            featureMatrix2 = t(featureMatrix2)
            write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
        }else{print("There are no spectra with peaks left")}


    #elif str( $method.methods_conditional.method ) == 'monoisotopic_peaks':

        print('monoisotopic peaks')
        ## Keep only monoisotopic peaks

        ## the unfiltered peaks are kept until the QC plot of the old spectrum has been drawn
        peaks_before = peaks
        peaks = monoisotopicPeaks(peaks_before, minCor=$method.methods_conditional.minCor, tolerance=$method.methods_conditional.tolerance, distance=$method.methods_conditional.distance, size=$method.methods_conditional.size)

        ## QC plot and numbers
        ## plot old spectrum with picked isotopes as green dots
        plot(peaks_before[[1]], main="First spectrum with picked monoisotopic peaks (green)")
        points(peaks[[1]], col="green", pch=20)
        ## plot new spectrum
        plot(peaks[[1]], main="First spectrum after monoisotopic peaks detection")
        peak_mz = unlist(lapply(peaks,mass))
        minmz = round(min(peak_mz), digits=4)
        maxmz = round(max(peak_mz), digits=4)
        mean_features = round(length(peak_mz)/length(peaks), digits=2)
        medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
        number_features = length(unique(peak_mz))
        monoisotopes = c(minmz, maxmz,number_features,mean_features,  medint)
        QC_numbers= cbind(QC_numbers, monoisotopes)
        vectorofactions = append(vectorofactions, "monoisotopes")

        ## the intensity matrix is only written when at least one spectrum still has peaks
        if (any(!vapply(peaks, isEmpty, logical(1)))){
            ## profile imzML: the spectra are handed to intensityMatrix as well; otherwise only the peaks are used
            #if $infile.ext == 'imzml' and str($centroids) == "FALSE"
                featureMatrix <- intensityMatrix(peaks, maldi_data)
            #else
                featureMatrix <- intensityMatrix(peaks)
            #end if
            featureMatrix2 =cbind(pixelnames, featureMatrix)
            colnames(featureMatrix2)[1] = c("mz")
            ## transpose: one row per m/z feature, one column per spectrum
            featureMatrix2 = t(featureMatrix2)
            write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
        }else{print("There are no spectra with peaks left")}

    #elif str( $method.methods_conditional.method ) == 'Binning':

        print('binning')
        ## m/z binning: similar peak masses of different spectra are set to identical values

        peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method")

        ## QC plot and numbers
        plot(peaks[[1]], main="First spectrum after binning")
        binned_mz = unlist(lapply(peaks, mass))
        binned_intensity = unlist(lapply(peaks, intensity))
        minmz = round(min(binned_mz), digits=4)
        maxmz = round(max(binned_mz), digits=4)
        mean_features = round(length(binned_mz)/length(peaks), digits=2)
        medint = round(median(binned_intensity), digits=2)
        number_features = length(unique(binned_mz))
        binned = c(minmz, maxmz, number_features, mean_features, medint)
        QC_numbers = cbind(QC_numbers, binned)
        vectorofactions = append(vectorofactions, "binned")

        spectra_with_peaks = peaks[!sapply(peaks, isEmpty)]
        if (length(spectra_with_peaks) > 0){
            ## profile imzML: the spectra are handed to intensityMatrix as well; otherwise only the peaks are used
            #if $infile.ext == 'imzml' and str($centroids) == "FALSE"
                featureMatrix <- intensityMatrix(peaks, maldi_data)
            #else
                featureMatrix <- intensityMatrix(peaks)
            #end if
            ## first column holds the pixel names; after transposing every row is one m/z feature
            featureMatrix2 = cbind(pixelnames, featureMatrix)
            colnames(featureMatrix2)[1] = c("mz")
            featureMatrix2 = t(featureMatrix2)
            write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
        }else{
            print("There are no spectra with peaks left")
        }


    #elif str( $method.methods_conditional.method ) == 'Filtering':

        print('filtering')
        ## m/z filtering: remove peaks that occur in too few spectra

        ## filtering on pixel groups (annotations) or on all pixels
        #if str( $method.methods_conditional.filter_annot_groups ) == 'TRUE':

            ## the argument is spelled out as "labels" instead of relying on partial matching of "label"
            peaks <- filterPeaks(peaks,
            minFrequency=$method.methods_conditional.minFrequency,
            minNumber=$method.methods_conditional.minNumber,
            mergeWhitelists=$method.methods_conditional.mergeWhitelists, labels = samples)

        #else

            peaks <- filterPeaks(peaks,
            minFrequency=$method.methods_conditional.minFrequency,
            minNumber=$method.methods_conditional.minNumber,
            mergeWhitelists=$method.methods_conditional.mergeWhitelists)

        #end if

        ## QC plot and numbers
        plot(peaks[[1]], main="First spectrum after m/z filtering")
        peak_mz = unlist(lapply(peaks,mass))
        minmz = round(min(peak_mz), digits=4)
        maxmz = round(max(peak_mz), digits=4)
        mean_features = round(length(peak_mz)/length(peaks), digits=2)
        medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
        number_features = length(unique(peak_mz))
        filtered = c(minmz, maxmz,number_features,mean_features,  medint)
        QC_numbers= cbind(QC_numbers, filtered)
        vectorofactions = append(vectorofactions, "filtered")

        if (any(!vapply(peaks, isEmpty, logical(1)))){
            ## profile imzML: the spectra are handed to intensityMatrix as well; otherwise only the peaks are used
            #if $infile.ext == 'imzml' and str($centroids) == "FALSE"
                featureMatrix <- intensityMatrix(peaks, maldi_data)
            #else
                featureMatrix <- intensityMatrix(peaks)
            #end if
            featureMatrix2 =cbind(pixelnames, featureMatrix)
            colnames(featureMatrix2)[1] = c("mz")
            ## transpose: one row per m/z feature, one column per spectrum
            featureMatrix2 = t(featureMatrix2)
        }else{
            print("There are no spectra with peaks left")
            ## placeholder so that the intensity matrix output is written in any case
            featureMatrix2 = matrix(0, ncol=1, nrow=1)
        }
        write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
    #end if
#end for

## Peak list output: one row per peak with its snr, mass, intensity and the name of its spectrum.
## Nothing is written when no spectrum has any peak left.
if (any(!vapply(peaks, isEmpty, logical(1)))){
    ## one data frame per spectrum, bound together once instead of growing a data frame inside a loop
    mass_peaks = do.call(rbind, lapply(seq_along(peaks), function(spectrum){
        data.frame(snr = peaks[[spectrum]]@snr,
                   mass = peaks[[spectrum]]@mass,
                   intensity = peaks[[spectrum]]@intensity,
                   spectrum = rep(pixelnames[[spectrum]], length(peaks[[spectrum]]@mass)),
                   stringsAsFactors = FALSE)
    }))
    write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
}else{print("There are no spectra with peaks left")}

## print table with QC values on a new, empty page of the QC PDF
## row order follows the vectors built above: minmz, maxmz, number_features, mean_features, medint
## the fourth value is a mean (number of m/z values divided by number of spectra) and is labelled as such
rownames(QC_numbers) = c("min m/z", "max mz", "# features", "mean \n# features", "median\nintensity")
plot(0,type='n',axes=FALSE,ann=FALSE)
## transposed: one row per processing step
grid.table(t(QC_numbers))

## close the QC PDF
dev.off()

## imzML export of the final peaks; skipped when peaks were detected on averaged spectra
if (!summarized_spectra){
    #if $infile.ext == 'imzml'
        MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed)
    #elif $infile.ext == 'tabular'
        ## pixel names look like xy_<x>_<y>: split them and use parts two and three as x and y coordinates
        pixelname_parts = strsplit(as.character(pixelnames), "\\_")
        masspeaks_coordinates = matrix(unlist(pixelname_parts), ncol=3, byrow=TRUE)
        x_values = as.numeric(masspeaks_coordinates[,2])
        y_values = as.numeric(masspeaks_coordinates[,3])
        peaklist_coordinates = unique(cbind(x_values, y_values, deparse.level = 0))
        exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=peaklist_coordinates)
    #elif $infile.ext == 'rdata'
        ## coordinates were taken from the Cardinal object during import
        MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates)
    #end if

}

    ]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="infile" type="data" format="imzml,tabular,rdata" label="Inputfile as imzML, peaklist or Cardinal MSImageSet saved as RData" help="imzML or tabular format (peak list) or Cardinal MSImageSet saved as RData"/>
        <param name="centroids" type="boolean" label="Input data is centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/>
        <conditional name="restriction_conditional">
            <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files">
                <option value="no_restriction" selected="True">Calculate on entire file</option>
                <option value="restrict">Restrict to coordinates of interest</option>
            </param>
            <when value="restrict">
                <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/>
                <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
            </when>
            <when value="no_restriction"/>
        </conditional>

        <conditional name="tabular_annotation">
            <param name="load_annotation" type="select" label="Pixels have annotations" help="Annotations can be used during peak detection or filtering">
                <option value="no_annotation" selected="True">pixels have no annotations</option>
                <option value="yes_annotation">pixel annotation from file</option>
            </param>
            <when value="yes_annotation">
                <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file"
                    help="Tabular file with three columns: x values, y values and pixel annotations"/>
                <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
                <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
                <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/>
                <param name="tabular_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
            </when>
            <when value="no_annotation"/>
        </conditional>
        <repeat name="methods" title="Method" min="1">
            <conditional name="methods_conditional">
                <param name="method" type="select" label="Select a method">
                    <option value="Peak_detection">Peak detection</option>
                    <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option>
                    <option value="Binning">Binning</option>
                    <option value="Filtering">Filtering</option>
                </param>
                <when value="Peak_detection">
                    <param name="peak_method" type="select" label="Noise estimation function">
                        <option value="MAD" selected="True">MAD</option>
                        <option value="SuperSmoother">SuperSmoother</option>
                    </param>
                    <param name="halfWindowSize" type="integer" value="20"
                        label="Half window size (number of data points)"
                        help="The resulting window reaches from 
                            mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]
                            (window size is 2*halfWindowSize+1).
                            The best size differs depending on the selected smoothing method."/>
                    <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio" help=""/>
                    <param name="use_annotations" type="boolean" label="Detect peaks on average mass spectra" help="Spectra with same annotation are averaged before peak detection, no imzML output" truevalue="TRUE" falsevalue="FALSE"/>
                </when>
                <when value="monoisotopic_peaks">
                    <param name="minCor" type="float" value="0.95" label="Minimal correlation"
                        help="Minimal correlation between the peak pattern generated by the model and the experimental peaks in the MassPeaks object to be recognized as isotopic pattern"/>
                    <param name="tolerance" type="float" label="Tolerance" value="0.00005"
                        help="Maximal relative deviation of a peak position (m/z) to be considered as identical: abs(((mass[i]+distance)-mass[i+1])/mass[i]) smaller than 'tolerance'. For 50ppm use 0.00005 or 50e-6" />
                    <param name="distance" type="float" label="Distance" value="1.00235" help="Distance between two consecutive peaks in an isotopic pattern. 1.00235 is average distance for polypeptides."/>
                    <param name="size" type="integer" label="Size" value="3" help="Size (length) of isotopic pattern, longer patterns are preferred over shorter ones, min size is 2."/>
                </when>
                <when value="Binning">
                    <param name="bin_tolerance" type="float" value="0.002" label="Binning tolerance"
                        help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/>
                    <param name="bin_method" display="radio" type="select" label="Bin creation rule" help="strict - creates bins never containing two or more peaks of the same sample. relaxed - allows multiple peaks of the same sample in one bin.">
                    <option value="strict" selected="True" >strict</option>
                    <option value="relaxed" >relaxed</option>
                </param>
                </when>
                <when value="Filtering">
                    <param name="minFrequency" type="float" value="0.25"
                        label="Removal of all peaks which occur in less than minFrequency spectra" help="Relative threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be used."/>
                    <param name="minNumber" type="float" value="1.0"
                        label="Removal of all peaks which occur in less than minNumber spectra" help="Absolute threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be used."/>
                    <param name="filter_annot_groups" type="boolean" label="Group wise filtering with pixel annotations." help="If not specified a single group is assumed or when filtering has been done group wise it will automatically be group wise when selecting filtering on all pixel" truevalue="TRUE" falsevalue="FALSE"/>
                    <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE"
                        label="mergeWhitelists" help="Yes means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups"/>
                </when>
            </conditional>
        </repeat>
        <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" checked="True" truevalue="TRUE" falsevalue="FALSE"/>
    </inputs>
    <outputs>
        <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}">
            <!--filter>methods_conditional['method'] == 'Peak_detection'</filter-->
        </data>
        <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "${tool.name} on ${on_string}: QC"/>
        <data format="tabular" name="masspeaks" label="${tool.name} on ${on_string}: peaklist"/>
        <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix"/>
    </outputs>
    <tests>
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
                <conditional name="tabular_annotation">
                    <param name="load_annotation" value="yes_annotation"/>
                    <param name="annotation_file" value="pixel_annotations.tabular"/>
                    <param name="column_x" value="1"/>
                    <param name="column_y" value="2"/>
                    <param name="column_names" value="3"/>
                    <param name="tabular_header" value="TRUE"/>
                </conditional>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Peak_detection"/>
                    <param name="peak_method" value="SuperSmoother"/>
                    <param name="halfWindowSize" value="1"/>
                    <param name="snr" value="5"/>
                    <param name="use_annotations" value="TRUE"/>
                </conditional>
            </repeat>
            <output name="plots" file="peakdetection1_QC.pdf" compare="sim_size"/>
            <output name="masspeaks" file="masspeaks1.tabular"/>
            <output name="intensity_matrix" file="int1.tabular"/>
        </test>
        <test>
            <param name="infile" value="masspeaks3_forinput.tabular"/>
            <param name="centroids" value="TRUE"/>
             <repeat name="methods">
                <conditional name="methods_conditional">
                <param name="method" value="monoisotopic_peaks"/>
                <param name="tolerance" value="0.0004"/>
            </conditional>
            </repeat>
            <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/>
            <output name="masspeaks" file="masspeaks2.tabular"/>
            <output name="intensity_matrix" file="int2.tabular"/>
        </test>
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
                <conditional name="tabular_annotation">
                    <param name="load_annotation" value="yes_annotation"/>
                    <param name="annotation_file" value="pixel_annotations.tabular"/>
                    <param name="column_x" value="1"/>
                    <param name="column_y" value="2"/>
                    <param name="column_names" value="3"/>
                    <param name="tabular_header" value="TRUE"/>
                </conditional>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Peak_detection"/>
                    <param name="peak_method" value="MAD"/>
                    <param name="halfWindowSize" value="1"/>
                    <param name="snr" value="2"/>
                </conditional>
            </repeat>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Binning"/>
                    <param name="bin_tolerance" value="0.01"/>
                </conditional>
            </repeat>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Filtering"/>
                    <param name="minFrequency" value="0.5"/>
                    <param name="minNumber" value="3"/>
                    <param name="filter_annot_groups" value="TRUE"/>
                    <param name="mergeWhitelists" value="FALSE"/>
                </conditional>
            </repeat>
            <output name="plots" file="peakdetection3_QC.pdf" compare="sim_size"/>
            <output name="intensity_matrix" file="intensity_matrix3.tabular"/>
            <output name="masspeaks" file="masspeaks3.tabular"/>
        </test>
        <test>
            <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
            <param name="method" value="Peak_detection"/>
            <param name="peak_method" value="MAD"/>
            <param name="halfWindowSize" value="20"/>
            <param name="snr" value="2"/>
            <output name="plots" file="peakdetection4_QC.pdf" compare="sim_size"/>
            <output name="intensity_matrix" file="intensity_matrix4.tabular"/>
            <output name="masspeaks" file="masspeaks4.tabular"/>
        </test>
    </tests>
    <help>
        <![CDATA[

@MADLI_QUANT_DESCRIPTION@

-----

**Input data**

- MSI data: 3 types of input data can be used:

    - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
    - Cardinal "MSImageSet" data saved as .RData
    - MSI data as peak list (tabular file) with the columns named "snr", "mass", "intensity" and "spectrum". The spectrum has to be in the following format: xy_1_1 (for pixel coordinates x1y1). The header must contain exactly these four column names.

        ::    
        
            snr          mass      intensity   spectrum
            5.34	304.16     0.10         xy_1_1
            12.09	305        0.2          xy_1_1
            6.80	306.25     0.133        xy_1_1
            ...
            ...    


- Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. The file has to contain x values in the first column and y values in the second column. Further columns are allowed. Tabular files with any header name or no header at all are supported.

    ::

       x_coord     y_coord 
         1            1    
         2            1    
         3            1    
        ...
        ...


- Optional: Tabular file(s) containing pixel coordinates and annotation. X and y values have to be in separate columns and the corresponding annotation in a third column. Tabular files with any header name or no header at all are supported. The annotations can be used to either detect peaks on the average mass spectrum of each annotation group ('Detect peaks on average mass spectra') or to filter for peaks within pixel groups ('Group wise filtering'). This option is not compatible with tabular peak list inputs.

    ::

     x_coord     y_coord    annotation
        1            1        healthy
        2            1        healthy
        3            1        disease
       ...
       ...

**Options**

- Peak detection: detection of peaks, only possible with profile mode input. A peak is a local maximum above a user defined noise threshold.

    - Two functions exist to estimate the noise: MAD and supersmoother. 
    - SNR is an abbreviation for signal-to-noise-ratio. A local maximum has to be higher than SNR*noise to be recognized as a peak.
    - 'halfWindowSize': The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]. A local maximum has to be the highest one in the given window to be recognized as a peak.
    - Detect peaks on average mass spectra: Spectra with the same annotation (taken from the annotation tabular input) are averaged and peak picking is performed on the average spectrum of each annotation group. The exported imzML is empty and cannot be used for further analysis steps. The peaklist and intensity matrix outputs contain the annotation group names with their averaged intensity values. Filtering steps have to be done in the same run as the peak picking.


- Monoisotopic peaks: detection of monoisotopic peaks




- Peak binning: After the alignment the peak positions (m/z) are very similar but not identical. The binning is needed to make similar peak m/z values identical. The algorithm is based on the following workflow:

    1. Put all masses in a sorted vector.
    2. Calculate differences between each neighbor.
    3. Divide the mass vector at the largest gap (largest difference) and form a left and a right bin.
    4. Rerun step 3 for the left and/or the right bin if they don't fulfill the following criteria:

    - All peaks in a bin are near to the mean (abs(mass-meanMass)/meanMass < tolerance).
    - method == "strict": The bin doesn't contain two or more peaks of the same sample.


- Peak filtering: Removal of less frequent m/z features:

    - minFrequency: value between 0 and 1, i.e. the m/z has to occur in at least this fraction (0 - 100%) of all spectra; minNumber: the m/z has to occur in at least this number of spectra. Of these two criteria, the stricter one is used.
    - Group wise filtering with pixel annotations: 'Yes' means that the filtering criteria are applied for each annotation group separately. 
    - mergeWhitelists: 'Yes' means that peaks that survive the filtering in one annotation group are also kept in other groups, regardless of whether the filtering criteria are met in these groups
    - To filter data that was averaged before peak detection: Filtering has to be done in the same tool run as the peak detection. The filtering criteria are automatically applied per annotation group (Group wise filtering can be 'No') and not per pixel. Example: to keep only m/z that were detected in at least half of all annotation groups set minFrequency to 0.5.


**Output**

- centroided imzML file (processed or continuous); the imzML file is empty when 'Detect peaks on average mass spectra' is chosen.
- pdf with mass spectra plots after each preprocessing step and a table with key values after each preprocessing step
- peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum"
- tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode, the intensities before peak picking are also stored in the matrix. For all other inputs, values that were not picked are set to NA. For peak picking on the average of multiple spectra, each spectra group is a column with mean intensities for each m/z.

.. _MALDIquant: http://strimmerlab.org/software/maldiquant/

        ]]>
    </help>
    <expand macro="citation"/>
</tool>