view msi_filtering.xml @ 1:a170455feb59 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_filtering commit 06c2b45d8644b1d7fc01622a5c59dcbf8886d0f1
author galaxyp
date Mon, 23 Apr 2018 17:16:25 -0400
parents 91e2a0c9695c
children bc1ff8d086e3
line wrap: on
line source

<tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.7.0.1">
    <description>tool for filtering mass spectrometry imaging data</description>
    <requirements>
        <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
        <requirement type="package" version="2.2.1">r-gridextra</requirement>
    </requirements>
    <command detect_errors="exit_code">
    <![CDATA[

        ## Stage the input under a fixed name in the job working directory so that
        ## the R script can read it as infile.* regardless of the dataset path.
        #if $infile.ext == 'imzml'
            cp '${infile.extra_files_path}/imzml' infile.imzML &&
            cp '${infile.extra_files_path}/ibd' infile.ibd &&
        #elif $infile.ext == 'analyze75'
            cp '${infile.extra_files_path}/hdr' infile.hdr &&
            cp '${infile.extra_files_path}/img' infile.img &&
            cp '${infile.extra_files_path}/t2m' infile.t2m &&
        #else
            ## quoted like every other path here, so unusual characters in the path survive
            ln -s '$infile' infile.RData &&
        #end if
        ## Print the rendered R script and its path to stdout, then run it.
        cat '${MSI_subsetting}' &&
        echo '${MSI_subsetting}' &&
        Rscript '${MSI_subsetting}'

    ]]>
    </command>
    <configfiles>
        <configfile name="MSI_subsetting"><![CDATA[


################################# load libraries and read file #########################

## Cardinal provides the MSI readers and accessors, gridExtra the table drawn in the QC PDF
library(Cardinal)
library(gridExtra)

## Read the dataset that the command section staged as infile.*
## Every later step works on a variable called msidata.

#if $infile.ext == 'imzml'
    msidata <- readMSIData("infile.imzML")
#elif $infile.ext == 'analyze75'
    msidata <- readMSIData("infile.hdr")
#else
    ## the RData file is expected to contain an object named msidata
    load("infile.RData")
#end if

###################################### inputfile properties in numbers ######################

#if $outputs.outputs_select == "quality_control":
    ## Summary statistics of the unfiltered dataset; they fill the "before"
    ## column of the QC table.
    ## The intensity matrix (features in rows, pixels in columns) is materialised
    ## once and reused instead of being rebuilt for every statistic.
    intensities_before = spectra(msidata)[]

    ## Number of features (mz)
    maxfeatures = length(features(msidata))
    ## Range mz
    minmz = round(min(mz(msidata)), digits=2)
    maxmz = round(max(mz(msidata)), digits=2)
    ## Number of spectra (pixels)
    pixelcount = length(pixels(msidata))
    ## Range x coordinates
    minimumx = min(coord(msidata)[,1])
    maximumx = max(coord(msidata)[,1])
    ## Range y coordinates
    minimumy = min(coord(msidata)[,2])
    maximumy = max(coord(msidata)[,2])
    ## Number of intensities > 0
    npeaks = sum(intensities_before > 0)
    ## Spectra multiplied with mz (potential number of peaks);
    ## prod() computes in double precision, so the product cannot overflow the integer range
    numpeaks = prod(dim(intensities_before))
    ## Percentage of intensities > 0
    percpeaks = round(npeaks/numpeaks*100, digits=2)
    ## Number of empty TICs
    TICs = colSums(intensities_before)
    NumemptyTIC = sum(TICs == 0)
    ## median TIC
    medint = round(median(TICs), digits=2)
    ## Store features for QC plot
    featuresinfile = mz(msidata)

    ## the dense copy is not needed any more
    rm(intensities_before)
#end if


###################################### filtering of pixels ######################

## Each branch below sets numberpixels (pixels requested by the user) and
## validpixels (requested pixels that exist in the dataset) for the QC table.
## msidata is only subsetted when at least one pixel would remain.

#if str($pixels_cond.pixel_filtering) == "single_column":
    print("single column")

    #if $pixels_cond.single_pixels:
        ## entries are compared with the pixel names of the dataset (form: x = 1, y = 1)
        input_list = read.delim("$pixels_cond.single_pixels", header = FALSE, stringsAsFactors = FALSE)
        requested_pixels = input_list[,$pixels_cond.pixel_column]
        numberpixels = length(requested_pixels)
        valid_entries = requested_pixels %in% names(pixels(msidata))
        validpixels = sum(valid_entries)

        if (validpixels != 0)
        {
            pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% requested_pixels[valid_entries]]
            msidata = msidata[,pixelsofinterest]
        }

    #else
        validpixels=0
        numberpixels = 0
    #end if 

#elif str($pixels_cond.pixel_filtering) == "two_columns":
    print("two columns")

    #if $pixels_cond.two_columns_pixel:

        input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, 
        stringsAsFactors = FALSE)

        inputpixel_x = input_list[,$pixels_cond.pixel_column_x]
        inputpixel_y = input_list[,$pixels_cond.pixel_column_y]
        numberpixels = length(inputpixel_x)

        inputpixels = cbind(inputpixel_x, inputpixel_y)
        colnames(inputpixels) = c("x", "y")
        ## merge() joins on the shared column names x and y, so only coordinate
        ## pairs that exist in the dataset are kept
        valid_rows = merge(inputpixels, coord(msidata)[,1:2])
        validpixels = nrow(valid_rows)

        if (validpixels != 0)
        {
            ## build the pixel names for all matching coordinates in one vectorised call
            pixelvector = paste0("x = ", valid_rows[,1],", ", "y = ", valid_rows[,2])

            pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector]
            msidata = msidata[,pixelsofinterest]
        }

    #else
        validpixels=0
        numberpixels = 0
    #end if 

#elif str($pixels_cond.pixel_filtering) == "pixel_range":
    print("pixel range")

    ## no pixel list is involved, the QC table only shows the word "range"
    numberpixels = "range"
    validpixels = "range"

    ## x range: applied only if at least one pixel lies inside it
    pixels_in_x_range = coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range
    if (sum(pixels_in_x_range) > 0)
    {
        msidata = msidata[, pixels_in_x_range]
    }

    ## y range: evaluated on the (possibly already x-filtered) dataset
    pixels_in_y_range = coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range
    if (sum(pixels_in_y_range) > 0)
    {
        msidata = msidata[, pixels_in_y_range]
    }



#elif str($pixels_cond.pixel_filtering) == "none":
    print("no pixel filtering")
    numberpixels = 0
    validpixels = 0

#end if



###################################### filtering of features ######################

## Each branch below sets numberfeatures (features requested by the user) and
## validfeatures (requested features that exist in the dataset) for the QC table.
## msidata is only subsetted when at least one feature would remain.

#if str($features_cond.features_filtering) == "features_list":

    print("feature list")

    input_features = read.delim("$features_cond.inputfeatures", header = FALSE, stringsAsFactors = FALSE)

    ## skip the header lines; the logical index also copes with a file that
    ## holds nothing but header lines (a backwards running a:b sequence would not)
    startingrow = $features_cond.feature_header+1
    data_rows = seq_len(nrow(input_features)) >= startingrow
    extracted_features = input_features[data_rows,$features_cond.feature_column]
    numberfeatures = length(extracted_features)


    if (grepl("m/z = ", extracted_features[1])==FALSE)

    {   print("no m/z = in data")

        ## A numeric column that shares the file with header lines is read as
        ## text; convert it back when every entry is a clean number.
        if (!is.numeric(extracted_features))
        {
            converted_features = suppressWarnings(as.numeric(extracted_features))
            if (!anyNA(converted_features))
            {
                extracted_features = converted_features
            }
        }

        ## is.numeric() accepts integer as well as double columns
        if (is.numeric(extracted_features))
        {
            ## turn the numbers into feature names of the form m/z = 800.12
            charactervector = rep("m/z = ", numberfeatures)
            mz_added = paste0(charactervector, round(extracted_features,digits=2))
            validfeatures = mz_added %in% names(features(msidata))
            featuresofinterest = features(msidata)[names(features(msidata)) %in% mz_added[validfeatures]]
        }else{
            validfeatures = 0
            featuresofinterest = features(msidata)
        }
    }else{   
        ## entries are already feature names
        validfeatures = extracted_features %in% names(features(msidata))
        featuresofinterest = features(msidata)[names(features(msidata)) %in% extracted_features[validfeatures]]
    }

    ## without a single valid feature the dataset is left untouched
    if (sum(validfeatures) > 0)
    {
        msidata = msidata[featuresofinterest,]
    }



#elif str($features_cond.features_filtering) == "features_range":

    print("feature range")

    ## no feature list is involved
    numberfeatures = "range"
    validfeatures = NA

    ## applied only if at least one feature lies inside the range
    features_in_range = mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz
    if (sum(features_in_range)> 0)
    {
    msidata = msidata[features_in_range,]
    }


#elif str($features_cond.features_filtering) == "none":

    print("no feature filtering")
    validfeatures = 0
    numberfeatures = 0
#end if



## Write the (possibly filtered) dataset to the main tool output.
## The object is stored under the variable name msidata.
# save msidata as Rfile
save(msidata, file="$msidata_filtered")

###################################### outputfile properties in numbers ######################

#if $outputs.outputs_select == "quality_control":

## Summary statistics of the filtered dataset; they fill the "filtered" column
## of the QC table next to the "before" values collected ahead of filtering.
## The intensity matrix is materialised once and reused for every statistic.
intensities_filtered = spectra(msidata)[]

## Number of features (mz)
maxfeatures2 = length(features(msidata))
## Range mz
minmz2 = round(min(mz(msidata)), digits=2)
maxmz2 = round(max(mz(msidata)), digits=2)
## Number of spectra (pixels)
pixelcount2 = length(pixels(msidata))
## Range x coordinates
minimumx2 = min(coord(msidata)[,1])
maximumx2 = max(coord(msidata)[,1])
## Range y coordinates
minimumy2 = min(coord(msidata)[,2])
maximumy2 = max(coord(msidata)[,2])
## Number of intensities > 0
npeaks2= sum(intensities_filtered > 0)
## Spectra multiplied with mz (potential number of peaks);
## prod() computes in double precision, so the product cannot overflow the integer range
numpeaks2 = prod(dim(intensities_filtered))
## Percentage of intensities > 0
percpeaks2 = round(npeaks2/numpeaks2*100, digits=2)
## Number of empty TICs
TICs2 = colSums(intensities_filtered)
NumemptyTIC2 = sum(TICs2 == 0)
## median TIC
medint2 = round(median(TICs2), digits=2)

## the dense copy is not needed any more
rm(intensities_filtered)


## Row labels of the QC table
properties = c("Number of mz features",
               "Range of mz values [Da]",
               "Number of pixels", 
               "Range of x coordinates", 
               "Range of y coordinates",
               "Intensities > 0",
               "Median TIC per pixel",
               "Number of zero TICs", 
               "pixel overview", 
               "feature overview")

## Values before filtering
before = c(paste0(maxfeatures), 
           paste0(minmz, " - ", maxmz), 
           paste0(pixelcount), 
           paste0(minimumx, " - ", maximumx),  
           paste0(minimumy, " - ", maximumy), 
           paste0(percpeaks, " %"), 
           paste0(medint),
           paste0(NumemptyTIC), 
           paste0("input pixels: ", numberpixels),
           paste0("input mz: ", numberfeatures))

## Values after filtering
filtered = c(paste0(maxfeatures2), 
           paste0(minmz2, " - ", maxmz2), 
           paste0(pixelcount2), 
           paste0(minimumx2, " - ", maximumx2),  
           paste0(minimumy2, " - ", maximumy2), 
           paste0(percpeaks2, " %"), 
           paste0(medint2),
           paste0(NumemptyTIC2), 
           paste0("valid pixels: ", validpixels),
           paste0("valid mz: ", sum(validfeatures)))


property_df = data.frame(properties, before, filtered)



######################################## PDF QC #############################################

    ## Title page with the before/filtered summary table
    pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12)
    ## empty plot that only serves as canvas for the title
    plot(0,type='n',axes=FALSE,ann=FALSE)

    ## NOTE(review): the dataset name is pasted into an R string literal as is; a name
    ## containing a double quote would presumably break the script - confirm whether
    ## Galaxy sanitises it
    title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name"))


    
    grid.table(property_df, rows= NULL)

    ### heatmap image as visual pixel control

## both operands are single values, so the scalar operator is the right one for if()
if (length(features(msidata))> 0 && length(pixels(msidata)) > 0)
{


    ## the y axis is reversed and padded by 20 % on both ends
    image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none", 
          main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2))

    ### control features which are left

    ## m/z values before filtering (red, upper row) against those still present (green, lower row)
    plot(featuresinfile, rep(1,length(featuresinfile)), yaxt="n", ylab=NA, xlab="m/z values", col="red", ylim=c(0.8, 1.1), main="Distribution of m/z values")
    lines(mz(msidata),rep(0.9, length(mz(msidata))), col="green", type="p")
    legend("top", horiz=TRUE,
           legend = c("before", "filtered"), 
           fill = c("red", "green"))




}else{
    print("file has no features or pixels left")
}

    dev.off()

#end if

######################################## intensity matrix ##################################

#if $output_matrix:

## Write the intensities as tab separated table with one row per m/z value
## and one column per pixel.
if (length(features(msidata))> 0 && length(pixels(msidata)) > 0)
{

    spectramatrix = spectra(msidata)
    rownames(spectramatrix) = mz(msidata)
    ## binding the named pixel vector on top is only a way to get the pixel names as column names
    newmatrix = rbind(pixels(msidata), spectramatrix)
    ## remove the helper row again; drop = FALSE keeps the matrix layout when
    ## only a single feature is left (otherwise the result collapses to a vector
    ## and the table is written transposed)
    write.table(newmatrix[-1, , drop = FALSE], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")

}else{
    print("file has no features or pixels left")
}

#end if


    ]]></configfile>
    </configfiles>
    <inputs>
        <!-- MSI dataset to filter; the command section copies or links it into the working directory as infile.* -->
        <param name="infile" type="data" format="imzml, rdata, analyze75"
               label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
                help="Upload composite datatype imzML (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>

        <!-- Pixel selection: none, a list of pixel names in one column,
             x and y values in two columns, or inclusive x/y ranges -->
        <conditional name="pixels_cond">
            <param name="pixel_filtering" type="select" label="Select pixel filtering option">
                <option value="none" selected="True">none</option>
                <option value="single_column">tabular file with single column (x = 1, y = 1)</option>
                <option value="two_columns">tabular file with separate columns for x and y values</option>
                <option value="pixel_range">ranges for x and y</option>
            </param>
            <when value="none"/>
            <when value="single_column">
                <!-- entries are matched against the pixel names of the dataset -->
                <param name="single_pixels" type="data" format="tabular" label="Pixels in single column for filtering of MSI data"
                    help="tabular file with pixels of interest in the form x = 1, y = 1"/>
                <param name="pixel_column" data_ref="single_pixels" label="Column with pixels" type="data_column"/>
            </when> 
            <when value="two_columns">
                <!-- x/y pairs are matched against the coordinates of the dataset -->
                <param name="two_columns_pixel" type="data" format="tabular" label="Pixels in two columns for filtering of MSI data"
                    help="tabular file with pixels of interest in two separate columns"/>
                <param name="pixel_column_x" data_ref="two_columns_pixel" label="Column with x values" type="data_column"/>
                <param name="pixel_column_y" data_ref="two_columns_pixel" label="Column with y values" type="data_column"/>
            </when> 
            <when value="pixel_range">
                <!-- both limits are inclusive; the x range is applied before the y range -->
                <param name="min_x_range" type="integer" value="0" label="Minimum value for x"/>
                <param name="max_x_range" type="integer" value="100" label="Maximum value for x"/>
                <param name="min_y_range" type="integer" value="0" label="Minimum value for y"/>
                <param name="max_y_range" type="integer" value="100" label="Maximum value for y"/>
            </when> 
        </conditional>

        <!-- Feature (m/z) selection: none, a list of m/z values or feature names,
             or an inclusive m/z range -->
        <conditional name="features_cond">
            <param name="features_filtering" type="select" label="Select feature filtering option">
                <option value="none" selected="True">none</option>
                <option value="features_list">tabular file with features (data type: 800.12 or m/z = 800.12)</option>
                <option value="features_range">range of features</option>
            </param>

            <when value="none"/>
            <when value="features_list">
                <param name="inputfeatures" type="data" format="tabular" label="Features for filtering of MSI data" help="tabular file with masses of interest either as numbers (800.05) or in the form m/z = 800.05"/>
                <param name="feature_column" data_ref="inputfeatures" label="Column with features" type="data_column"/>
                <param name="feature_header" label="Number of header lines to skip" value="0" type="integer"/>
            </when> 
            <when value="features_range">
                <!-- float instead of integer: m/z limits such as 800.25 can be entered,
                     whole numbers are still accepted -->
                <param name="min_mz" type="float" value="1" label="Minimum value for mz (in Dalton)"/>
                <param name="max_mz" type="float" value="100" label="Maximum value for mz (in Dalton)"/>
            </when> 
        </conditional>

        <!-- Optional QC report (PDF) comparing the dataset before and after filtering -->
        <conditional name="outputs">
           <param name="outputs_select" type="select" label="Quality control output">
               <option value="quality_control" selected="True">yes</option>
               <option value="no_quality_control">no</option>
           </param>
           <when value="quality_control">
              <!-- m/z value and +/- window used for the heatmap image in the report -->
              <param name="inputmz" type="float" value="1296.7" label="Mass for which a heatmap image will be drawn" help="Use a mass which is still present in all pixels to control if the pixel filtering went well"/>
              <param name="plusminus_dalton" value="0.25" type="float" label="mass range for mz value" help="plusminus mass window in Dalton"/>
           </when>
           <when value="no_quality_control"/>
         </conditional>
         <!-- when set, the intensities of the filtered dataset are also written as tabular file -->
         <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/>
    </inputs>
    <outputs>
        <!-- filtered dataset, always produced -->
        <data format="rdata" name="msidata_filtered" label="${tool.name} on ${on_string}"/>
        <!-- QC report, collected from the working directory when quality control is selected -->
        <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label="QC ${tool.name} on ${on_string}">
            <filter>outputs["outputs_select"] == "quality_control"</filter>
        </data>
        <!-- intensity matrix, only when requested -->
        <data format="tabular" name="matrixasoutput" label="Matrix ${tool.name} on ${on_string}">
            <filter>output_matrix</filter>
        </data>
    </outputs>

    <tests>
        <test expect_num_outputs="2">
            <!-- imzML: pixel names in a single column, feature list with one header line, QC report -->
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
            <conditional name="pixels_cond">
                <param name="pixel_filtering" value="single_column"/>
                <param name="single_pixels" ftype="tabular" value="inputpixels.tabular"/>
                <param name="pixel_column" value="1"/>
            </conditional>
            <conditional name="features_cond">
                <param name="features_filtering" value="features_list"/>
                <param name="inputfeatures" ftype="tabular" value="inputfeatures.tabular"/>
                <param name="feature_column" value="2"/>
                <param name="feature_header" value="1"/>
            </conditional>
            <conditional name="outputs">
                <param name="outputs_select" value="quality_control"/>
                <param name="inputmz" value="328.9"/>
                <param name="plusminus_dalton" value="0.25"/>
            </conditional>
            <output name="filtering_qc" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/>
            <output name="msidata_filtered" file="imzml_filtered.RData" compare="sim_size" />
        </test>
        <test expect_num_outputs="2">
            <!-- imzML: pixel range only, no feature filtering, QC report -->
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
            <conditional name="pixels_cond">
                <param name="pixel_filtering" value="pixel_range"/>
                <param name="min_x_range" value="0"/>
                <param name="max_x_range" value="10"/>
                <param name="min_y_range" value="2"/>
                <param name="max_y_range" value="2"/>
            </conditional>
            <conditional name="outputs">
                <param name="outputs_select" value="quality_control"/>
                <param name="inputmz" value="328.9"/>
                <param name="plusminus_dalton" value="0.25"/>
            </conditional>
            <output name="filtering_qc" file="imzml_filtered2.pdf" compare="sim_size" delta="20000"/>
            <output name="msidata_filtered" file="imzml_filtered2.RData" compare="sim_size" />
        </test>
        <test expect_num_outputs="3">
            <!-- imzML: pixel range plus feature range, QC report and intensity matrix -->
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
            <conditional name="pixels_cond">
                <param name="pixel_filtering" value="pixel_range"/>
                <param name="min_x_range" value="0"/>
                <param name="max_x_range" value="10"/>
                <param name="min_y_range" value="2"/>
                <param name="max_y_range" value="2"/>
            </conditional>
            <conditional name="features_cond">
                <param name="features_filtering" value="features_range"/>
                <param name="min_mz" value="200" />
                <param name="max_mz" value="500"/>
            </conditional>
            <conditional name="outputs">
                <param name="outputs_select" value="quality_control"/>
                <param name="inputmz" value="328.9"/>
                <param name="plusminus_dalton" value="0.25"/>
            </conditional>
            <param name="output_matrix" value="True"/>
            <output name="filtering_qc" file="imzml_filtered3.pdf" compare="sim_size" delta="20000"/>
            <output name="msidata_filtered" file="imzml_filtered3.RData" compare="sim_size" />
            <output name="matrixasoutput" file="imzml_matrix3.tabular"/>
        </test>
        <test expect_num_outputs="2">
            <!-- imzML: x/y values in two columns, numeric feature list without header, QC report -->
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
            <conditional name="pixels_cond">
                <param name="pixel_filtering" value="two_columns"/>
                <param name="two_columns_pixel" ftype="tabular" value="inputpixels_2column.tabular"/>
                <param name="pixel_column_x" value="1"/>
                <param name="pixel_column_y" value="3"/>
            </conditional>
            <conditional name="features_cond">
                <param name="features_filtering" value="features_list"/>
                <param name="inputfeatures" ftype="tabular" value="inputcalibrantfile2.txt"/>
                <param name="feature_column" value="1"/>
                <param name="feature_header" value="0"/>
            </conditional>
            <conditional name="outputs">
                <param name="outputs_select" value="quality_control"/>
                <param name="inputmz" value="328.9"/>
                <param name="plusminus_dalton" value="0.25"/>
            </conditional>
            <output name="filtering_qc" file="imzml_filtered4.pdf" compare="sim_size" delta="20000"/>
            <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size" />
        </test>
        <test expect_num_outputs="2">
            <!-- imzML: pixel range plus an m/z range of 1 to 150, QC report -->
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
            <conditional name="pixels_cond">
                <param name="pixel_filtering" value="pixel_range"/>
                <param name="min_x_range" value="0"/>
                <param name="max_x_range" value="10"/>
                <param name="min_y_range" value="2"/>
                <param name="max_y_range" value="20"/>
            </conditional>
            <conditional name="features_cond">
                <param name="features_filtering" value="features_range"/>
                <param name="min_mz" value="1" />
                <param name="max_mz" value="150"/>
            </conditional>
            <conditional name="outputs">
                <param name="outputs_select" value="quality_control"/>
                <param name="inputmz" value="328.9"/>
                <param name="plusminus_dalton" value="0.25"/>
            </conditional>
            <output name="filtering_qc" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/>
            <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" />
        </test>
        <test expect_num_outputs="3">
            <!-- Analyze7.5: pixel list and feature list, QC report and intensity matrix -->
            <param name="infile" value="" ftype="analyze75">
                <composite_data value="Analyze75.hdr"/>
                <composite_data value="Analyze75.img"/>
                <composite_data value="Analyze75.t2m"/>
            </param>
            <conditional name="pixels_cond">
                <param name="pixel_filtering" value="single_column"/>
                <param name="single_pixels" ftype="tabular" value="inputpixels2.tabular"/>
                <param name="pixel_column" value="1"/>
            </conditional>
            <conditional name="features_cond">
                <param name="features_filtering" value="features_list"/>
                <param name="inputfeatures" ftype="tabular" value="featuresofinterest2.tabular"/>
                <param name="feature_column" value="1"/>
            </conditional>
            <conditional name="outputs">
                <param name="outputs_select" value="quality_control"/>
                <param name="inputmz" value="702"/>
                <param name="plusminus_dalton" value="0.25"/>
            </conditional>
            <param name="output_matrix" value="True"/>
            <output name="filtering_qc" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/>
            <output name="msidata_filtered" file="analyze_filtered.RData" compare="sim_size" />
            <output name="matrixasoutput" file="analyze_matrix.tabular"/>
        </test>
        <test expect_num_outputs="2">
           <!-- Analyze7.5: no pixel or feature filter is set, so the select defaults (none) apply; QC report only -->
           <param name="infile" value="" ftype="analyze75">
                <composite_data value="Analyze75.hdr"/>
                <composite_data value="Analyze75.img"/>
                <composite_data value="Analyze75.t2m"/>
            </param>
            <conditional name="outputs">
                <param name="outputs_select" value="quality_control"/>
                    <param name="inputmz" value="702"/>
                    <param name="plusminus_dalton" value="0.25"/>
            </conditional>
            <output name="filtering_qc" file="analyze75_filtered2.pdf" compare="sim_size" delta="20000"/>
            <output name="msidata_filtered" file="analyze_originaloutput2.RData" compare="sim_size" />
        </test>
        <test expect_num_outputs="2">
            <!-- RData input: no filter set, QC report switched off, intensity matrix requested -->
            <param name="infile" value="preprocessing_results1.RData" ftype="rdata"/>
            <conditional name="outputs">
                <param name="outputs_select" value="no_quality_control"/>
            </conditional>
            <param name="output_matrix" value="True"/>
            <output name="matrixasoutput" file="rdata_matrix.tabular"/>
            <output name="msidata_filtered" file="rdata_notfiltered.RData" compare="sim_size" />
        </test>
    </tests>
    <help>
        <![CDATA[

This tool can filter three types of mass-spectrometry imaging files (see below) for pixels and features of interest. This can be used to keep only the pixels in a region of interest.
For filtering, at least one valid pixel/feature is needed; otherwise no filtering will be performed. It is recommended to use the filtering tool only for feature masses which have been extracted from the same dataset. If you have feature masses from dataset A and you want to use them to filter dataset B, first find the corresponding (closest) features in dataset B by using the tool "Join two files on column allowing a small difference". Afterwards, use the corresponding feature masses from dataset B to filter dataset B. 

Input data: 3 types of input data can be used:

- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_
- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)

The output of this tool is a subsetted Cardinal "MSImageSet" with the variable name "msidata" saved as .RData. 
        ]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv146</citation>
    </citations>
</tool>