Mercurial > repos > bgruening > deeptools_compute_matrix

<tool id="deeptools_compute_matrix" name="computeMatrix" version="@WRAPPER_VERSION@.0">
    <description>preparation step to plot a heatmap or a profile</description>
    <!-- Macro setup: defines the @BINARY@ token (expanded in the command template
         below) and imports the shared macro file. The macros and tokens used in this
         wrapper but not defined here (e.g. "requirements", "citations", @THREADS@,
         @REFERENCES@) presumably come from that imported file; confirm there. -->
    <macros>
        <token name="@BINARY@">computeMatrix</token>
        <import>deepTools_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command>
<![CDATA[
        ## NOTE(review): tempfile is not referenced anywhere in this template; the
        ## import is kept in case an expanded macro token relies on it.
        #import tempfile

        ## Concatenate all region files into one input file. Each file is followed
        ## by a "#label" line: the user-supplied label when it is not blank,
        ## otherwise the name of the dataset.
        #for $rf in $regionsFiles:
            cat "$rf.regionsFile" >> ./temp_input_path;
            #if str($rf.label.value).strip():
                echo "\#$rf.label.value" >> ./temp_input_path;
            #else:
                echo "\#$rf.regionsFile.name" >> ./temp_input_path;
            #end if
        #end for

        @BINARY@

            ## The selected mode value is passed as the first positional argument.
            $mode.mode_select
            --regionsFileName ./temp_input_path
            --scoreFileName '$scoreFile'
            --outFileName '$outFileName'

            @THREADS@

            ## Optional extra outputs, only when the output settings are shown.
            #if $output.showOutputSettings == "yes"
                #if $output.saveData:
                    --outFileNameData '$outFileNameData'
                #end if
                #if $output.saveMatrix:
                    --outFileNameMatrix '$outFileNameMatrix'
                #end if

                #if $output.saveSortedRegions:
                    --outFileSortedRegions '$outFileSortedRegions'
                #end if
            #end if

            ## Mode-specific options.
            #if $mode.mode_select == "reference-point":
                --referencePoint $mode.referencePoint
                $mode.nanAfterEnd
                --beforeRegionStartLength $mode.beforeRegionStartLength
                --afterRegionStartLength $mode.afterRegionStartLength
            #else
                --regionBodyLength $mode.regionBodyLength
                --startLabel "$mode.startLabel"
                --endLabel "$mode.endLabel"
                #if $mode.regionStartLength.regionStartLength_select == "yes":
                    --beforeRegionStartLength $mode.regionStartLength.beforeRegionStartLength
                    --afterRegionStartLength $mode.regionStartLength.afterRegionStartLength
                #end if
            #end if

            #if $advancedOpt.showAdvancedOpt == "yes":
                --sortRegions '$advancedOpt.sortRegions'
                --sortUsing '$advancedOpt.sortUsing'
                --averageTypeBins '$advancedOpt.averageTypeBins'
                ## Boolean parameters render as their flag when checked, otherwise
                ## as an empty string. missingDataAsZero is offered in the advanced
                ## inputs and therefore has to be emitted here as well.
                $advancedOpt.missingDataAsZero
                $advancedOpt.skipNAs
                $advancedOpt.skipZeros
                --binSize $advancedOpt.binSize

                ## Optional floats: only pass them when a value was entered.
                #if $advancedOpt.minThreshold is not None and str($advancedOpt.minThreshold) != '':
                    --minThreshold $advancedOpt.minThreshold
                #end if
                #if $advancedOpt.maxThreshold is not None and str($advancedOpt.maxThreshold) != '':
                    --maxThreshold $advancedOpt.maxThreshold
                #end if
                #if $advancedOpt.scale is not None and str($advancedOpt.scale) != '':
                    --scale $advancedOpt.scale
                #end if

            #end if
]]>
    </command>
    <inputs>

        <!-- One or more BED files; each may carry an optional label. -->
        <repeat name="regionsFiles" title="regions to plot" min="1">
            <param name="regionsFile" format="bed" type="data" label="Regions to plot"
                help="File, in BED format, containing the regions to plot."/>
            <param name="label" type="text" size="30" optional="true" value="" label="Label"
                help="Label to use in the output."/>
        </repeat>

        <param name="scoreFile" format="bigwig" type="data"
            label="Score file"
            help="Should be a bigWig file (containing a score, usually covering
            the whole genome). You can generate a bigWig file either from a
            bedGraph or WIG file using UCSC tools or from a BAM file using the
            deepTool bamCoverage. (-scoreFile)"/>

        <!-- Mode selector: the chosen value decides which set of parameters is shown. -->
        <conditional name="mode" >
            <param name="mode_select" type="select"
                label="computeMatrix has two main output options"
                help="In the scale-regions mode, all regions in the BED file are
                stretched or shrunk to the same length (bp) that is indicated
                by the user. Reference-point refers to a position within the BED
                regions (e.g start of region). In the reference-point mode only
                those genomic positions before (upstream) and/or after (downstream)
                the reference point will be plotted.">
                <option value="scale-regions" selected="true">scale-regions</option>
                <option value="reference-point">reference-point</option>
            </param>

            <when value="scale-regions" >
                <param argument="--regionBodyLength" type="integer" value="500"
                    label="Distance in bp to which all regions are going to be fitted" help=""/>
                <param argument="--startLabel" type="text" value="TSS" size="10"
                    label="Label for the region start"
                    help="Label shown in the plot for the start of the region.
                    Default is TSS (transcription start site), but could be changed to anything,
                    e.g. &quot;peak start&quot;." />
                <param argument="--endLabel" type="text" value="TES" size="10"
                    label="Label for the region end"
                    help="Label shown in the plot for the region end. Default is TES (transcription end site)."/>
                <!-- Flanking distances are only requested when explicitly enabled. -->
                <conditional name="regionStartLength">
                    <param name="regionStartLength_select" type="select" label="Set distance up- and downstream of the given regions">
                        <option value="no" selected="true">no</option>
                        <option value="yes">yes</option>
                    </param>
                    <when value="no" />
                    <when value="yes">
                        <param argument="--beforeRegionStartLength" type="integer" value="1000" min="1"
                            label="Distance upstream of the start site of the regions defined in the region file"
                            help="If the regions are genes, this would be the
                            distance upstream of the transcription start site."/>
                        <param argument="--afterRegionStartLength" type="integer" value="1000" min="1"
                            label="Distance downstream of the end site of the given regions"
                            help="If the regions are genes, this would be the
                            distance downstream of the transcription end site."/>
                    </when>
                </conditional>
            </when>
            <when value="reference-point">
                <param name="referencePoint" type="select" label="The reference point for the plotting">
                    <option value="TSS" selected="true">beginning of region (e.g. TSS)</option>
                    <option value="TES">end of region (e.g. TES)</option>
                    <option value="center">center of region</option>
                </param>
                <param name="nanAfterEnd" type="boolean" truevalue="--nanAfterEnd" falsevalue=""
                    label="Discard any values after the region end"
                    help="This is useful to visualize the region end when not using the
                    scale-regions mode and when the reference-point is set to the TSS. (--nanAfterEnd)"/>
                <param name="beforeRegionStartLength" type="integer" value="1000" min="1"
                    label="Distance upstream of the start site of the regions defined in the region file"
                    help="If the regions are genes, this would be the distance upstream of the transcription start site. (--beforeRegionStartLength)"/>
                <param name="afterRegionStartLength" type="integer" value="1000" min="1"
                    label="Distance downstream of the end site of the given regions"
                    help="If the regions are genes, this would be the distance downstream of the transcription end site. (--afterRegionStartLength)"/>
            </when>
        </conditional>

        <expand macro="input_graphic_output_settings">
            <expand macro="input_save_matrix_values" />
        </expand>

        <!-- Advanced options are hidden unless the user switches them on. -->
        <conditional name="advancedOpt" >
            <param name="showAdvancedOpt" type="select" label="Show advanced options" >
                <option value="no" selected="true">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="no" />
            <when value="yes">
                <param name="binSize" type="integer" value="50" min="1"
                    label="Length, in base pairs, of the non-overlapping bin for averaging the score over the regions length"
                    help="(--binSize)"/>

                <expand macro="sortRegions" />
                <expand macro="sortUsing" />

                <param name="averageTypeBins" type="select"
                    label="Define the type of statistic that should be displayed."
                    help="The value is computed for each bin. (--averageTypeBins)">
                    <option value="mean" selected="true">mean</option>
                    <option value="median">median</option>
                    <option value="min">min</option>
                    <option value="max">max</option>
                    <option value="sum">sum</option>
                    <option value="std">std</option>
                </param>

                <param name="missingDataAsZero" type="boolean" truevalue="--missingDataAsZero" falsevalue="" checked="False"
                    label="Convert missing values to 0?"
                    help="If set to 'yes', missing values (NAs) are converted to 0.
                          The default is to ignore such cases, which will be
                          depicted as black areas in the heatmap by default. (--missingDataAsZero)" />

                <expand macro="skipZeros" />
                <expand macro="skipNAs" />

                <param name="minThreshold" type="float" optional="True"
                    label="Minimum threshold"
                    help="Any region containing a value that is equal or less than this numeric
                    value will be skipped. This is useful to skip, for example, genes where the
                    read count is zero for any of the bins. This could be the result of
                    unmappable areas and can bias the overall results. (--minThreshold)"/>
                <param name="maxThreshold" type="float" optional="True"
                    label="Maximum threshold"
                    help="Any region containing a value that is equal or higher than this
                    numeric value will be skipped. The max threshold is useful to skip those
                    few regions with very high read counts (e.g. major satellites) that may
                    bias the average values. (--maxThreshold)"/>
                <param name="scale" type="float" optional="True" label="Scaling factor"
                    help="If set, all values are multiplied by this number. (--scale)"/>
            </when>
        </conditional>
    </inputs>
    <!-- Primary output is the matrix archive; further outputs are contributed by the
         two expanded macros. -->
    <outputs>
        <data name="outFileName"
              format="deeptools_compute_matrix_archive"
              label="${tool.name} on ${on_string}: Matrix"/>
        <expand macro="output_graphic_outputs"/>
        <expand macro="output_save_matrix_values"/>
    </outputs>
    <!--
    computeMatrix -S test.bw -R test2.bed -a 100 -b 100 -bs 1
    -->
    <tests>
        <!-- reference-point mode, 10 bp flanks, sum statistics -->
        <test>
            <param name="regionsFile" ftype="bed" value="computeMatrix1.bed"/>
            <param name="scoreFile" ftype="bigwig" value="bamCoverage_result4.bw"/>
            <param name="mode_select" value="reference-point"/>
            <param name="beforeRegionStartLength" value="10"/>
            <param name="afterRegionStartLength" value="10"/>
            <param name="showAdvancedOpt" value="yes"/>
            <param name="binSize" value="10"/>
            <param name="sortUsing" value="sum"/>
            <param name="averageTypeBins" value="sum"/>
            <param name="skipNAs" value="False"/>
            <output name="outFileName" ftype="deeptools_compute_matrix_archive" file="computeMatrix_result1.gz" compare="sim_size"/>
        </test>
        <!-- reference-point mode, 10 bp flanks, default statistics -->
        <test>
            <param name="regionsFile" ftype="bed" value="computeMatrix2.bed"/>
            <param name="scoreFile" ftype="bigwig" value="computeMatrix2.bw"/>
            <param name="mode_select" value="reference-point"/>
            <param name="beforeRegionStartLength" value="10"/>
            <param name="afterRegionStartLength" value="10"/>
            <param name="showAdvancedOpt" value="yes"/>
            <param name="binSize" value="10"/>
            <output name="outFileName" ftype="deeptools_compute_matrix_archive" file="computeMatrix_result2.gz" compare="sim_size"/>
        </test>
        <!-- scale-regions mode with a custom end label -->
        <test>
            <param name="regionsFile" ftype="bed" value="computeMatrix2.bed"/>
            <param name="scoreFile" ftype="bigwig" value="computeMatrix2.bw"/>
            <param name="mode_select" value="scale-regions"/>
            <param name="endLabel" value="END"/>
            <!-- NOTE(review): "regionStartLength" is the name of the conditional; its
                 selector parameter is "regionStartLength_select". This value may not be
                 applied as intended. Confirm, and regenerate the expected file if the
                 name is corrected. -->
            <param name="regionStartLength" value="yes"/>
            <param name="showAdvancedOpt" value="yes"/>
            <output name="outFileName" ftype="deeptools_compute_matrix_archive" file="computeMatrix_result3.gz" compare="sim_size"/>
        </test>
    </tests>
  <help>
<![CDATA[
**What it does**

This tool prepares an intermediary file (a gzipped table of values)
that contains scores associated with genomic regions that can be used
afterwards to plot a heatmap or a profile.

Genomic regions can really be anything - genes, parts of genes, ChIP-seq
peaks, favorite genome regions... as long as you provide a proper file
in BED or INTERVAL format. If you would like to compare different groups of regions
(e.g. genes from chromosomes 2 and 3), you can supply more than one BED file, one for each group.

computeMatrix can also be used to filter and sort
regions according to their score by making use of its advanced output options.


.. image:: $PATH_TO_IMAGES/flowChart_computeMatrixetc.png
   :alt: Relationship between computeMatrix, heatmapper and profiler


You can find more details on the computeMatrix wiki page: https://github.com/fidelram/deepTools/wiki/Visualizations#wiki-computeMatrix


-----

@REFERENCES@
]]>
    </help>
    <expand macro="citations" />
</tool>
author	bgruening
date	Mon, 21 Dec 2015 07:25:28 -0500
parents	de51aa03c9b9
children	345a9a2281e0