Mercurial > repos > bgruening > deeptools_plot_correlation
changeset 27:cf1409a29f50 draft
planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit 09975f870c75347fba5c6777c9f3b442bdeeb289
line wrap: on
line diff
--- a/deepTools_macros.xml Tue Jan 24 04:05:06 2017 -0500 +++ b/deepTools_macros.xml Fri Mar 31 08:16:42 2017 -0400 @@ -1,5 +1,17 @@ <macros> + <token name="@THREADS@">--numberOfProcessors "\${GALAXY_SLOTS:-4}"</token> + <token name="@WRAPPER_VERSION@">2.5.0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="2.7.10">python</requirement> + <requirement type="package" version="@WRAPPER_VERSION@">deeptools</requirement> + <yield /> + </requirements> + <expand macro="stdio" /> + <version_command>@BINARY@ --version</version_command> + </xml> + <xml name="advancedOpt_scaffold"> <conditional name="advancedOpt"> <param name="showAdvancedOpt" type="select" label="Show advanced options" > @@ -97,18 +109,6 @@ </param> </xml> - <token name="@THREADS@">--numberOfProcessors "\${GALAXY_SLOTS:-4}"</token> - <token name="@WRAPPER_VERSION@">2.4.2</token> - <xml name="requirements"> - <requirements> - <requirement type="package" version="2.7.10">python</requirement> - <requirement type="package" version="2.4.2">deeptools</requirement> - <yield /> - </requirements> - <expand macro="stdio" /> - <version_command>@BINARY@ --version</version_command> - </xml> - <xml name="smoothLength"> <param argument="--smoothLength" type="integer" value="" optional="True" min="1" label="Smooth values using the following length (in bases)" @@ -181,10 +181,10 @@ <xml name="fragLength"> <param argument="--minFragmentLength" type="integer" optional="True" value="0" min="0" label="Minimum fragment length for inclusion." - help="A value greater than 0 will filter out ALL single-end reads. This is primarily useful in things like ATACseq, where one would like to look specifically at mono- or di-nucleosome fragments." /> + help="This is primarily useful in things like ATACseq, where one would like to look specifically at mono- or di-nucleosome fragments." /> <param argument="--maxFragmentLength" type="integer" optional="True" value="0" min="0" label="Maximum fragment length for inclusion." - help="As above, but the maximum length. A value of 0 (the default) is equivalent to no maximum." /> + help="A value of 0 (the default) is equivalent to no maximum." /> </xml> <xml name="read_processing_options"> @@ -324,9 +324,7 @@ <xml name="scaleFactor"> <param argument="--scaleFactor" type="float" value="1" label="Scaling factor" - help="When used in combination with --normalizeTo1x or - --normalizeUsingRPKM, the computed scaling factor will - be multiplied by the given scale factor." /> + help="The computed scaling factor will be multiplied by this (default 1)." /> </xml> <xml name="scaleFactors"> @@ -441,19 +439,22 @@ <![CDATA[ #set files=[] #set labels=[] + #import re #if $multibam_conditional.orderMatters == "No": #for $counter, $bamfile in enumerate($multibam_conditional.bamfiles): + #set identifier = re.sub('[^\.\s\w\-]', '_', str($bamfile.element_identifier)) ln -s "${bamfile}" "./${counter}.bam" && ln -s "${bamfile.metadata.bam_index}" "./${counter}.bam.bai" && #silent $files.append('%s.bam' % $counter) - #silent $labels.append("'%s'" % ($bamfile.display_name)) + #silent $labels.append("'%s'" % identifier) #end for #else: #for $counter, $f in enumerate($multibam_conditional.multibam_repeats): + #set identifier = re.sub('[^\.\s\w\-]', '_', str($f.bamfiles.element_identifier)) ln -s "${f.bamfiles}" "./${counter}.bam" && ln -s "${f.bamfiles.metadata.bam_index}" "./${counter}.bam.bai" && #silent $files.append('%s.bam' % $counter) - #silent $labels.append("'%s'" % ($f.bamfiles.display_name)) + #silent $labels.append("'%s'" % $identifier) #end for #end if ]]> @@ -463,17 +464,20 @@ <![CDATA[ #set files=[] #set labels=[] + #import re #if $multibigwig_conditional.orderMatters == "No": #for $counter, $bigwig in enumerate($multibigwig_conditional.bigwigfiles): + #set identifier = re.sub('[^\.\s\w\-]', '_', str($bigwig.element_identifier)) ln -s "${bigwig}" "${counter}.bw" && #silent $files.append('%s.bw' % $counter) - #silent $labels.append("'%s'" % ($bigwig.display_name)) + #silent $labels.append("'%s'" % $identifier) #end for #else: #for $counter, $f in enumerate($multibigwig_conditional.multibigwig_repeats): + #set identifier = re.sub('[^\.\s\w\-]', '_', str($f.bigwigfiles.element_identifier)) ln -s "${f.bigwigfiles}" "${counter}.bw" && #silent $files.append('%s.bw' % $counter) - #silent $labels.append("'%s'" % ($f.bigwigfiles.display_name)) + #silent $labels.append("'%s'" % $identifier) #end for #end if ]]>
--- a/plotCorrelation.xml Tue Jan 24 04:05:06 2017 -0500 +++ b/plotCorrelation.xml Fri Mar 31 08:16:42 2017 -0400 @@ -1,142 +1,142 @@ -<tool id="deeptools_plot_correlation" name="plotCorrelation" version="@WRAPPER_VERSION@.0"> - <description>Create a heatmap or scatterplot of correlation scores between different samples </description> - <macros> - <token name="@BINARY@">plotCorrelation</token> - <import>deepTools_macros.xml</import> - </macros> - <expand macro="requirements"/> - <command> -<![CDATA[ - @BINARY@ - --corData "$corData" - --plotFile "$outFileName" - --corMethod "$corMethod" - --whatToPlot "$plotting_type.whatToPlot" - #if str($plotting_type.whatToPlot) == 'heatmap': - @HEATMAP_OPTIONS@ - #else: - --plotTitle '$plotting_type.plotTitle' - #end if - $skipZeros - --plotFileFormat "$outFileFormat" - $removeOutliers - #if $outFileCorMatrix: - --outFileCorMatrix "$matrix" - #end if - -]]> - </command> - <inputs> - <param name="corData" format="deeptools_coverage_matrix" type="data" label="Matrix file from the multiBamSummary tool"/> - <expand macro="corMethod" /> - - <conditional name="plotting_type" > - <param argument="--whatToPlot" type="select" label="Plotting type"> - <option value="heatmap" selected="True">Heatmap</option> - <option value="scatterplot">Scatterplot</option> - </param> - <when value="heatmap"> - <expand macro="heatmap_options" /> - </when> - <when value="scatterplot"> - <expand macro="plotTitle" /> - </when> - </conditional> - - <expand macro="skipZeros" /> - - <expand macro="input_image_file_format" /> - - <param argument="--removeOutliers" type="boolean" - truevalue="--removeOutliers" falsevalue="" label="Remove regions with very large counts" - help="If set, bins with very large counts are removed. Bins - with abnormally high reads counts artificially - increase pearson correlation; that's why, by default, - plotCorrelation tries to remove outliers using the median - absolute deviation (MAD) method applying a threshold - of 200 to only consider extremely large deviations - from the median. ENCODE blacklist page (https://sites. - google.com/site/anshulkundaje/projects/blacklists) - contains useful information about regions with - unusually high counts."/> - - <param name="outFileCorMatrix" type="boolean" label="Save the matrix of values underlying the heatmap"/> - - </inputs> - <outputs> - <expand macro="output_image_file_format_not_nested" /> - <data format="tabular" name="matrix" label="${tool.name} on ${on_string}: Correlation matrix"> - <filter>outFileCorMatrix is True</filter> - </data> - </outputs> - <tests> - <test> - <param name="corData" value="multiBamSummary_result1.npz" ftype="deeptools_coverage_matrix" /> - <param name="outFileFormat" value="png" /> - <param name="outFileCorMatrix" value="True" /> - <output name="matrix" file="plotCorrelation_result1.tabular" ftype="tabular" /> - <output name="outFileName" file="plotCorrelation_result1.png" ftype="png" compare="sim_size" delta="300" /> - </test> - <test> - <param name="corData" value="multiBamSummary_result1.npz" ftype="deeptools_coverage_matrix" /> - <param name="outFileFormat" value="png" /> - <param name="whatToPlot" value="scatterplot" /> - <param name="removeOutliers" value="True" /> - <param name="plotTitle" value="Test Plot" /> - <output name="outFileName" file="plotCorrelation_result2.png" ftype="png" compare="sim_size" delta="300" /> - </test> - </tests> - <help> -<![CDATA[ -What it does ------------- - -This tools takes the default output of ``multiBamSummary`` or ``multiBigwigSummary``, and computes the pairwise correlation among samples. -Results can be visualized as **scatterplots** or as a **heatmap** of correlation coefficients (see below for examples). - -Theoretical Background ----------------------- - -The result of the correlation computation is a **table of correlation coefficients** that indicates how "strong" the relationship between two samples is and it will consist of numbers between -1 and 1. (-1 indicates perfect anti-correlation, 1 perfect correlation.) - -We offer two different functions for the correlation computation: *Pearson* or *Spearman*. - -The *Pearson method* measures the **metric differences** between samples and is therefore influenced by outliers. -The *Spearman method* is based on **rankings**. - -Output ------- - -The default output is a **diagnostic plot** -- either a scatterplot or a clustered heatmap displaying the values for each pair-wise correlation (see below for example plots). - -Optionally, you can also obtain a table of the pairwise correlation coefficients. - -.. image:: $PATH_TO_IMAGES/plotCorrelation_output.png - :width: 600 - :height: 271 - -Example plots -------------- - -The following is the output of ``plotCorrelation`` with our test ChIP-Seq datasets (to be found under "Shared Data" --> "Data Library"). - -Average coverages were computed over 10 kb bins for chromosome X, -from bigWig files using ``multiBigwigSummary``. This was then used with ``plotCorrelation`` to make a heatmap of Spearman correlation coefficients. - -.. image:: $PATH_TO_IMAGES/plotCorrelation_galaxy_bw_heatmap_output.png - :width: 600 - :height: 518 - -The scatterplot could look like this: - -.. image:: $PATH_TO_IMAGES/plotCorrelation_scatterplot_PearsonCorr_bigwigScores.png - :width: 600 - :height: 600 - ------ - -@REFERENCES@ -]]> - </help> - <expand macro="citations" /> -</tool> +<tool id="deeptools_plot_correlation" name="plotCorrelation" version="@WRAPPER_VERSION@.0"> + <description>Create a heatmap or scatterplot of correlation scores between different samples </description> + <macros> + <token name="@BINARY@">plotCorrelation</token> + <import>deepTools_macros.xml</import> + </macros> + <expand macro="requirements"/> + <command> +<![CDATA[ + @BINARY@ + --corData "$corData" + --plotFile "$outFileName" + --corMethod "$corMethod" + --whatToPlot "$plotting_type.whatToPlot" + #if str($plotting_type.whatToPlot) == 'heatmap': + @HEATMAP_OPTIONS@ + #else: + --plotTitle '$plotting_type.plotTitle' + #end if + $skipZeros + --plotFileFormat "$outFileFormat" + $removeOutliers + #if $outFileCorMatrix: + --outFileCorMatrix "$matrix" + #end if + +]]> + </command> + <inputs> + <param name="corData" format="deeptools_coverage_matrix" type="data" label="Matrix file from the multiBamSummary tool"/> + <expand macro="corMethod" /> + + <conditional name="plotting_type" > + <param argument="--whatToPlot" type="select" label="Plotting type"> + <option value="heatmap" selected="True">Heatmap</option> + <option value="scatterplot">Scatterplot</option> + </param> + <when value="heatmap"> + <expand macro="heatmap_options" /> + </when> + <when value="scatterplot"> + <expand macro="plotTitle" /> + </when> + </conditional> + + <expand macro="skipZeros" /> + + <expand macro="input_image_file_format" /> + + <param argument="--removeOutliers" type="boolean" + truevalue="--removeOutliers" falsevalue="" label="Remove regions with very large counts" + help="If set, bins with very large counts are removed. Bins + with abnormally high reads counts artificially + increase pearson correlation; that's why, by default, + plotCorrelation tries to remove outliers using the median + absolute deviation (MAD) method applying a threshold + of 200 to only consider extremely large deviations + from the median. ENCODE blacklist page (https://sites. + google.com/site/anshulkundaje/projects/blacklists) + contains useful information about regions with + unusually high counts."/> + + <param name="outFileCorMatrix" type="boolean" label="Save the matrix of values underlying the heatmap"/> + + </inputs> + <outputs> + <expand macro="output_image_file_format_not_nested" /> + <data format="tabular" name="matrix" label="${tool.name} on ${on_string}: Correlation matrix"> + <filter>outFileCorMatrix is True</filter> + </data> + </outputs> + <tests> + <test> + <param name="corData" value="multiBamSummary_result1.npz" ftype="deeptools_coverage_matrix" /> + <param name="outFileFormat" value="png" /> + <param name="outFileCorMatrix" value="True" /> + <output name="matrix" file="plotCorrelation_result1.tabular" ftype="tabular" /> + <output name="outFileName" file="plotCorrelation_result1.png" ftype="png" compare="sim_size" delta="300" /> + </test> + <test> + <param name="corData" value="multiBamSummary_result1.npz" ftype="deeptools_coverage_matrix" /> + <param name="outFileFormat" value="png" /> + <param name="whatToPlot" value="scatterplot" /> + <param name="removeOutliers" value="True" /> + <param name="plotTitle" value="Test Plot" /> + <output name="outFileName" file="plotCorrelation_result2.png" ftype="png" compare="sim_size" delta="300" /> + </test> + </tests> + <help> +<![CDATA[ +What it does +------------ + +This tools takes the default output of ``multiBamSummary`` or ``multiBigwigSummary``, and computes the pairwise correlation among samples. +Results can be visualized as **scatterplots** or as a **heatmap** of correlation coefficients (see below for examples). + +Theoretical Background +---------------------- + +The result of the correlation computation is a **table of correlation coefficients** that indicates how "strong" the relationship between two samples is and it will consist of numbers between -1 and 1. (-1 indicates perfect anti-correlation, 1 perfect correlation.) + +We offer two different functions for the correlation computation: *Pearson* or *Spearman*. + +The *Pearson method* measures the **metric differences** between samples and is therefore influenced by outliers. +The *Spearman method* is based on **rankings**. + +Output +------ + +The default output is a **diagnostic plot** -- either a scatterplot or a clustered heatmap displaying the values for each pair-wise correlation (see below for example plots). + +Optionally, you can also obtain a table of the pairwise correlation coefficients. + +.. image:: $PATH_TO_IMAGES/plotCorrelation_output.png + :width: 600 + :height: 271 + +Example plots +------------- + +The following is the output of ``plotCorrelation`` with our test ChIP-Seq datasets (to be found under "Shared Data" --> "Data Library"). + +Average coverages were computed over 10 kb bins for chromosome X, +from bigWig files using ``multiBigwigSummary``. This was then used with ``plotCorrelation`` to make a heatmap of Spearman correlation coefficients. + +.. image:: $PATH_TO_IMAGES/plotCorrelation_galaxy_bw_heatmap_output.png + :width: 600 + :height: 518 + +The scatterplot could look like this: + +.. image:: $PATH_TO_IMAGES/plotCorrelation_scatterplot_PearsonCorr_bigwigScores.png + :width: 600 + :height: 600 + +----- + +@REFERENCES@ +]]> + </help> + <expand macro="citations" /> +</tool>
--- a/test-data/computeMatrixOperations.txt Tue Jan 24 04:05:06 2017 -0500 +++ b/test-data/computeMatrixOperations.txt Fri Mar 31 08:16:42 2017 -0400 @@ -1,4 +1,4 @@ Groups: genes Samples: - file_0 + bamCoverage_result4_bw_0
--- a/test-data/plotCorrelation_result1.tabular Tue Jan 24 04:05:06 2017 -0500 +++ b/test-data/plotCorrelation_result1.tabular Fri Mar 31 08:16:42 2017 -0400 @@ -1,3 +1,3 @@ - 'bowtie2-test1.bam' 'bowtie2-test1.bam' -'bowtie2-test1.bam' 1.0000 1.0000 -'bowtie2-test1.bam' 1.0000 1.0000 + 'bowtie2 test1.bam' 'bowtie2 test1.bam' +'bowtie2 test1.bam' 1.0000 1.0000 +'bowtie2 test1.bam' 1.0000 1.0000
--- a/test-data/plotFingerprint_quality_metrics.tabular Tue Jan 24 04:05:06 2017 -0500 +++ b/test-data/plotFingerprint_quality_metrics.tabular Fri Mar 31 08:16:42 2017 -0400 @@ -1,3 +1,3 @@ Sample AUC Synthetic AUC X-intercept Synthetic X-intercept Elbow Point Synthetic Elbow Point JS Distance Synthetic JS Distance % genome enriched diff. enrichment CHANCE divergence -bowtie2 test1.bam 0.00493632029864 0.481650684758 0.984443061605 1.15310443503e-24 0.984940883634 0.523268829811 NA 0.269861238192 NA NA NA -bowtie2 test1.bam 0.00493632029864 0.481650684758 0.984443061605 1.15310443503e-24 0.984940883634 0.523268829811 NA 0.269861238192 NA NA NA +bowtie2 test1.bam 0.00493632029864 0.481650684758 0.984443061605 1.15310443503e-24 0.984940883634 0.523268829811 NA 0.269004498068 NA NA NA +bowtie2 test1.bam 0.00493632029864 0.481650684758 0.984443061605 1.15310443503e-24 0.984940883634 0.523268829811 NA 0.269004498068 NA NA NA