# HG changeset patch # User bgruening # Date 1376479098 14400 # Node ID c5847db0cb414f8f0b0de8123d5875fe46e7dc92 # Parent 1f312af2f8db08a2466bd9f9f57f01212c667d76 Uploaded diff -r 1f312af2f8db -r c5847db0cb41 bamCompare.xml --- a/bamCompare.xml Tue Aug 06 08:20:47 2013 -0400 +++ b/bamCompare.xml Wed Aug 14 07:18:18 2013 -0400 @@ -1,9 +1,10 @@ - + normalizes and compares two BAM files to obtain the ratio, log2ratio or difference. - deepTools + deepTools numpy ucsc_tools + deepTools bamCompare @@ -34,11 +35,16 @@ --ratio $comparison.type #if $comparison.type=='subtract': - #if $comparison.normalization.type=='rpkm': - --normalizeUsingRPKM - #elif $comparison.normalization.type=='1x': - --normalizeTo1x $comparison.normalization.normalizeTo1x - #end if + #if $comparison.normalization.type=='rpkm': + --normalizeUsingRPKM + #elif $comparison.normalization.type=='1x': + --normalizeTo1x $comparison.normalization.normalizeTo1x + #end if + + #if str($comparison.ignoreForNormalization).strip() != '': + --ignoreForNormalization $comparison.ignoreForNormalization + #end if + #end if #if $advancedOpt.showAdvancedOpt == "yes": @@ -65,10 +71,10 @@ + help="The BAM file must be sorted."/> + help="The BAM file must be sorted."/> + @@ -158,7 +167,7 @@ + help= "If set, only reads that have a mapping quality score higher than the given value are considered. *Note* Bowtie's Mapping quality is related to uniqueness: the higher the score, the more unique is a read. 
A mapping quality defined by Bowtie of 10 or less indicates that there is at least a 1 in 10 chance that the read truly originated elsewhere."/> + correlates pairs of BAM files - deepTools + deepTools + deepTools #import tempfile @@ -46,10 +47,7 @@ #end if #if $advancedOpt.showAdvancedOpt == "yes": - #if $advancedOpt.smoothLength: - --smoothLength '$advancedOpt.smoothLength' - #end if - + #if str($advancedOpt.region.value) != '': --region '$advancedOpt.region' #end if @@ -73,7 +71,7 @@ + help="The BAM file must be sorted."/> @@ -81,7 +79,7 @@ + help ="Reads will be extended to match this length unless they are paired-end, in which case they will be extended to match the fragment length. *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will be extended to match the fragment length."/> @@ -95,9 +93,7 @@ - + + help= "If set, only reads that have a mapping quality score higher than the given value are considered. *Note* Bowtie's Mapping quality is related to uniqueness: the higher the score, the more unique is a read. A mapping quality defined by Bowtie of 10 or less indicates that there is at least a 1 in 10 chance that the read truly originated elsewhere."/> + help ="If set, then regions with zero counts for *all* BAM files given are included. The default behavior is to ignore those cases." /> @@ -159,7 +155,7 @@ This tool is useful to assess the overall similarity of different BAM files. A typical application is to check the correlation between replicates or published data sets. -The tool splits the genomes are into bins of given length. For each bin, the number of reads +The tool splits the genomes into bins of given length. For each bin, the number of reads found in each BAM file is counted and a correlation is computed for all pairs of BAM files. @@ -167,14 +163,14 @@ .. 
class:: infomark -If you would like to give us feedback or you run into any trouble, please sent an email to deeptools@googlegroups.com +If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. .. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de + diff -r 1f312af2f8db -r c5847db0cb41 bamCoverage.xml --- a/bamCoverage.xml Tue Aug 06 08:20:47 2013 -0400 +++ b/bamCoverage.xml Wed Aug 14 07:18:18 2013 -0400 @@ -1,9 +1,10 @@ - - Given a BAM file, generates a coverage bigwig file. Multiple options available to count reads and normalize coverage. + + generates a coverage bigWig file from a given BAM file. Multiple options are available to count reads and normalize coverage. - deepTools + deepTools ucsc_tools numpy + deepTools bamCoverage @@ -26,7 +27,11 @@ #elif $scaling.type=='own': --scaleFactor $scaling.scaleFactor #end if - + + ##if str($ignoreForNormalization).strip() != '': + ## --ignoreForNormalization $ignoreForNormalization + ##end if + #if $advancedOpt.showAdvancedOpt == "yes": #if $advancedOpt.smoothLength: --smoothLength '$advancedOpt.smoothLength' @@ -47,7 +52,7 @@ + help="The BAM file must be sorted."/> + + @@ -107,7 +119,7 @@ + help= "If set, only reads that have a mapping quality score higher than the given value are considered. *Note* Bowtie's Mapping quality is related to uniqueness: the higher the score, the more unique is a read. A mapping quality defined by Bowtie of 10 or less indicates that there is at least a 1 in 10 chance that the read truly originated elsewhere."/> @@ -129,14 +141,13 @@ .. 
class:: infomark -If you would like to give us feedback or you run into any trouble, please sent an email to deeptools@googlegroups.com +If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. .. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de diff -r 1f312af2f8db -r c5847db0cb41 bamFingerprint.xml --- a/bamFingerprint.xml Tue Aug 06 08:20:47 2013 -0400 +++ b/bamFingerprint.xml Wed Aug 14 07:18:18 2013 -0400 @@ -1,7 +1,8 @@ - - plots profiles of bam files + + plots profiles of BAM files; useful for assessing ChIP signal strength - deepTools + deepTools + deepTools #import tempfile @@ -45,10 +46,7 @@ #end if #if $advancedOpt.showAdvancedOpt == "yes": - #if $advancedOpt.smoothLength: - --smoothLength '$advancedOpt.smoothLength' - #end if - + #if str($advancedOpt.region.value) != '': --region '$advancedOpt.region' #end if @@ -72,7 +70,7 @@ + help="The BAM file must be sorted."/> @@ -86,10 +84,7 @@ - - + @@ -112,11 +107,11 @@ + help= "If set, only reads that have a mapping quality score higher than the given value are considered. *Note* Bowtie's Mapping quality is related to uniqueness: the higher the score, the more unique is a read. A mapping quality defined by Bowtie of 10 or less indicates that there is at least a 1 in 10 chance that the read truly originated elsewhere."/> + help ="If set, then zero counts that happen for *all* BAM files given are ignored. This might have the effect that fewer regions are considered than indicated in the option where the number of samples is defined." 
/> @@ -143,7 +138,7 @@ This tool is based on a method developed by Diaz et al. (2012). Stat Appl Genet Mol Biol 11(3). The resulting plot can be used to assess the strength of a ChIP (for factors that bind to narrow regions). -The tool first samples indexed bam files and counts all reads overlapping a window (bin) of specified length. +The tool first samples indexed BAM files and counts all reads overlapping a window (bin) of specified length. These counts are then sorted according to their rank and the cumulative sum of read counts are plotted. An ideal input with perfect uniform distribution of reads along the genome (i.e. without enrichments in open chromatin etc.) should generate a straight diagonal line. A very specific and strong ChIP enrichment will be indicated by a prominent and steep @@ -154,14 +149,13 @@ .. class:: infomark -If you would like to give us feedback or you run into any trouble, please sent an email to deeptools@googlegroups.com +If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. .. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de diff -r 1f312af2f8db -r c5847db0cb41 bigwigCompare.xml --- a/bigwigCompare.xml Tue Aug 06 08:20:47 2013 -0400 +++ b/bigwigCompare.xml Wed Aug 14 07:18:18 2013 -0400 @@ -1,9 +1,10 @@ - + normalizes and compares two bigWig files to obtain the ratio, log2ratio or difference - deepTools + deepTools ucsc_tools numpy + deepTools bigwigCompare @@ -99,14 +100,13 @@ .. 
class:: infomark -If you would like to give us feedback or you run into any trouble, please sent an email to deeptools@googlegroups.com +If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. .. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de diff -r 1f312af2f8db -r c5847db0cb41 computeGCBias.xml --- a/computeGCBias.xml Tue Aug 06 08:20:47 2013 -0400 +++ b/computeGCBias.xml Wed Aug 14 07:18:18 2013 -0400 @@ -1,8 +1,9 @@ - + to see whether your samples should be normalized for GC bias - deepTools + deepTools + deepTools @@ -69,7 +70,7 @@ + help="The BAM file must be sorted."/> @@ -95,7 +96,7 @@ + help ="If paired-end reads are used, the fragment length is computed from the BAM file."/> @@ -117,7 +118,7 @@ + help ="To plot the reads per GC over a region, the size of the region is required (see below for more details of the method). By default, the bin size is set to 300 bp, which is close to the standard fragment size for many sequencing applications. However, if the depth of sequencing is low, a larger bin size will be required, otherwise many bins will not overlap with any read."/> - (output['showOutputSettings'] == 'yes' and output['saveBiasPlot'] == True) + saveBiasPlot is True + **What it does** -This tool computes the GC bias ussing the method proposed by Benjamini and Speed (2012). Nucleic Acids Res. +This tool computes the GC bias using the method proposed by Benjamini and Speed (2012). Nucleic Acids Res. 
(see below for more explanations) The output is used to plot the bias and can also be used later on to correct the bias with the tool correctGCbias. +There are two plots produced by the tool: a boxplot showing the absolute read numbers per genomic-GC bin and an x-y plot +depicting the ratio of observed/expected reads per genomic GC content bin. + +----- + +**Summary of the method used** + +In order to estimate how many reads with what kind of GC content one should have sequenced, we first need to determine how many regions the specific +reference genome contains for each amount of GC content, i.e. how many regions in the genome have 50% GC (or 10% GC or 90% GC or...). +We then sample a large number of equally sized genome bins and count how many times we see a bin with 50% GC (or 10% GC or 90% or...). These EXPECTED values are independent of any +sequencing as they only depend on the respective reference genome (i.e. they will most likely vary between mouse and fruit fly due to their genomes' different GC contents). +The OBSERVED values are based on the reads from the sequenced sample. Instead of noting how many genomic regions there are per GC content, we now count the reads per GC content. +In an ideal sample without GC bias, the ratio of OBSERVED/EXPECTED values should be close to 1 regardless of the GC content. Due to PCR (over)amplifications, the majority of ChIP samples +usually shows a significant bias towards reads with high GC content (>50%). ----- .. class:: infomark -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_. +If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - .. 
_Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de diff -r 1f312af2f8db -r c5847db0cb41 computeMatrix.xml --- a/computeMatrix.xml Tue Aug 06 08:20:47 2013 -0400 +++ b/computeMatrix.xml Wed Aug 14 07:18:18 2013 -0400 @@ -1,7 +1,8 @@ - + summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile - deepTools + deepTools + deepTools #import tempfile @@ -82,14 +83,14 @@ - + - + - + @@ -162,7 +163,7 @@ - + @@ -171,7 +172,7 @@ - + @@ -209,19 +210,17 @@ **What it does** -This tool summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile. Typically, these genomic regions are genes, but any other regions defined in a BED or GFF format can be used. This tool can also be used to filter and sort regions according to their score. +This tool summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile. Typically, these genomic regions are genes, but any other regions defined in a BED or INTERVAL format can be used. This tool can also be used to filter and sort regions according to their score. ----- .. class:: infomark -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_. +If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - .. 
_Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de diff -r 1f312af2f8db -r c5847db0cb41 correctGCBias.xml --- a/correctGCBias.xml Tue Aug 06 08:20:47 2013 -0400 +++ b/correctGCBias.xml Wed Aug 14 07:18:18 2013 -0400 @@ -1,8 +1,9 @@ - - use the output from computeGCBias to obtain corrected sample files + + uses the output from computeGCBias to generate corrected BAM files - deepTools + deepTools ucsc_tools + deepTools #import tempfile @@ -44,7 +45,7 @@ - + @@ -111,14 +112,14 @@ .. class:: infomark -If you would like to give us feedback or you run into any trouble, please sent an email to deeptools@googlegroups.com +If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. .. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de + diff -r 1f312af2f8db -r c5847db0cb41 heatmapper.xml --- a/heatmapper.xml Tue Aug 06 08:20:47 2013 -0400 +++ b/heatmapper.xml Wed Aug 14 07:18:18 2013 -0400 @@ -1,4 +1,4 @@ - + creates a heatmap for a score associated to genomic regions @@ -7,7 +7,8 @@ matplotlib scipy ucsc_tools - deepTools + deepTools + deepTools @@ -119,8 +120,8 @@ - - + + @@ -347,19 +348,19 @@ **What it does** -HeatMapper visualizes scores associated with genomic regions, for example log2 fold change values obtained from ChIP-seq experiments. 
Those values can be visualized individually along each of the regions provided by the user. +The heatmapper visualizes scores associated with genomic regions, for example ChIP enrichment values around the TSS of genes. Those values can be visualized individually along each of the regions provided by the user in INTERVAL or BED format. In addition to the heatmap, an average profile plot is plotted on top of the heatmap (can be turned off by the user; it can also be generated separately by the tool profiler). We implemented vast optional parameters and we encourage you to play around with the min/max values displayed in the heatmap as well as with the different coloring options. If you would like to plot heatmaps for different groups of genomic regions individually, e.g. one plot per chromosome, simply supply each group as an individual BED file. ----- .. class:: infomark -If you would like to give us feedback or you run into any trouble, please sent an email to deeptools@googlegroups.com +If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. .. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. 
_Fidel Ramirez: ramirez@ie-freiburg.mpg.de + diff -r 1f312af2f8db -r c5847db0cb41 profiler.xml --- a/profiler.xml Tue Aug 06 08:20:47 2013 -0400 +++ b/profiler.xml Wed Aug 14 07:18:18 2013 -0400 @@ -1,9 +1,10 @@ - + creates a profile plot for a score associated to genomic regions - deepTools + deepTools + deepTools profiler @@ -13,16 +14,16 @@ #if $output.showOutputSettings == "yes" #set newoutFileName = str($outFileName)+"."+str($output.outFileFormat) --outFileName $newoutFileName - #if $output.outFileNameData: - --outFileNameData '$output.outFileNameData' + #if $output.saveData: + --outFileNameData '$outFileNameData' #end if - #if $output.outFileNameMatrix: - --outFileNameMatrix '$output.outFileNameMatrix' + #if $output.saveMatrix: + --outFileNameMatrix '$outFileNameMatrix' #end if - - #if $output.outFileSortedRegions: - --outFileSortedRegions '$output.outFileSortedRegions' + + #if $output.saveSortedRegions: + --outFileSortedRegions '$outFileSortedRegions' #end if #else #set newoutFileName = str($outFileName)+".png" @@ -32,7 +33,6 @@ #if $scaleRegions.showScaleRegionsOpt == "yes": --startLabel $scaleRegions.startLabel --endLabel $scaleRegions.endLabel - --refPointLabel $scaleRegions.refPointLabel #end if #if $advancedOpt.showAdvancedOpt == "yes" @@ -76,7 +76,6 @@ - @@ -141,12 +140,12 @@ - - - - - - + + + + + + (output['showOutputSettings'] == 'yes' and output['saveData'] == True) @@ -164,21 +163,20 @@ This tool creates a profile plot for a score associated to genomic regions. Typically, these regions are genes, but any other regions defined in a BED or -GFF format will work. A preprocessed matrix generated by the tool +INTERVAL format will work. A preprocessed matrix generated by the tool computeMatrix is required. ----- .. 
class:: infomark -If you would like to give us feedback or you run into any trouble, please sent an email to deeptools@googlegroups.com +If you would like to give us feedback or you run into any trouble, please send an email to deeptools@googlegroups.com This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. .. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ .. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de diff -r 1f312af2f8db -r c5847db0cb41 tool_dependencies.xml --- a/tool_dependencies.xml Tue Aug 06 08:20:47 2013 -0400 +++ b/tool_dependencies.xml Wed Aug 14 07:18:18 2013 -0400 @@ -42,11 +42,11 @@ The tools downloaded by this dependency definition are free for academic use. TODO: UCSC tools are only available with their latest version. That is not good for reproducibility. - + git clone --recursive https://github.com/fidelram/deepTools.git - git reset --hard 59e067cce039cb93add04823c9f51cab202f8c2b + git reset --hard df852fa1ef13251a17274ee18fbf919fbc515079 $INSTALL_DIR/lib/python export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python &&