# HG changeset patch
# User bgruening
# Date 1376309781 14400
# Node ID 675d25a0b9d4529f22f75b4fb18025db7a0a9ce2
# Parent a281b5931ffbd87c4e4c0bc0c7c5fb8c5b54aa23
Uploaded
diff -r a281b5931ffb -r 675d25a0b9d4 README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Mon Aug 12 08:16:21 2013 -0400
@@ -0,0 +1,80 @@
+Galaxy datatypes for HOMER tools
+================================
+
+These HOMER datatypes are copyright 2013 by Björn Grüning.
+
+See the licence text below.
+
+
+History
+=======
+
+======= ======================================================================
+Version Changes
+------- ----------------------------------------------------------------------
+v0.0.1 - First release.
+======= ======================================================================
+
+
+Installation
+============
+
+Doing this automatically via the Galaxy Tool Shed is probably simplest.
+
+
+Manual Installation
+===================
+
+Normally you would install this via the Galaxy Tool Shed, which would move
+the provided homer.py file into a suitable location and process the
+datatypes_conf.xml entry to be combined with your local configuration.
+
+However, if you really want to, this should work for a manual install. Add
+the following lines to the datatypes_conf.xml file in the Galaxy main folder::
+
+
+
+Also create the file lib/galaxy/datatypes/homer.py by moving, copying or linking
+the homer.py file provided in this tar-ball. Finally add 'import homer' near
+the start of file lib/galaxy/datatypes/registry.py (after the other import
+lines).
+
+
+Bug Reports
+===========
+
+You can file an issue at https://github.com/bgruening/galaxytools/issues or ask
+us on the Galaxy development list: http://lists.bx.psu.edu/listinfo/galaxy-dev
+
+
+Developers
+==========
+
+Development is happening here:
+
+ https://github.com/bgruening/galaxytools/
+
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+NOTE: This is the licence for the Galaxy HOMER datatypes **only**. HOMER
+and associated data files are available and licenced separately.
diff -r a281b5931ffb -r 675d25a0b9d4 bamCompare.xml
--- a/bamCompare.xml Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,210 +0,0 @@
-
- Normalize and compare two BAM files to output ratio, log2ratio or difference.
-
- deepTools
- numpy
- ucsc_tools
-
-
- bamCompare
- --bamfile1 '$bamFile1'
- -bai1 '${bamFile1.metadata.bam_index}'
- --bamfile2 '$bamFile2'
- -bai2 '${bamFile2.metadata.bam_index}'
-
- --outFileName '$outFileName'
- --outFileFormat '$outFileFormat'
-
- --fragmentLength $fragmentLength
- --binSize $binSize
-
- #if $scaling.method == 'SES':
- --scaleFactorsMethod SES
- --sampleLength $scaling.sampleLength
- #elif $scaling.method == 'readCount':
- --scaleFactorsMethod readCount
- #elif $scaling.method == 'own':
- --scaleFactors '$scaling.scaleFactor1:$scaling.scaleFactor2'
- #end if
-
- --ratio $comparison.type
-
-
- #if $comparison.type=='subtract':
- #if $comparison.normalization.type=='rpkm':
- --normalizeUsingRPKM
- #elif $comparison.normalization.type=='1x':
- --normalizeTo1x $comparison.normalization.normalizeTo1x
- #end if
- #end if
-
- #if $advancedOpt.showAdvancedOpt == "yes":
- #if $advancedOpt.smoothLength:
- --smoothLength '$advancedOpt.smoothLength'
- #end if
-
- #if str($advancedOpt.region.value) != '':
- --region '$advancedOpt.region'
- #end if
- $advancedOpt.doNotExtendPairedEnds
- $advancedOpt.ignoreDuplicates
-
- #if $advancedOpt.minMappingQuality:
- --minMappingQuality '$advancedOpt.minMappingQuality'
- #end if
-
- --missingDataAsZero $advancedOpt.missingDataAsZero
-
- #end if
- --numberOfProcessors 4
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This tool compares two BAM files based on the number of mapped reads. To
-compare the BAM files the genome is partitioned into bins of equal size, then
-the number of reads found in each BAM file are counted for such bins and
-finally a summarizing value is reported. This vaule can be the ratio of the
-number of reads per bin, the log2 of the ratio or the difference. This tool
-can normalize the number of reads on each BAM file using the SES method
-proposed by Diaz et al. (2012). "Normalization, bias correction, and peak
-calling for ChIP-seq". Statistical applications in genetics and molecular
-biology, 11(3). Normalization based on read counts is also available. The
-output is either a bedgraph or a bigwig file containing the bin location and
-the resulting comparison values. By default if reads are mated the fragment
-length reported in the BAM file is used.
-
------
-
-.. class:: infomark
-
-Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_.
-
-This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
-
-
-.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
-.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
-.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
-
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 bamCorrelate.xml
--- a/bamCorrelate.xml Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,167 +0,0 @@
-
- corrlates pairs of bam files
-
- deepTools
-
-
- #set files=[]
- #set labels=[]
- #for $i in $inputs
- #set $files += [str($i.bamfile)]
- #if str($i.label.value) != "":
- #set $labels += ["\"%s\"" % ($i.label.value)]
- #else
- #set $labels += ["\"%s\"" % ($i.bamfile.name)]
- #end if
- #end for
- bamCorrelate
- --bamfiles #echo " ".join($files)
- --labels #echo " ".join($labels)
-
- --fragmentLength $fragmentLength
- --corMethod $corMethod
-
- #set newoutFileName=str($outFileName)+".png"
- --plotFile $newoutFileName
-
- #if $outputOpt.showOutputOpt == "yes"
- #if $outputOpt.outFileRawCounts:
- --outRawCounts '$outputOpt.outFileRawCounts'
- #end if
- #if $outputOpt.outFileCorMatrix:
- --outFileCorMatrix '$outputOpt.outFileCorMatrix'
- #end if
- #end if
-
- #if $advancedOpt.showAdvancedOpt == "yes":
- #if $advancedOpt.smoothLength:
- --smoothLength '$advancedOpt.smoothLength'
- #end if
-
- #if str($advancedOpt.region.value) != '':
- --region '$advancedOpt.region'
- #end if
-
- --binSize '$advancedOpt.binSize'
- --numberOfSamples '$advancedOpt.numberOfSamples'
-
- $advancedOpt.doNotExtendPairedEnds
- $advancedOpt.ignoreDuplicates
- $advancedOpt.includeZeros
-
- #if $advancedOpt.minMappingQuality:
- --minMappingQuality '$advancedOpt.minMappingQuality'
- #end if
- #end if
-
- --numberOfProcessors 4; mv $newoutFileName $outFileName
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- (outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)
-
-
- (outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveCorMatrix'] == True)
-
-
-
-
-**What it does**
-
-Genomes are split into bins of given length. For each bin the number of reads
-found for each of the bam files is counted. A correlation is computed for all
-pairs of bam files.
-
------
-
-.. class:: infomark
-
-Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_.
-
-This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
-
-
-.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
-.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
-.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
-
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 bamCoverage.xml
--- a/bamCoverage.xml Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,143 +0,0 @@
-
- Given a BAM file, generates a coverage bigwig file. Multiple options available to count reads and normalize coverage.
-
- deepTools
- ucsc_tools
- numpy
-
- bamCoverage
- --bam '$bamInput'
- --bamIndex ${bamInput.metadata.bam_index}
- --outFileName '$outFileName'
- --outFileFormat '$outFileFormat'
-
- --fragmentLength $fragmentLength
- --binSize $binSize
-
-
- #if $scaling.type=='rpkm':
- --normalizeUsingRPKM
- #elif $scaling.type=='1x':
- --normalizeTo1x $scaling.normalizeTo1x
- #elif $scaling.type=='own':
- --scaleFactor $scaling.scaleFactor
- #end if
-
- #if $advancedOpt.showAdvancedOpt == "yes":
- #if $advancedOpt.smoothLength:
- --smoothLength '$advancedOpt.smoothLength'
- #end if
-
- #if str($advancedOpt.region.value) != '':
- --region '$advancedOpt.region'
- #end if
- $advancedOpt.doNotExtendPairedEnds
- $advancedOpt.ignoreDuplicates
-
- #if $advancedOpt.minMappingQuality:
- --minMappingQuality '$advancedOpt.minMappingQuality'
- #end if
-
- #end if
- --numberOfProcessors 4
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-Given a BAM file, this tool generates a bigWig or bedGraph file of fragment or read coverages. The way the method works is by first calculating all the number of reads (either extended to match the fragment length or not) that overlap each bin in the genome. Bins with zero counts are skipped, i.e. not added to the output file. The resulting read counts can be normalized using either a given scaling factor, the RPKM formula or to get a 1x depth of coverage (RPGC).
-
------
-
-.. class:: infomark
-
-Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_.
-
-This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
-
-
-.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
-.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
-.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
-
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 bamFingerprint.xml
--- a/bamFingerprint.xml Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,151 +0,0 @@
-
- plots profiles of bam files
-
- deepTools
-
-
- #set files=[]
- #set labels=[]
- #for $i in $inputs
- #set $files += [str($i.bamfile)]
- #if str($i.label.value) != "":
- #set $labels += ["\"%s\"" % ($i.label.value)]
- #else
- #set $labels += ["\"%s\"" % ($i.bamfile.name)]
- #end if
- #end for
- bamFingerprint
- --bamfiles #echo " ".join($files)
- --labels #echo " ".join($labels)
-
- --fragmentLength $fragmentLength
-
- #set newoutFileName=str($outFileName)+".png"
- --plotFile $newoutFileName
-
- #if $outputOpt.showOutputOpt == "yes"
- #if $outputOpt.saveRawCounts:
- --outRawCounts '$outFileRawCounts'
- #end if
- #end if
-
- #if $advancedOpt.showAdvancedOpt == "yes":
- #if $advancedOpt.smoothLength:
- --smoothLength '$advancedOpt.smoothLength'
- #end if
-
- #if str($advancedOpt.region.value) != '':
- --region '$advancedOpt.region'
- #end if
-
- --binSize '$advancedOpt.binSize'
- --numberOfSamples '$advancedOpt.numberOfSamples'
-
- $advancedOpt.doNotExtendPairedEnds
- $advancedOpt.ignoreDuplicates
- $advancedOpt.skipZeros
-
- #if $advancedOpt.minMappingQuality:
- --minMappingQuality '$advancedOpt.minMappingQuality'
- #end if
- #end if
-
- --numberOfProcessors 4; mv $newoutFileName $outFileName
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- (outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)
-
-
-
-
-**What it does**
-
-Samples indexed bam files and plots a profile for each bam file. At each
-sample position all reads overlaping a window (bin) of specified length are
-counted. This counts are then sorted and the cumulative sum plotted
-
------
-
-.. class:: infomark
-
-Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_.
-
-This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
-
-
-.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
-.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
-.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
-
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 bigwigCompare.xml
--- a/bigwigCompare.xml Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,116 +0,0 @@
-
- compares two bigwig files based on the number of mapped reads
-
- deepTools
- ucsc_tools
- numpy
-
-
- bigwigCompare
- --bigwig1 '$bigwigFile1'
- --bigwig2 '$bigwigFile2'
-
- --outFileName '$outFileName'
- --outFileFormat '$outFileFormat'
-
- --ratio $comparison_type
-
- #if $advancedOpt.showAdvancedOpt == "yes":
- #if str($advancedOpt.region.value) != '':
- --region '$advancedOpt.region'
- #end if
-
- --missingDataAsZero $advancedOpt.missingDataAsZero
- --scaleFactors '$advancedOpt.scaleFactor1:$advancedOpt.scaleFactor2'
- --pseudocount '$advancedOpt.pseudocount'
- --binSize $advancedOpt.binSize
-
- #end if
- --numberOfProcessors 4
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This tool compares two bigwig files based on the number of mapped reads. To
-compare the bigwig files the genome is partitioned into bins of equal size,
-then the number of reads found in each BAM file are counted for such bins and
-finally a summarizing value is reported. This vaule can be the ratio of the
-number of readsper bin, the log2 of the ratio, the sum or the difference.
-
------
-
-.. class:: infomark
-
-Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_.
-
-This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
-
-
-.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
-.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
-.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
-
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 computeGCBias.xml
--- a/computeGCBias.xml Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,147 +0,0 @@
-
-
-
- deepTools
-
-
-
-
-
- computeGCBias
- --bamfile '$bamInput'
- --species '$species'
- --GCbiasFrequenciesFile $outFileName
- --fragmentLength $fragmentLength
-
- #if $source.ref_source=="history":
- --genome $source.input1
- #else:
- --genome "${source.input1_2bit.fields.path}"
- #end if
-
- #if $advancedOpt.showAdvancedOpt == "yes":
- #if str($advancedOpt.region.value) != '':
- --region '$advancedOpt.region'
- #end if
-
- --binSize '$advancedOpt.binSize'
- --sampleSize '$advancedOpt.sampleSize'
- --regionSize '$advancedOpt.regionSize'
-
- #if $advancedOpt.filterOut:
- --filterOut $advancedOpt.filterOut
- #end if
-
- #if $advancedOpt.extraSampling:
- --extraSampling $advancedOpt.extraSampling
- #end if
-
- #end if
-
- #set move=""
- #if $output.showOutputSettings == "yes"
- #if $output.saveBiasPlot:
- --biasPlot biasPlot.png
- #set move="mv biasPlot.png $biasPlot"
- #end if
- #end if
- ; $move
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- (output['showOutputSettings'] == 'yes' and output['saveBiasPlot'] == True)
-
-
-
-
-**What it does**
-
-Computes the GC bias ussing Benjamini's method [citation]. The resulting GC
-bias can later be used to plot the bias or to correct the bias.
-
------
-
-.. class:: infomark
-
-Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_.
-
-This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
-
-
-.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
-.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
-.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
-
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 computeMatrix.xml
--- a/computeMatrix.xml Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,208 +0,0 @@
-
- summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile
-
- deepTools
-
-
- computeMatrix
- $mode.mode_select
- --regionsFileName '$regionsFile'
- --scoreFileName '$scoreFile'
- --outFileName '$outFileName'
-
- #if $output.showOutputSettings == "yes"
- #if $output.saveData:
- --outFileNameData '$outFileNameData'
- #end if
- #if $output.saveMatrix:
- --outFileNameMatrix '$outFileNameMatrix'
- #end if
-
- #if $output.saveSortedRegions:
- --outFileSortedRegions '$outFileSortedRegions'
- #end if
- #end if
-
- #if $mode.mode_select == "reference-point":
- --referencePoint $mode.referencePoint
- $mode.nanAfterEnd
- --beforeRegionStartLength $mode.beforeRegionStartLength
- --afterRegionStartLength $mode.afterRegionStartLength
- #else
- --regionBodyLength $mode.regionBodyLength
- --startLabel $mode.startLabel
- --endLabel $mode.endLabel
- #if $mode.regionStartLength.regionStartLength_select == "yes":
- --beforeRegionStartLength $mode.regionStartLength.beforeRegionStartLength
- --afterRegionStartLength $mode.regionStartLength.afterRegionStartLength
- #end if
- #end if
-
- #if $advancedOpt.showAdvancedOpt == "yes":
- --sortRegions '$advancedOpt.sortRegions'
- --sortUsing '$advancedOpt.sortUsing'
- --averageTypeBins '$advancedOpt.averageTypeBins'
- $advancedOpt.missingDataAsZero
- $advancedOpt.skipZeros
- $advancedOpt.binSize
-
- #if $advancedOpt.minThreshold:
- --minThreshold $advancedOpt.minThreshold
- #end if
- #if $advancedOpt.maxThreshold:
- --maxThreshold $advancedOpt.maxThreshold
- #end if
- #if $advancedOpt.scale:
- --scale $advancedOpt.scale
- #end if
-
- #end if
- --numberOfProcessors 4
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- (output['showOutputSettings'] == 'yes' and output['saveData'] == True)
-
-
- (output['showOutputSettings'] == 'yes' and output['saveMatrix'] == True)
-
-
- (output['showOutputSettings'] == 'yes' and output['saveSortedRegions'] == True)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-This tool summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile. Typically, these genomic regions are genes, but any other regions defined in a BED or GFF format can be used. This tool can also be used to filter and sort regions according to their score.
-
------
-
-.. class:: infomark
-
-Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_.
-
-This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
-
-
-.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
-.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
-.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 correctGCBias.xml
--- a/correctGCBias.xml Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-
-
-
-
- deepTools
- ucsc_tools
-
-
- correctGCBias
- --bamfile '$bamInput'
- --species '$species'
- --GCbiasFrequenciesFile $GCbiasFrequenciesFile
-
- #if $source.ref_source=="history":
- --genome $source.input1
- #else:
- --genome "${source.input1_2bit.fields.path}"
- #end if
-
- #if $advancedOpt.showAdvancedOpt == "yes":
- #if str($advancedOpt.region.value) != '':
- --region '$advancedOpt.region'
- #end if
-
- --binSize '$advancedOpt.binSize'
- #end if
-
- #set newoutFileName="corrected."+str($outFileFormat)
-
- --correctedFile $newoutFileName; mv $newoutFileName $outFileName
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-Computes the GC bias ussing Benjamini's method [citation]. The resulting GC
-bias can later be used to plot the bias or to correct the bias.
-
------
-
-.. class:: infomark
-
-Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_.
-
-This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
-
-
-.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
-.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
-.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
-
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Mon Aug 12 08:16:21 2013 -0400
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
diff -r a281b5931ffb -r 675d25a0b9d4 heatmapper.xml
--- a/heatmapper.xml Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,370 +0,0 @@
-
- creates a heatmap for a score associated to genomic regions
-
-
- samtools
- numpy
- matplotlib
- scipy
- ucsc_tools
- deepTools
-
-
-
- heatmapper
- --matrixFile $matrixFile
- #if $output.showOutputSettings == "yes"
- #set newoutFileName=str($outFileName)+"."+str($output.outFileFormat)
- --outFileName $newoutFileName
- #if $outFileNameData:
- --outFileNameData '$outFileNameData'
- #end if
-
- #if $outFileNameMatrix:
- --outFileNameMatrix '$outFileNameMatrix'
- #end if
-
- #if $outFileSortedRegions:
- --outFileSortedRegions '$outFileSortedRegions'
- #end if
- #else
- #set newoutFileName=str($outFileName)+".png"
- --outFileName $newoutFileName
- #end if
-
- #if $advancedOpt.showAdvancedOpt == "yes"
- #if $advancedOpt.sortRegions:
- --sortRegions '$advancedOpt.sortRegions'
- #end if
-
- #if $advancedOpt.sortUsing:
- --sortUsing '$advancedOpt.sortUsing'
- #end if
-
- #if $advancedOpt.averageTypeSummaryPlot:
- --averageTypeSummaryPlot '$advancedOpt.averageTypeSummaryPlot'
- #end if
-
- #if str($advancedOpt.missingDataColor.value) != "None":
- --missingDataColor '$advancedOpt.missingDataColor'
- #end if
-
- --colorMap '$advancedOpt.colorMap'
-
- #if $advancedOpt.zMin:
- --zMin $advancedOpt.zMin
- #end if
- #if $advancedOpt.zMax:
- --zMax $advancedOpt.zMax
- #end if
-
- #if $advancedOpt.yMin:
- --yMin $advancedOpt.yMin
- #end if
- #if $advancedOpt.yMax:
- --yMax $advancedOpt.yMax
- #end if
-
- --xAxisLabel '$advancedOpt.xAxisLabel'
- --yAxisLabel '$advancedOpt.yAxisLabel'
-
- --heatmapWidth $advancedOpt.heatmapWidth
- --heatmapHeight $advancedOpt.heatmapHeight
-
- --whatToShow '$advancedOpt.whatToShow'
-
- --startLabel '$advancedOpt.startLabel'
- --endLabel '$advancedOpt.endLabel'
- --refPointLabel '$advancedOpt.referencePointLabel'
- --regionsLabel '$advancedOpt.regionsLabel'
-
- #if str($advancedOpt.plotTitle.value) != "None":
- --plotTitle '$advancedOpt.plotTitle'
- #end if
-
- $advancedOpt.onePlotPerGroup
- #end if
- ; mv $newoutFileName $outFileName
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- (output['showOutputSettings'] == 'yes' and output['saveData'] == True)
-
-
- (output['showOutputSettings'] == 'yes' and output['saveMatrix'] == True)
-
-
- (output['showOutputSettings'] == 'yes' and output['saveSortedRegions'] == True)
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-HeatMapper visualizes scores associated with genomic regions, for example log2 fold change values obtained from ChIP-seq experiments. Those values can be visualized individually along each of the regions provided by the user.
-
------
-
-.. class:: infomark
-
-Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_.
-
-This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
-
-
-.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
-.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
-.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 homer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/homer.py Mon Aug 12 08:16:21 2013 -0400
@@ -0,0 +1,76 @@
+"""
+HOMER special datatypes
+"""
+
+from galaxy.datatypes.data import get_file_peek
+from galaxy.datatypes.data import Text, Data
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.images import Html
+
+
+class TagDirectory(object):
+ """Base class for HOMER's Tag Directory datatype, a composite dataset whose primary file is auto-generated."""
+
+ file_ext = 'homer_tagdir'
+ composite_type = 'auto_primary_file'
+ allow_datatype_change = False
+
+ def __init__(self, **kwd):
+ Text.__init__( self, **kwd ) # NOTE(review): this class derives from object, not Text - presumably meant to be combined with a Galaxy datatype base class; confirm
+ #self.add_composite_file('tagInfo.txt', description = 'basic configuration information', mimetype = 'text/html') # Contains basic configuration information
+ self.add_composite_file('tagLengthDistribution.txt', description = 'histogram of read lengths used for alignment', mimetype = 'text/html') # File contains a histogram of read lengths used for alignment.
+ self.add_composite_file('tagCountDistribution.txt', description = 'histogram of clonal read depth, showing the number of reads per unique position', mimetype = 'text/html') # File contains a histogram of clonal read depth, showing the number of reads per unique position.
+ self.add_composite_file('tagAutocorrelation.txt', description = 'distribution of distances between adjacent reads in the genome', mimetype = 'text/html') # The autocorrelation routine creates a distribution of distances between adjacent reads in the genome.
+ self.add_composite_file('tagFreq.txt', description = "nucleotide and dinucleotide frequencies as a function of distance from the 5' end of all reads", mimetype = 'text/html', optional=True) # Calculates the nucleotide and dinucleotide frequencies as a function of distance from the 5' end of all reads.
+ self.add_composite_file('tagFreqUniq.txt', description = "nucleotide and dinucleotide frequencies as a function of distance from the 5' end of all reads (counted only once)", mimetype = 'text/html', optional=True) # Same as tagFreq.txt, however individual genomic positions are only counted once.
+ self.add_composite_file('tagGCcontent.txt', description = 'Distribution of fragment GC%-content', mimetype = 'text/html', optional=True) # Distribution of fragment GC%-content.
+ self.add_composite_file('genomeGCcontent.txt', description = 'Distribution of fragment GC%-content at each location in the genome', mimetype = 'text/html', optional=True) # Distribution of fragment GC%-content at each location in the genome.
+
+ def generate_primary_file( self, dataset = None ):
+ rval = ['
HOMER database files
']
+ for composite_name, composite_file in self.get_composite_files( dataset = dataset ).iteritems():
+ opt_text = ''
+ if composite_file.optional:
+ opt_text = ' (optional)'
+ rval.append( '
' )
+ return "\n".join( rval )
+
+ def display_data(self, trans, data, preview=False, filename=None,
+ to_ext=None, size=None, offset=None, **kwd):
+ """Return a fixed one-line description rather than the dataset contents.
+
+ Per the original author this is an old display hook that is still called; its result is shown in the central pane via the "eye" icon.
+ """
+ return "This is a HOMER database."
+
+ def set_peek( self, dataset, is_multi_byte=False ):
+ """Set fixed peek and blurb text, or 'purged' notices when dataset.dataset.purged is true."""
+ if not dataset.dataset.purged:
+ dataset.peek = "HOMER database (multiple files)"
+ dataset.blurb = "HOMER database (multiple files)"
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+
+ def display_peek( self, dataset ):
+ """Return dataset.peek, or a fixed description if reading it raises (NOTE(review): bare except catches everything)."""
+ try:
+ return dataset.peek
+ except:
+ return "HOMER database (multiple files)"
+
+ def get_mime(self):
+ """Return the MIME type 'text/plain' (pretend it is text for peek)."""
+ return 'text/plain'
+
+ def merge(split_files, output_file):
+ """Merge HOMER databases (not implemented; always raises NotImplementedError). NOTE(review): takes no self/cls and no decorator is visible here - confirm how it is invoked."""
+ raise NotImplementedError("Merging HOMER databases is not supported")
+
+ def split( cls, input_datasets, subdir_generator_function, split_params):
+ """Split a HOMER database (not implemented): return None when split_params is None, otherwise raise NotImplementedError. NOTE(review): first parameter is cls but no @classmethod decorator is visible here."""
+ if split_params is None:
+ return None
+ raise NotImplementedError("Can't split HOMER databases")
+
diff -r a281b5931ffb -r 675d25a0b9d4 profiler.xml
--- a/profiler.xml Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,189 +0,0 @@
-
-
- creates a profile plot for a score associated to genomic regions
-
-
- deepTools
-
-
- profiler
- --matrixFile $matrixFile
-
- #if $output.showOutputSettings == "yes"
- #set newoutFileName=str($outFileName)+"."+str($output.outFileFormat)
- --outFileName $newoutFilename
- #if $output.outFileNameData:
- --outFileNameData '$output.outFileNameData'
- #end if
-
- #if $output.outFileNameMatrix:
- --outFileNameMatrix '$output.outFileNameMatrix'
- #end if
-
- #if $output.outFileSortedRegions:
- --outFileSortedRegions '$output.outFileSortedRegions'
- #end if
- #else
- #set newoutFileName=str($outFileName)+".png"
- --outFileName $newoutFileName
- #end if
-
- #if $scaleRegions.showScaleRegionsOpt == "yes":
- --startLabel $scaleRegions.startLabel
- --endLabel $scaleRegions.endLabel
- --refPointLabel $scaleRegions.refPointLabel
- #end if
-
- #if $advancedOpt.showAdvancedOpt == "yes"
- #if $advancedOpt.averageType:
- --averageType '$advancedOpt.averageType'
- #end if
- --plotHeight $advancedOpt.plotHeight
- --plotWidth $advancedOpt.plotWidth
- --plotType $advancedOpt.plotType
-
- --regionsLabel '$advancedOpt.regionsLabel'
-
- #if str($advancedOpt.plotTitle.value) != "None":
- --plotTitle '$advancedOpt.plotTitle'
- #end if
-
- $advancedOpt.onePlotPerGroup
-
- #if $advancedOpt.yMin:
- --yMin $advancedOpt.yMin
- #end if
- #if $advancedOpt.yMax:
- --yMax $advancedOpt.yMax
- #end if
-
- --xAxisLabel '$advancedOpt.xAxisLabel'
- #if str($advancedOpt.yAxisLabel.value) != "None":
- --yAxisLabel '$advancedOpt.yAxisLabel'
- #end if
- #end if
- ; mv $newoutFileName $outFileName
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- (output['showOutputSettings'] == 'yes' and output['saveData'] == True)
-
-
- (output['showOutputSettings'] == 'yes' and output['saveMatrix'] == True)
-
-
- (output['showOutputSettings'] == 'yes' and output['saveSortedRegions'] == True)
-
-
-
-
-**What it does**
-
-This tool creates a profile plot for a score associated to genomic regions.
-Typically, these regions are genes, but any other regions defined in a BED or
-GFF format will work. A preprocessed matrix generated by the tool
-computeMatrix is required.
-
------
-
-.. class:: infomark
-
-Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_.
-
-This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.
-
-
-.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
-.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
-.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
-
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 test-data/master.mat.gz
Binary file test-data/master.mat.gz has changed
diff -r a281b5931ffb -r 675d25a0b9d4 test-data/master.png
Binary file test-data/master.png has changed
diff -r a281b5931ffb -r 675d25a0b9d4 test-data/test.bw
Binary file test-data/test.bw has changed
diff -r a281b5931ffb -r 675d25a0b9d4 test-data/test2.bed
--- a/test-data/test2.bed Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-ch1 100 150 CG11023 0 +
-ch2 150 175 cda5 0 -
-ch3 100 125 cda8 0 +
-#Group 1
-ch1 75 125 C11023 0 +
-ch2 125 150 ca5 0 -
-ch3 75 100 ca8 0 +
-#Group 2
diff -r a281b5931ffb -r 675d25a0b9d4 tool-data/deepTools_seqs.loc.sample
--- a/tool-data/deepTools_seqs.loc.sample Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of 2bit genome files for use with deepTools. You will
-#need to supply these files and then create a deepTools_seqs.loc file
-#similar to this one (store it in this directory) that points to
-#the directories in which those files are stored. The deepTools_seqs.loc
-#file has this format (white space characters are TAB characters):
-#
-#
-#
-#So, for example, if your deepTools_seqs.loc began like this:
-#
-#hg18Human (Homo sapiens): hg18/depot/data2/galaxy/twobit/hg18.2bit
-#hg19Human (Homo sapiens): hg19/depot/data2/galaxy/twobit/hg19.2bit
-#mm9Mouse (Mus musculus): mm9/depot/data2/galaxy/twobit/mm9.2bit
-#
-#then your /depot/data2/galaxy/twobit/ directory
-#would need to contain the following 2bit files:
-#
-#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.2bit
-#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg19.2bit
-#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 mm9.2bit
-#
-#Your deepTools_seqs.loc file should include an entry per line for
-#each file you have stored that you want to be available. Note that
-#your files should all have the extension '2bit'.
-#
-#Note that for backwards compatibility with workflows, the unique ID of
-#an entry must be the path that was in the original loc file, because that
-#is the value stored in the workflow for that parameter.
-#
diff -r a281b5931ffb -r 675d25a0b9d4 tool-data/homer.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/homer.loc.sample Mon Aug 12 08:16:21 2013 -0400
@@ -0,0 +1,30 @@
+#This is a sample file distributed with Galaxy that is used to define a
+#list of homer installations with associated databases, using three columns tab separated
+#(longer whitespace are TAB characters):
+#
+#This file should enable the administrator to establish some kind of
+#reproducibility of HOMER data. It is recommended to install HOMER from scratch
+#in a new folder and define the location here. If you want to update your HOMER
+#installation, consider checking out a completely new HOMER version in a new path.
+#
+#The entries are as follows:
+#
+#
+#
+#Your homer.loc file should include an entry per line for each "base name"
+#you have stored. For example:
+#
+#homer_08_Aug_2013 HOMER 4.2 08 Aug 2013 /data/0/galaxy_data/homer/08_08_2013/
+#homer_02_July_2013 HOMER 4.2 02 July 2013 /data/0/galaxy_data/homer/02_07_2013/
+#homer_1_Jan_2013 HOMER 4.1 1 Jan 2013 /data/0/galaxy_data/homer/01_01_2013
+
+#...etc...
+#
+#You can install and populate HOMER with all relevant data, following the instructions here:
+#http://biowhat.ucsd.edu/homer/introduction/install.html
+#
+#The Galaxy Tool Shed will take care of installing all requirements, but they are only
+#accessible at execution time. So feel free to ignore warnings during the installation
+#that you are missing weblogo, blat and co.
+
+
diff -r a281b5931ffb -r 675d25a0b9d4 tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-
-
-
- value, name, path
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 tool_dependencies.xml
--- a/tool_dependencies.xml Fri Aug 09 14:04:34 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.26/ncbi-blast-2.2.26+-src.tar.gz
- cd c++ && ./configure --with-boost=$BOOST_ROOT_DIR --prefix=$INSTALL_DIR && make && make install
-
- $INSTALL_DIR/bin
-
-
-
-
-Downloads and compiles BLAST+ from the NCBI, which assumes you have
-all the required build dependencies installed. See:
-http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download
-
-
-
diff -r a281b5931ffb -r 675d25a0b9d4 tools/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/README Mon Aug 12 08:16:21 2013 -0400
@@ -0,0 +1,15 @@
+Homer wrapper for Galaxy
+
+The homer tools need to be accessible from the command line
+
+Code repo: https://bitbucket.org/gvl/homer
+
+=========================================:
+LICENSE for this wrapper:
+=========================================:
+Kevin Ying
+Garvan Institute: http://www.garvan.org.au
+GVL: https://genome.edu.au/wiki/GVL
+
+http://opensource.org/licenses/mit-license.php
+
diff -r a281b5931ffb -r 675d25a0b9d4 tools/annotatePeaks.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/annotatePeaks.xml Mon Aug 12 08:16:21 2013 -0400
@@ -0,0 +1,164 @@
+
+
+ homer
+
+
+
+
+ annotatePeaks.pl $input_bed $genome_selector 1> $out_annotated
+ 2> $out_log || echo "Error running annotatePeaks." >&2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .. class:: infomark
+
+ **Homer annotatePeaks**
+
+ More information on accepted formats and options
+
+ http://biowhat.ucsd.edu/homer/ngs/annotation.html
+
+ TIP: use homer_bed2pos and homer_pos2bed to convert between the homer peak positions and the BED format.
+
+**Parameter list**
+
+Command line options (not all of them are supported)::
+
+ Usage: annotatePeaks.pl <peak file | tss> <genome version> [additional options...]
+
+ Available Genomes (required argument): (name,org,directory,default promoter set)
+ -- or --
+ Custom: provide the path to genome FASTA files (directory or single file)
+
+ User defined annotation files (default is UCSC refGene annotation):
+ annotatePeaks.pl accepts GTF (gene transfer formatted) files to annotate positions relative
+ to custom annotations, such as those from de novo transcript discovery or Gencode.
+ -gtf <gtf format file> (-gff and -gff3 can work for those files, but GTF is better)
+
+ Peak vs. tss/tts/rna mode (works with custom GTF file):
+ If the first argument is "tss" (i.e. annotatePeaks.pl tss hg18 ...) then a TSS centric
+ analysis will be carried out. Tag counts and motifs will be found relative to the TSS.
+ (no position file needed) ["tts" now works too - e.g. 3' end of gene]
+ ["rna" specifies gene bodies, will automaticall set "-size given"]
+ NOTE: The default TSS peak size is 4000 bp, i.e. +/- 2kb (change with -size option)
+ -list <gene id list> (subset of genes to perform analysis [unigene, gene id, accession,
+ probe, etc.], default = all promoters)
+ -cTSS <promoter position file i.e. peak file> (should be centered on TSS)
+
+ Primary Annotation Options:
+ -mask (Masked repeats, can also add 'r' to end of genome name)
+ -m <motif file 1> [motif file 2] ... (list of motifs to find in peaks)
+ -mscore (reports the highest log-odds score within the peak)
+ -nmotifs (reports the number of motifs per peak)
+ -mdist (reports distance to closest motif)
+ -mfasta <filename> (reports sites in a fasta file - for building new motifs)
+ -fm <motif file 1> [motif file 2] (list of motifs to filter from above)
+ -rmrevopp <#> (only count sites found within <#> on both strands once, i.e. palindromic)
+ -matrix <prefix> (outputs a motif co-occurrence files:
+ prefix.count.matrix.txt - number of peaks with motif co-occurrence
+ prefix.ratio.matrix.txt - ratio of observed vs. expected co-occurrence
+ prefix.logPvalue.matrix.txt - co-occurrence enrichment
+ prefix.stats.txt - table of pair-wise motif co-occurrence statistics
+ additional options:
+ -matrixMinDist <#> (minimum distance between motif pairs - to avoid overlap)
+ -matrixMaxDist <#> (maximum distance between motif pairs)
+ -mbed <filename> (Output motif positions to a BED file to load at UCSC (or -mpeak))
+ -mlogic <filename> (will output stats on common motif orientations)
+ -d <tag directory 1> [tag directory 2] ... (list of experiment directories to show
+ tag counts for) NOTE: -dfile <file> where file is a list of directories in first column
+ -bedGraph <bedGraph file 1> [bedGraph file 2] ... (read coverage counts from bedGraph files)
+ -wig <wiggle file 1> [wiggle file 2] ... (read coverage counts from wiggle files)
+ -p <peak file> [peak file 2] ... (to find nearest peaks)
+ -pdist to report only distance (-pdist2 gives directional distance)
+ -pcount to report number of peaks within region
+ -vcf <VCF file> (annotate peaks with genetic variation information, one col per individual)
+ -editDistance (Computes the # bp changes relative to reference)
+ -individuals <name1> [name2] ... (restrict analysis to these individuals)
+ -gene <data file> ... (Adds additional data to result based on the closest gene.
+ This is useful for adding gene expression data. The file must have a header,
+ and the first column must be a GeneID, Accession number, etc. If the peak
+ cannot be mapped to data in the file then the entry will be left empty.
+ -go <output directory> (perform GO analysis using genes near peaks)
+ -genomeOntology <output directory> (perform genomeOntology analysis on peaks)
+ -gsize <#> (Genome size for genomeOntology analysis, default: 2e9)
+
+ Annotation vs. Histogram mode:
+ -hist <bin size in bp> (i.e 1, 2, 5, 10, 20, 50, 100 etc.)
+ The -hist option can be used to generate histograms of position dependent features relative
+ to the center of peaks. This is primarily meant to be used with -d and -m options to map
+ distribution of motifs and ChIP-Seq tags. For ChIP-Seq peaks for a Transcription factor
+ you might want to use the -center option (below) to center peaks on the known motif
+ ** If using "-size given", histogram will be scaled to each region (i.e. 0-100%), with
+ the -hist parameter being the number of bins to divide each region into.
+ Histogram Mode specific Options:
+ -nuc (calculated mononucleotide frequencies at each position,
+ Will report by default if extracting sequence for other purposes like motifs)
+ -di (calculated dinucleotide frequencies at each position)
+ -histNorm <#> (normalize the total tag count for each region to 1, where <#> is the
+ minimum tag total per region - use to avoid tag spikes from low coverage
+ -ghist (outputs profiles for each gene, for peak shape clustering)
+ -rm <#> (remove occurrences of same motif that occur within # bp)
+
+ Peak Centering: (other options are ignored)
+ -center <motif file> (This will re-center peaks on the specified motif, or remove peak
+ if there is no motif in the peak. ONLY recentering will be performed, and all other
+ options will be ignored. This will output a new peak file that can then be reanalyzed
+ to reveal fine-grain structure in peaks (It is advised to use -size < 200) with this
+ to keep peaks from moving too far (-mirror flips the position)
+ -multi (returns genomic positions of all sites instead of just the closest to center)
+
+ Advanced Options:
+ -len <#> / -fragLength <#> (Fragment length, default=auto, might want to set to 0 for RNA)
+ -size <#> (Peak size[from center of peak], default=inferred from peak file)
+ -size #,# (i.e. -size -10,50 count tags from -10 bp to +50 bp from center)
+ -size "given" (count tags etc. using the actual regions - for variable length regions)
+ -log (output tag counts as log2(x+1+rand) values - for scatter plots)
+ -sqrt (output tag counts as sqrt(x+rand) values - for scatter plots)
+ -strand <+|-|both> (Count tags on specific strands relative to peak, default: both)
+ -pc <#> (maximum number of tags to count per bp, default=0 [no maximum])
+ -cons (Retrieve conservation information for peaks/sites)
+ -CpG (Calculate CpG/GC content)
+ -ratio (process tag values as ratios - i.e. chip-seq, or mCpG/CpG)
+ -nfr (report nucleosome free region scores instead of tag counts, also -nfrSize <#>)
+ -norevopp (do not search for motifs on the opposite strand [works with -center too])
+ -noadj (do not adjust the tag counts based on total tags sequenced)
+ -norm <#> (normalize tags to this tag count, default=1e7, 0=average tag count in all directories)
+ -pdist (only report distance to nearest peak using -p, not peak name)
+ -map <mapping file> (mapping between peak IDs and promoter IDs, overrides closest assignment)
+ -noann, -nogene (skip genome annotation step, skip TSS annotation)
+ -homer1/-homer2 (by default, the new version of homer [-homer2] is used for finding motifs)
+
+
+
+
+
diff -r a281b5931ffb -r 675d25a0b9d4 tools/bed2pos.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/bed2pos.xml Mon Aug 12 08:16:21 2013 -0400
@@ -0,0 +1,37 @@
+
+
+ homer
+
+
+
+
+ bed2pos.pl $input_bed 1> $out_pos
+ 2> $out_log || echo "Error running bed2pos." >&2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .. class:: infomark
+
+ Converts: BED -(to)-> homer peak positions
+
+ **Homer bed2pos.pl**
+
+ http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
+
+
+
diff -r a281b5931ffb -r 675d25a0b9d4 tools/findPeaks.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/findPeaks.xml Mon Aug 12 08:16:21 2013 -0400
@@ -0,0 +1,122 @@
+
+
+ homer
+
+ Homer's peakcaller. Requires tag directories (see makeTagDirectory)
+
+
+ findPeaks $tagDir.extra_files_path $options -o $outputPeakFile
+
+ #if $control_tagDir:
+ -i $control_tagDir.extra_files_path
+ #end if
+
+ 2> $out_log || echo "Error running findPeaks." >&2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .. class:: infomark
+
+ **Homer findPeaks**
+
+ For more options, look under: "Command line options for findPeaks"
+
+ http://biowhat.ucsd.edu/homer/ngs/peaks.html
+
+ TIP: use homer_bed2pos and homer_pos2bed to convert between the homer peak positions and the BED format.
+
+**Parameter list**
+
+Command line options (not all of them are supported)::
+
+ Usage: findPeaks <tag directory> [options]
+
+ Finds peaks in the provided tag directory. By default, peak list printed to stdout
+
+ General analysis options:
+ -o <filename|auto> (file name for to output peaks, default: stdout)
+ "-o auto" will send output to "<tag directory>/peaks.txt", ".../regions.txt",
+ or ".../transcripts.txt" depending on the "-style" option
+ -style <option> (Specialized options for specific analysis strategies)
+ factor (transcription factor ChIP-Seq, uses -center, output: peaks.txt, default)
+ histone (histone modification ChIP-Seq, region based, uses -region -size 500 -L 0, regions.txt)
+ groseq (de novo transcript identification from GroSeq data, transcripts.txt)
+ tss (TSS identification from 5' RNA sequencing, tss.txt)
+ dnase (Hypersensitivity [crawford style (nicking)], peaks.txt)
+
+ chipseq/histone options:
+ -i <input tag directory> (Experiment to use as IgG/Input/Control)
+ -size <#> (Peak size, default: auto)
+ -minDist <#> (minimum distance between peaks, default: peak size x2)
+ -gsize <#> (Set effective mappable genome size, default: 2e9)
+ -fragLength <#|auto> (Approximate fragment length, default: auto)
+ -inputFragLength <#|auto> (Approximate fragment length of input tags, default: auto)
+ -tbp <#> (Maximum tags per bp to count, 0 = no limit, default: auto)
+ -inputtbp <#> (Maximum tags per bp to count in input, 0 = no limit, default: auto)
+ -strand <both|separate> (find peaks using tags on both strands or separate, default:both)
+ -norm # (Tag count to normalize to, default 10000000)
+ -region (extends start/stop coordinates to cover full region considered "enriched")
+ -center (Centers peaks on maximum tag overlap and calculates focus ratios)
+ -nfr (Centers peaks on most likely nucleosome free region [works best with mnase data])
+ (-center and -nfr can be performed later with "getPeakTags")
+
+ Peak Filtering options: (set -F/-L/-C to 0 to skip)
+ -F <#> (fold enrichment over input tag count, default: 4.0)
+ -P <#> (poisson p-value threshold relative to input tag count, default: 0.0001)
+ -L <#> (fold enrichment over local tag count, default: 4.0)
+ -LP <#> (poisson p-value threshold relative to local tag count, default: 0.0001)
+ -C <#> (fold enrichment limit of expected unique tag positions, default: 2.0)
+ -localSize <#> (region to check for local tag enrichment, default: 10000)
+ -inputSize <#> (Size of region to search for control tags, default: 2x peak size)
+ -fdr <#> (False discovery rate, default = 0.001)
+ -poisson <#> (Set poisson p-value cutoff, default: uses fdr)
+ -tagThreshold <#> (Set # of tags to define a peak, default: 25)
+ -ntagThreshold <#> (Set # of normalized tags to define a peak, by default uses 1e7 for norm)
+ -minTagThreshold <#> (Absolute minimum tags per peak, default: expected tags per peak)
+
+ GroSeq Options: (Need to specify "-style groseq"):
+ -tssSize <#> (size of region for initiation detection/artifact size, default: 250)
+ -minBodySize <#> (size of region for transcript body detection, default: 1000)
+ -maxBodySize <#> (size of region for transcript body detection, default: 10000)
+ -tssFold <#> (fold enrichment for new initiation detection, default: 4.0)
+ -bodyFold <#> (fold enrichment for new transcript detection, default: 4.0)
+ -endFold <#> (end transcript when levels are this much less than the start, default: 10.0)
+ -fragLength <#> (Approximate fragment length, default: 150)
+ -uniqmap <directory> (directory of binary files specifying uniquely mappable locations)
+ Download from http://biowhat.ucsd.edu/homer/groseq/
+ -confPvalue <#> (confidence p-value: 1.00e-05)
+ -minReadDepth <#> (Minimum initial read depth for transcripts, default: auto)
+ -pseudoCount <#> (Pseudo tag count, default: 2.0)
+ -gtf <filename> (Output de novo transcripts in GTF format)
+ "-o auto" will produce <dir>/transcripts.txt and <dir>/transcripts.gtf
+
+
+
diff -r a281b5931ffb -r 675d25a0b9d4 tools/makeTagDirectory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/makeTagDirectory.py Mon Aug 12 08:16:21 2013 -0400
@@ -0,0 +1,94 @@
+"""
+
+
+"""
+import re
+import os
+import sys
+import subprocess
+import optparse
+import shutil
+import tempfile
+
+def getFileString(fpath, outpath):
+ """
+ Return fpath followed by its size in B, KB or MB (left empty for a zero-byte file), or a placeholder if outpath/fpath is not a regular file.
+ """
+ size = ''
+ fp = os.path.join(outpath, fpath)
+ s = '? ?'
+ if os.path.isfile(fp):
+ n = float(os.path.getsize(fp))  # float so the divisions below keep the fractional part
+ if n > 2**20:
+ size = ' (%1.1f MB)' % (n/2**20)
+ elif n > 2**10:
+ size = ' (%1.1f KB)' % (n/2**10)
+ elif n > 0:
+ size = ' (%d B)' % (int(n))
+ s = '%s %s' % (fpath, size)
+ return s
+
+class makeTagDirectory():
+ """wrapper
+ """
+
+ def __init__(self,opts=None, args=None):
+ self.opts = opts
+ self.args = args
+
+ def run_makeTagDirectory(self):
+ """
+ makeTagDirectory