Mercurial > repos > bgruening > upload_testing
changeset 54:675d25a0b9d4
Uploaded
author | bgruening |
---|---|
date | Mon, 12 Aug 2013 08:16:21 -0400 |
parents | a281b5931ffb |
children | e9cd105a8856 |
files | README.rst bamCompare.xml bamCorrelate.xml bamCoverage.xml bamFingerprint.xml bigwigCompare.xml computeGCBias.xml computeMatrix.xml correctGCBias.xml datatypes_conf.xml heatmapper.xml homer.py profiler.xml test-data/master.mat.gz test-data/master.png test-data/test.bw test-data/test2.bed tool-data/deepTools_seqs.loc.sample tool-data/homer.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml tools/README tools/annotatePeaks.xml tools/bed2pos.xml tools/findPeaks.xml tools/makeTagDirectory.py tools/makeTagDirectory.xml tools/pos2bed.xml tools/tool_dependencies.xml |
diffstat | 29 files changed, 831 insertions(+), 1886 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Mon Aug 12 08:16:21 2013 -0400 @@ -0,0 +1,80 @@ +Galaxy datatypes for HOMER tools +================================ + +These HOMER datatypes are copyright 2013 by Björn Grüning. + +See the licence text below. + + +History +======= + +======= ====================================================================== +Version Changes +------- ---------------------------------------------------------------------- +v0.0.1 - First release. +======= ====================================================================== + + +Installation +============ + +Doing this automatically via the Galaxy Tool Shed is probably simplest. + + +Manual Installation +=================== + +Normally you would install this via the Galaxy ToolShed, which would move +the provided homer.py file into a suitable location and process the +datatypes_conf.xml entry to be combined with your local configuration. + +However, if you really want to, this should work for a manual install. Add +the following line to the datatypes_conf.xml file in the Galaxy main folder:: + + <datatype extension="homer_tagdir" type="galaxy.datatypes.homer:TagDirectory" mimetype="text/html" display_in_upload="false"/> + +Also create the file lib/galaxy/datatypes/homer.py by moving, copying or linking +the homer.py file provided in this tar-ball. Finally add 'import homer' near +the start of file lib/galaxy/datatypes/registry.py (after the other import +lines). 
+ + +Bug Reports +=========== + +You can file an issue here https://github.com/bgruening/galaxytools/issues or ask +us on the Galaxy development list http://lists.bx.psu.edu/listinfo/galaxy-dev + + +Developers +========== + +Development is happening here: + + https://github.com/bgruening/galaxytools/ + + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +NOTE: This is the licence for the Galaxy HOMER datatypes **only**. HOMER +and associated data files are available and licenced separately.
--- a/bamCompare.xml Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,210 +0,0 @@ -<tool id="bamCompare" name="bamCompare" version="1.0"> - <description>Normalize and compare two BAM files to output ratio, log2ratio or difference.</description> - <requirements> - <requirement type="package" version="1.5.1_98e5d8a61431ea8605c0643d991a1a5d8999b4dc">deepTools</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="0.1">ucsc_tools</requirement> - </requirements> - <command> - bamCompare - --bamfile1 '$bamFile1' - -bai1 '${bamFile1.metadata.bam_index}' - --bamfile2 '$bamFile2' - -bai2 '${bamFile2.metadata.bam_index}' - - --outFileName '$outFileName' - --outFileFormat '$outFileFormat' - - --fragmentLength $fragmentLength - --binSize $binSize - - #if $scaling.method == 'SES': - --scaleFactorsMethod SES - --sampleLength $scaling.sampleLength - #elif $scaling.method == 'readCount': - --scaleFactorsMethod readCount - #elif $scaling.method == 'own': - --scaleFactors '$scaling.scaleFactor1:$scaling.scaleFactor2' - #end if - - --ratio $comparison.type - - - #if $comparison.type=='subtract': - #if $comparison.normalization.type=='rpkm': - --normalizeUsingRPKM - #elif $comparison.normalization.type=='1x': - --normalizeTo1x $comparison.normalization.normalizeTo1x - #end if - #end if - - #if $advancedOpt.showAdvancedOpt == "yes": - #if $advancedOpt.smoothLength: - --smoothLength '$advancedOpt.smoothLength' - #end if - - #if str($advancedOpt.region.value) != '': - --region '$advancedOpt.region' - #end if - $advancedOpt.doNotExtendPairedEnds - $advancedOpt.ignoreDuplicates - - #if $advancedOpt.minMappingQuality: - --minMappingQuality '$advancedOpt.minMappingQuality' - #end if - - --missingDataAsZero $advancedOpt.missingDataAsZero - - #end if - --numberOfProcessors 4 - - - </command> - - <inputs> - <param name="bamFile1" format="bam" type="data" label="Treatment BAM file" - help="The BAM 
file must be sorted and indexed."/> - - <param name="bamFile2" format="bam" type="data" label="Input BAM file" - help="The BAM file must be sorted and indexed."/> - - <param name="fragmentLength" type="integer" value="300" min="1" - label="Length of the average fragment size" - help ="Reads will be extended to match this length unless they are paired-end, in which case they will be extended to match the fragment length. If this value is set to the read length or smaller, the read will not be extended. *Warning* the fragment length affects the normalization to 1x (see "normalize coverage to 1x"). The formula to normalize using the sequencing depth is genomeSize/(number of mapped reads * fragment length). *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will be extended to match the fragment length."/> - - <param name="binSize" type="integer" value="50" min="1" - label="Bin size in bp" - help="The genome will be divided in bins (also called tiles) of the specified length. For each bin the overlaping number of fragments (or reads) will be reported. If only half a fragment overlaps, this fraction will be reported. "/> - - - <conditional name="scaling"> - <param name="method" type="select" - label="Method to use for scaling the largest sample to the smallest"> - <option value="readCount" selected="true">read count</option> - <option value="SES">signal extraction scaling (SES)</option> - <option value="own">enter own scaling factors</option> - </param> - <when value="SES"> - <param name="sampleLength" type="integer" value="1000" min="10" - label="Length in base pairs used to sample the genome and compute the size or scaling factors to compare the two BAM files " - help="The default is fine. 
Only change it if you know what you are doing" /> - </when> - <when value="readCount" /> - <when value="own"> - <param name="scaleFactor1" type="float" value="1" - label="Scale factor for treatment"/> - - <param name="scaleFactor2" type="float" value="1" - label="Scale factor for input"/> - </when> - </conditional> - - <conditional name="comparison"> - <param name="type" type="select" - label="How to compare the two files"> - <option value="log2" selected="true">compute log2 of the number of reads ratio</option> - <option value="ratio">compute the ratio of the number of reads</option> - <option value="subtract">compute difference (subtract input from treatment) of the number of reads</option> - </param> - <when value="log2" /> - <when value="ratio" /> - <when value="subtract"> - <conditional name="normalization"> - <param name="type" type="select" label="Normalization method" > - <option value="1x">Normalize coverage to 1x</option> - <option value="rpkm">Normalize to fragments (reads) per kilobase per million (RPKM)</option> - <option value="no">Do not normalize or scale</option> - </param> - <when value="rpkm" /> - <when value="no" /> - <when value="1x"> - <param name="normalizeTo1x" type="integer" value="2150570000" - label="Report normalized coverage to 1x sequenceing depth" - help ="Sequencing depth is defined as the total number of mapped reads * fragment length / effective genome size. To use this option, the effective genome size has to be given. 
Common values are: mm9: 2150570000, hg19:2451960000, dm3:121400000 and ce10:93260000."/> - </when> - </conditional> - </when> - </conditional> - - <param name="outFileFormat" type="select" label="Coverage file format"> - <option value="bigwig" selected="true">bigwig</option> - <option value="bedgraph">bedgraph</option> - </param> - - <conditional name="advancedOpt"> - <param name="showAdvancedOpt" type="select" label="Show advanced options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - - <param name="smoothLength" type="integer" value="1" optional="true" min="1" - label="Smooth values using the following length (in bp)" - help ="The smooth length defines a window, larger than the bin size, to average the number of reads. For example, if the bin size is set to 20 bp and the smooth length is set to 60 bp, then, for each bin size the average of it and its left and right neighbors is considered. Any value smaller than the bin size will be ignored and no smoothing will be applied."/> - - <param name="region" type="text" value="" - label="Region of the genome to limit the operation to" - help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example "chr10" or "chr10:456700:891000"" /> - - <param name="doNotExtendPairedEnds" type="boolean" truevalue="--doNotExtendPairedEnds" falsevalue="" - label="Do not extend paired ends" - help="If set, reads are not extended to match the fragment length reported in the BAM file, instead they will be extended to match the fragment length. Default is to extend the reads if paired end information is available."/> - - <param name="ignoreDuplicates" type="boolean" truevalue="--ignoreDuplicates" falsevalue="" - label="Ignore duplicates" - help="If set, reads that have the same orientation and start position will be considered only once. 
If reads are paired, the mate position also has to coincide to ignore a read." /> - - <param name="minMappingQuality" type="integer" optional="true" value="1" min="1" - label="Minimum mapping quality" - help= "If set, only reads that have a mapping quality score higher than the given value are considered"/> - - <param name="missingDataAsZero" type="boolean" truevalue="yes" falsevalue="no" checked="True" - label ="Treat missing data as zero" - help ="This parameter determines if missing data should be treated as zeros. If unchecked, missing data will be ignored and not included in the output file. Missing data is defined as those regions for which both BAM files have 0 reads." /> - - </when> - </conditional> - - </inputs> - <outputs> - <data format="bigwig" name="outFileName"> - <change_format> - <when input="outFileFormat" value="bigwig" format="bigwig" /> - <when input="outFileFormat" value="bedgraph" format="bedgraph" /> - </change_format> - </data> - </outputs> - <help> - -**What it does** - -This tool compares two BAM files based on the number of mapped reads. To -compare the BAM files the genome is partitioned into bins of equal size, then -the number of reads found in each BAM file are counted for such bins and -finally a summarizing value is reported. This vaule can be the ratio of the -number of reads per bin, the log2 of the ratio or the difference. This tool -can normalize the number of reads on each BAM file using the SES method -proposed by Diaz et al. (2012). "Normalization, bias correction, and peak -calling for ChIP-seq". Statistical applications in genetics and molecular -biology, 11(3). Normalization based on read counts is also available. The -output is either a bedgraph or a bigwig file containing the bin location and -the resulting comparison values. By default if reads are mated the fragment -length reported in the BAM file is used. - ------ - -.. 
class:: infomark - -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_. - -This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - - -.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ -.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de - - </help> - -</tool>
--- a/bamCorrelate.xml Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,167 +0,0 @@ -<tool id="bamCorrelate" name="bamCorrelate" version="1.0"> - <description>corrlates pairs of bam files</description> - <requirements> - <requirement type="package" version="1.5.1_98e5d8a61431ea8605c0643d991a1a5d8999b4dc">deepTools</requirement> - </requirements> - <command> - #set files=[] - #set labels=[] - #for $i in $inputs - #set $files += [str($i.bamfile)] - #if str($i.label.value) != "": - #set $labels += ["\"%s\"" % ($i.label.value)] - #else - #set $labels += ["\"%s\"" % ($i.bamfile.name)] - #end if - #end for - bamCorrelate - --bamfiles #echo " ".join($files) - --labels #echo " ".join($labels) - - --fragmentLength $fragmentLength - --corMethod $corMethod - - #set newoutFileName=str($outFileName)+".png" - --plotFile $newoutFileName - - #if $outputOpt.showOutputOpt == "yes" - #if $outputOpt.outFileRawCounts: - --outRawCounts '$outputOpt.outFileRawCounts' - #end if - #if $outputOpt.outFileCorMatrix: - --outFileCorMatrix '$outputOpt.outFileCorMatrix' - #end if - #end if - - #if $advancedOpt.showAdvancedOpt == "yes": - #if $advancedOpt.smoothLength: - --smoothLength '$advancedOpt.smoothLength' - #end if - - #if str($advancedOpt.region.value) != '': - --region '$advancedOpt.region' - #end if - - --binSize '$advancedOpt.binSize' - --numberOfSamples '$advancedOpt.numberOfSamples' - - $advancedOpt.doNotExtendPairedEnds - $advancedOpt.ignoreDuplicates - $advancedOpt.includeZeros - - #if $advancedOpt.minMappingQuality: - --minMappingQuality '$advancedOpt.minMappingQuality' - #end if - #end if - - --numberOfProcessors 4; mv $newoutFileName $outFileName - </command> - - <inputs> - - <repeat name="inputs" title="Input files" min="2"> - <param name="bamfile" type="data" format="bam" - label="Bam file" - help="The BAM file must be sorted and indexed."/> - <param name="label" type="text" size="30" optional="true" value="" - label="Label" - help="Label to use 
in the output. If not given the dataset name will be used instead."/> - </repeat> - - <param name="fragmentLength" type="integer" value="300" min="1" - label="Length of the average fragment size" - help ="Reads will be extended to match this length unless they are paired-end, in which case they will be extended to match the fragment length. If this value is set to the read length or smaller, the read will not be extended. *Warning* the fragment length affects the normalization to 1x (see "normalize coverage to 1x"). The formula to normalize using the sequencing depth is genomeSize/(number of mapped reads * fragment length). *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will be extended to match the fragment length."/> - - <param name="corMethod" type="select" label="Correlation method"> - <option value="pearson">Pearson</option> - <option value="spearman">Spearman</option> - </param> - - <conditional name="advancedOpt"> - <param name="showAdvancedOpt" type="select" label="Show advanced options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="smoothLength" type="integer" value="1" optional="true" min="1" - label="Smooth values using the following length (in bp)" - help ="The smooth length defines a window, larger than the bin size, to average the number of reads. For example, if the bin size is set to 20 bp and the smooth length is set to 60 bp, then, for each bin size the average of it and its left and right neighbors is considered. Any value smaller than the bin size will be ignored and no smoothing will be applied."/> - - <param name="region" type="text" value="" - label="Region of the genome to limit the operation to" - help="This is useful when testing parameters to reduce the computing time. 
The format is chr:start:end, for example "chr10" or "chr10:456700:891000"" /> - - <param name="binSize" type="integer" value="10000" min="1" - label="Bin size in bp" - help="Length in base pairs for a window used to sample the genome."/> - - <param name="numberOfSamples" type="integer" value="100000" min="1" - label="Number of samples" - help="Number of samples taken from the genome to compute the scaling factors"/> - - <param name="doNotExtendPairedEnds" type="boolean" truevalue="--doNotExtendPairedEnds" falsevalue="" - label="Do not extend paired ends" - help="If set, reads are not extended to match the fragment length reported in the BAM file, instead they will be extended to match the fragment length. Default is to extend the reads if paired end information is available."/> - - <param name="ignoreDuplicates" type="boolean" truevalue="--ignoreDuplicates" falsevalue="" - label="Ignore duplicates" - help="If set, reads that have the same orientation and start position will be considered only once. If reads are paired, the mate position also has to coincide to ignore a read." /> - - <param name="minMappingQuality" type="integer" optional="true" value="1" min="1" - label="Minimum mapping quality" - help= "If set, only reads that have a mapping quality score higher than the given value are considered"/> - - <param name="includeZeros" type="boolean" truevalue="--includeZeros" falsevalue="" - label ="Include zeros" - help ="If set, then zero counts that happen for *all* bam files given are included. 
The default behavior is to ignore those cases" /> - - </when> - </conditional> - - <conditional name="outputOpt"> - <param name="showOutputOpt" type="select" label="Show additional output options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="saveRawCounts" type="boolean" label="Save the bin counts"/> - <param name="saveCorMatrix" type="boolean" label="Save the correlation matrix"/> - </when> - </conditional> - - </inputs> - <outputs> - <data format="png" name="outFileName" /> - <data format="tabular" name="outFileRawCounts" label="${tool.name} on ${on_string}: bin counts"> - <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)</filter> - </data> - <data format="tabular" name="outFileCorMatrix" label="${tool.name} on ${on_string}: correlation matrix"> - <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveCorMatrix'] == True)</filter> - </data> - </outputs> - <help> - -**What it does** - -Genomes are split into bins of given length. For each bin the number of reads -found for each of the bam files is counted. A correlation is computed for all -pairs of bam files. - ------ - -.. class:: infomark - -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_. - -This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - - -.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ -.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de - - </help> - -</tool>
--- a/bamCoverage.xml Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,143 +0,0 @@ -<tool id="bamCoverage" name="bamCoverage" version="1.0"> - <description>Given a BAM file, generates a coverage bigwig file. Multiple options available to count reads and normalize coverage.</description> - <requirements> - <requirement type="package" version="1.5.1_98e5d8a61431ea8605c0643d991a1a5d8999b4dc">deepTools</requirement> - <requirement type="package" version="0.1">ucsc_tools</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - </requirements> - <command>bamCoverage - --bam '$bamInput' - --bamIndex ${bamInput.metadata.bam_index} - --outFileName '$outFileName' - --outFileFormat '$outFileFormat' - - --fragmentLength $fragmentLength - --binSize $binSize - - - #if $scaling.type=='rpkm': - --normalizeUsingRPKM - #elif $scaling.type=='1x': - --normalizeTo1x $scaling.normalizeTo1x - #elif $scaling.type=='own': - --scaleFactor $scaling.scaleFactor - #end if - - #if $advancedOpt.showAdvancedOpt == "yes": - #if $advancedOpt.smoothLength: - --smoothLength '$advancedOpt.smoothLength' - #end if - - #if str($advancedOpt.region.value) != '': - --region '$advancedOpt.region' - #end if - $advancedOpt.doNotExtendPairedEnds - $advancedOpt.ignoreDuplicates - - #if $advancedOpt.minMappingQuality: - --minMappingQuality '$advancedOpt.minMappingQuality' - #end if - - #end if - --numberOfProcessors 4 - </command> - - <inputs> - <param name="bamInput" format="bam" type="data" label="Input BAM file" - help="The BAM file must be sorted and indexed."/> - - <param name="fragmentLength" type="integer" value="300" min="1" - label="Length of the average fragment size" - help ="Reads will be extended to match this length unless they are paired-end, in which case they will be extended to match the fragment length. If this value is set to the read length or smaller, the read will not be extended. 
*Warning* the fragment length affects the normalization to 1x (see "normalize coverage to 1x"). The formula to normalize using the sequencing depth is genomeSize/(number of mapped reads * fragment length). *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will be extended to match the fragment length."/> - - <param name="binSize" type="integer" value="50" min="1" - label="Bin size in bp" - help="The genome will be divided in bins (also called tiles) of the specified length. For each bin the overlaping number of fragments (or reads) will be reported. If only half a fragment overlaps, this fraction will be reported. "/> - - <conditional name="scaling"> - <param name="type" type="select" label="Scaling/Normalization method" > - <option value="1x">Normalize coverage to 1x</option> - <option value="rpkm">Normalize to fragments (reads) per kilobase per million (RPKM)</option> - <option value="own">Set your own scaling factor</option> - <option value="no">Do not normalize or scale</option> - </param> - <when value="rpkm"/> - <when value="no"/> - <when value="1x"> - <param name="normalizeTo1x" type="integer" value="2150570000" - label="Genome size" - help ="Enter the genome size to normalize the reads counts. Sequencing depth is defined as the total number of mapped reads * fragment length / effective genome size. To use this option, the effective genome size has to be given. 
Common values are: mm9: 2150570000, hg19:2451960000, dm3:121400000 and ce10:93260000."/> - </when> - <when value="own"> - <param name="scaleFactor" type="float" value="1" size="3" - label="Scale factor to multiply all values" /> - </when> - </conditional> - - <param name="outFileFormat" type="select" label="Coverage file format"> - <option value="bigwig" selected="true">bigwig</option> - <option value="bedgraph">bedgraph</option> - </param> - - - <conditional name="advancedOpt"> - <param name="showAdvancedOpt" type="select" label="Show advanced options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - - <param name="smoothLength" type="integer" value="1" optional="true" min="1" - label="Smooth values using the following length (in bp)" - help ="The smooth length defines a window, larger than the bin size, to average the number of reads. For example, if the bin size is set to 20 bp and the smooth length is set to 60 bp, then, for each bin size the average of it and its left and right neighbors is considered. Any value smaller than the bin size will be ignored and no smoothing will be applied."/> - - <param name="region" type="text" value="" - label="Region of the genome to limit the operation to" - help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example "chr10" or "chr10:456700:891000"" /> - - <param name="doNotExtendPairedEnds" type="boolean" truevalue="--doNotExtendPairedEnds" falsevalue="" - label="Do not extend paired ends" - help="If set, reads are not extended to match the fragment length reported in the BAM file, instead they will be extended to match the fragment length. 
Default is to extend the reads if paired end information is available."/> - - <param name="ignoreDuplicates" type="boolean" truevalue="--ignoreDuplicates" falsevalue="" - label="Ignore duplicates" - help="If set, reads that have the same orientation and start position will be considered only once. If reads are paired, the mate position also has to coincide to ignore a read." /> - - <param name="minMappingQuality" type="integer" optional="true" value="1" min="1" - label="Minimum mapping quality" - help= "If set, only reads that have a mapping quality score higher than the given value are considered"/> - </when> - </conditional> - - </inputs> - <outputs> - <data format="bigwig" name="outFileName"> - <change_format> - <when input="outFileFormat" value="bigwig" format="bigwig" /> - <when input="outFileFormat" value="bedgraph" format="bedgraph" /> - </change_format> - </data> - </outputs> - <help> - -**What it does** - -Given a BAM file, this tool generates a bigWig or bedGraph file of fragment or read coverages. The way the method works is by first calculating all the number of reads (either extended to match the fragment length or not) that overlap each bin in the genome. Bins with zero counts are skipped, i.e. not added to the output file. The resulting read counts can be normalized using either a given scaling factor, the RPKM formula or to get a 1x depth of coverage (RPGC). - ------ - -.. class:: infomark - -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_. - -This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - - -.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ -.. 
_Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de - -</help> - -</tool>
--- a/bamFingerprint.xml Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,151 +0,0 @@ -<tool id="bamFingerprint" name="bamFingerprint" version="1.0"> - <description>plots profiles of bam files</description> - <requirements> - <requirement type="package" version="1.5.1_98e5d8a61431ea8605c0643d991a1a5d8999b4dc">deepTools</requirement> - </requirements> - <command> - #set files=[] - #set labels=[] - #for $i in $inputs - #set $files += [str($i.bamfile)] - #if str($i.label.value) != "": - #set $labels += ["\"%s\"" % ($i.label.value)] - #else - #set $labels += ["\"%s\"" % ($i.bamfile.name)] - #end if - #end for - bamFingerprint - --bamfiles #echo " ".join($files) - --labels #echo " ".join($labels) - - --fragmentLength $fragmentLength - - #set newoutFileName=str($outFileName)+".png" - --plotFile $newoutFileName - - #if $outputOpt.showOutputOpt == "yes" - #if $outputOpt.saveRawCounts: - --outRawCounts '$outFileRawCounts' - #end if - #end if - - #if $advancedOpt.showAdvancedOpt == "yes": - #if $advancedOpt.smoothLength: - --smoothLength '$advancedOpt.smoothLength' - #end if - - #if str($advancedOpt.region.value) != '': - --region '$advancedOpt.region' - #end if - - --binSize '$advancedOpt.binSize' - --numberOfSamples '$advancedOpt.numberOfSamples' - - $advancedOpt.doNotExtendPairedEnds - $advancedOpt.ignoreDuplicates - $advancedOpt.skipZeros - - #if $advancedOpt.minMappingQuality: - --minMappingQuality '$advancedOpt.minMappingQuality' - #end if - #end if - - --numberOfProcessors 4; mv $newoutFileName $outFileName - </command> - - <inputs> - - <repeat name="inputs" title="Input files" min="2"> - <param name="bamfile" type="data" format="bam" - label="Bam file" - help="The BAM file must be sorted and indexed."/> - <param name="label" type="text" size="30" optional="true" value="" - label="Label" - help="Label to use in the output. 
If not given the dataset name will be used instead."/> - </repeat> - - <param name="fragmentLength" type="integer" value="200" min="1" - label="Length of the average fragment size"/> - - <conditional name="advancedOpt"> - <param name="showAdvancedOpt" type="select" label="Show advanced options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="smoothLength" type="integer" value="1" optional="true" min="1" - label="Smooth values using the following length (in bp)" - help ="The smooth length defines a window, larger than the bin size, to average the number of reads. For example, if the bin size is set to 20 bp and the smooth length is set to 60 bp, then, for each bin size the average of it and its left and right neighbors is considered. Any value smaller than the bin size will be ignored and no smoothing will be applied."/> - - <param name="region" type="text" value="" - label="Region of the genome to limit the operation to" - help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example "chr10" or "chr10:456700:891000"" /> - - <param name="binSize" type="integer" value="10000" min="1" - label="Bin size in bp" - help="Length in base pairs for a window used to sample the genome."/> - - <param name="numberOfSamples" type="integer" value="100000" min="1" - label="Number of samples" - help="Number of samples taken from the genome to compute the scaling factors"/> - - <param name="doNotExtendPairedEnds" type="boolean" truevalue="--doNotExtendPairedEnds" falsevalue="" - label="Do not extend paired ends" - help="If set, reads are not extended to match the fragment length reported in the BAM file, instead they will be extended to match the fragment length. 
Default is to extend the reads if paired end information is available."/> - - <param name="ignoreDuplicates" type="boolean" truevalue="--ignoreDuplicates" falsevalue="" - label="Ignore duplicates" - help="If set, reads that have the same orientation and start position will be considered only once. If reads are paired, the mate position also has to coincide to ignore a read." /> - - <param name="minMappingQuality" type="integer" optional="true" value="1" min="1" - label="Minimum mapping quality" - help= "If set, only reads that have a mapping quality score higher than the given value are considered"/> - - <param name="skipZeros" type="boolean" truevalue="--skipZeros" falsevalue="" - label ="Include zeros" - help ="If set, then zero counts that happen for *all* bam files given are ignored. This will result in a reduced number of read counts than the specified in number of samples" /> - </when> - </conditional> - - <conditional name="outputOpt"> - <param name="showOutputOpt" type="select" label="Show additional output options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="saveRawCounts" type="boolean" label="Save the bin counts"/> - </when> - </conditional> - </inputs> - <outputs> - <data format="png" name="outFileName" /> - <data format="tabular" name="outFileRawCounts" label="${tool.name} on ${on_string}: bin counts"> - <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)</filter> - </data> - </outputs> - <help> - -**What it does** - -Samples indexed bam files and plots a profile for each bam file. At each -sample position all reads overlaping a window (bin) of specified length are -counted. This counts are then sorted and the cumulative sum plotted - ------ - -.. class:: infomark - -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. 
If you run into any trouble please send an email to `Fidel Ramirez`_. - -This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - - -.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ -.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de - - </help> - -</tool>
--- a/bigwigCompare.xml Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,116 +0,0 @@ -<tool id="bigwigCompare" name="bigwigCompare" version="1.0"> - <description>compares two bigwig files based on the number of mapped reads</description> - <requirements> - <requirement type="package" version="1.5.1_98e5d8a61431ea8605c0643d991a1a5d8999b4dc">deepTools</requirement> - <requirement type="package" version="0.1">ucsc_tools</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - </requirements> - <command> - bigwigCompare - --bigwig1 '$bigwigFile1' - --bigwig2 '$bigwigFile2' - - --outFileName '$outFileName' - --outFileFormat '$outFileFormat' - - --ratio $comparison_type - - #if $advancedOpt.showAdvancedOpt == "yes": - #if str($advancedOpt.region.value) != '': - --region '$advancedOpt.region' - #end if - - --missingDataAsZero $advancedOpt.missingDataAsZero - --scaleFactors '$advancedOpt.scaleFactor1:$advancedOpt.scaleFactor2' - --pseudocount '$advancedOpt.pseudocount' - --binSize $advancedOpt.binSize - - #end if - --numberOfProcessors 4 - - - </command> - - <inputs> - <param name="bigwigFile1" format="bigwig" type="data" label="Treatment bigwig file" /> - - <param name="bigwigFile2" format="bigwig" type="data" label="Input bigwig file" /> - - <param name="comparison_type" type="select" - label="How to compare the two files" - help="The reciprocal ratio returns the negative of the inverse of the ratio if the ratio is less than 0. The resulting values are interpreted as negative fold changes." 
> - <option value="log2" selected="true">log2 ratio</option> - <option value="ratio">simple ratio</option> - <option value="subtract">difference (subtract input from treatment)</option> - <option value="add">sum</option> - <option value="reciprocal_ratio">reciprocal ratio</option> - </param> - - <param name="outFileFormat" type="select" label="Coverage file format"> - <option value="bigwig" selected="true">bigwig</option> - <option value="bedgraph">bedgraph</option> - </param> - - <conditional name="advancedOpt"> - <param name="showAdvancedOpt" type="select" label="Show advanced options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - - <param name="binSize" type="integer" value="50" min="1" - label="Bin size in bp" - help="Size of the bins in bp for the ouput of the bigwig/bedgraph file "/> - - <param name="region" type="text" value="" - label="Region of the genome to limit the operation to" - help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example "chr10" or "chr10:456700:891000"" /> - - <param name="missingDataAsZero" type="boolean" truevalue="yes" falsevalue="no" checked="True" - label ="Treat missing data as zero" - help ="This parameter determines if missing data should be replaced with a zero. If set to "no", missing data will be ignored and will not be included in the output file at all. Missing data is defined as those regions for which no value exists in *any* of the bigwig files. The decision to include or exclude missing data depends on the interpretation of the data. Missing data in a bigwig file may mean that there is no information available for certain regions, for example a repetitive region that is not being considered. In the same file regions with low coverage may get zero read counts. 
If missing data is replaced by zero, this would convert the excluded repetitive regions into regions of low coverage." /> - - <param name="scaleFactor1" type="float" value="1" label="Scale factor for treatment"/> - <param name="scaleFactor2" type="float" value="1" label="Scale factor for input"/> - <param name="pseudocount" type="float" value="1" label="Pseudocount" help="Small number to avoid dividing by zero."/> - - </when> - </conditional> - - </inputs> - <outputs> - <data format="bigwig" name="outFileName"> - <change_format> - <when input="outFileFormat" value="bigwig" format="bigwig" /> - <when input="outFileFormat" value="bedgraph" format="bedgraph" /> - </change_format> - </data> - </outputs> - - <help> - -**What it does** - -This tool compares two bigwig files based on the number of mapped reads. To -compare the bigwig files the genome is partitioned into bins of equal size, -then the number of reads found in each BAM file are counted for such bins and -finally a summarizing value is reported. This value can be the ratio of the -number of reads per bin, the log2 of the ratio, the sum or the difference. - ------ - -.. class:: infomark - -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_. - -This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - - -.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ -.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de - - </help> - -</tool>
--- a/computeGCBias.xml Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,147 +0,0 @@ -<tool id="computeGCBias" name="computeGCBias" version="1.0"> - <description></description> - <requirements> - <requirement type="package" version="1.5.1_98e5d8a61431ea8605c0643d991a1a5d8999b4dc">deepTools</requirement> - </requirements> - <stdio> - <exit_code range="0" level="warning" description="Warning" /> - </stdio> - <command> - computeGCBias - --bamfile '$bamInput' - --species '$species' - --GCbiasFrequenciesFile $outFileName - --fragmentLength $fragmentLength - - #if $source.ref_source=="history": - --genome $source.input1 - #else: - --genome "${source.input1_2bit.fields.path}" - #end if - - #if $advancedOpt.showAdvancedOpt == "yes": - #if str($advancedOpt.region.value) != '': - --region '$advancedOpt.region' - #end if - - --binSize '$advancedOpt.binSize' - --sampleSize '$advancedOpt.sampleSize' - --regionSize '$advancedOpt.regionSize' - - #if $advancedOpt.filterOut: - --filterOut $advancedOpt.filterOut - #end if - - #if $advancedOpt.extraSampling: - --extraSampling $advancedOpt.extraSampling - #end if - - #end if - - #set move="" - #if $output.showOutputSettings == "yes" - #if $output.saveBiasPlot: - --biasPlot biasPlot.png - #set move="mv biasPlot.png $biasPlot" - #end if - #end if - ; $move - - </command> - - <inputs> - - <param name="bamInput" format="bam" type="data" label="Input BAM file" - help="The BAM file must be sorted and indexed."/> - - <param name="species" type="text" value="" label="Species name abbreviation" /> - - <conditional name="source"> - <param name="ref_source" type="select" label="Reference genome"> - <option value="cached">locally cached</option> - <option value="history">in your history</option> - </param> - <when value="cached"> - <param name="input1_2bit" type="select" label="Using reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> - <options 
from_data_table="deepTools_seqs" /> - </param> - </when> - <when value="history"> - <param name="input1" type="data" format="twobit" label="Select a reference dataset in 2bit format" /> - </when> - </conditional> - <param name="fragmentLength" type="integer" value="300" min="1" - label="Fragment length used for the sequencing" - help ="If paired-end reads are used the fragment length is computed based from the bam file."/> - - <conditional name="advancedOpt"> - <param name="showAdvancedOpt" type="select" label="Show advanced options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="region" type="text" value="" - label="Region of the genome to limit the operation to" - help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example "chr10" or "chr10:456700:891000"" /> - - <param name="binSize" type="integer" value="50" min="1" - label="Bin size in bp" - help="Size of the bins in bp for the ouput of the bigwig/bedgraph file."/> - - <param name="sampleSize" type="integer" value="50000000" min="1" - label="Number of sampling points to be considered" /> - - <param name="regionSize" type="integer" value="300" min="1" - label="Region size" - help ="To plot the reads per GC over a region the size of the region is required. By default, the bin size is set to 300bp, which is close to the standard fragment size for Illumina machines. However, if the depth of sequencing is low a larger bin size will be required, otherwise many bins will not overlap with any read."/> - - <param name="filterOut" type="data" format="bed" optional="true" - label="BED file containing genomic regions to be excluded from the estimation of the correction" - help="Such regions usually contain repetitive regions and peaks that if included will bias the correction. 
It is recommended to filter out known repetitive regions if multi-reads (reads that map to more than one genomic position) were excluded. In the case of ChIP-seq data, it is recommended to first use a peak caller to identify and filter out the identified peaks." /> - <param name="extraSampling" type="data" format="bed" optional="true" - label="BED file containing genomic regions for which extra sampling is required because they are underrepresented in the genome" - help="" /> - </when> - </conditional> - - <conditional name="output" > - <param name="showOutputSettings" type="select" label="Show additional output options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="saveBiasPlot" type="boolean" label="Save a diagnostic image summarizing the GC bias found on the sample"/> - </when> - </conditional> - </inputs> - <outputs> - <data format="tabular" name="outFileName" /> - <data format="png" name="biasPlot" label="${tool.name} on ${on_string}: bias plot"> - <filter>(output['showOutputSettings'] == 'yes' and output['saveBiasPlot'] == True)</filter> - </data> - </outputs> - <help> - -**What it does** - -Computes the GC bias using Benjamini's method [citation]. The resulting GC -bias can later be used to plot the bias or to correct the bias. - ------ - -.. class:: infomark - -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_. - -This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - - -.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ -.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. 
_Fidel Ramirez: ramirez@ie-freiburg.mpg.de - - </help> - -</tool>
--- a/computeMatrix.xml Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,208 +0,0 @@ -<tool id="computeMatrix" name="computeMatrix" version="1.0"> - <description>summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile</description> - <requirements> - <requirement type="package" version="1.5.1_98e5d8a61431ea8605c0643d991a1a5d8999b4dc">deepTools</requirement> - </requirements> - <command> - computeMatrix - $mode.mode_select - --regionsFileName '$regionsFile' - --scoreFileName '$scoreFile' - --outFileName '$outFileName' - - #if $output.showOutputSettings == "yes" - #if $output.saveData: - --outFileNameData '$outFileNameData' - #end if - #if $output.saveMatrix: - --outFileNameMatrix '$outFileNameMatrix' - #end if - - #if $output.saveSortedRegions: - --outFileSortedRegions '$outFileSortedRegions' - #end if - #end if - - #if $mode.mode_select == "reference-point": - --referencePoint $mode.referencePoint - $mode.nanAfterEnd - --beforeRegionStartLength $mode.beforeRegionStartLength - --afterRegionStartLength $mode.afterRegionStartLength - #else - --regionBodyLength $mode.regionBodyLength - --startLabel $mode.startLabel - --endLabel $mode.endLabel - #if $mode.regionStartLength.regionStartLength_select == "yes": - --beforeRegionStartLength $mode.regionStartLength.beforeRegionStartLength - --afterRegionStartLength $mode.regionStartLength.afterRegionStartLength - #end if - #end if - - #if $advancedOpt.showAdvancedOpt == "yes": - --sortRegions '$advancedOpt.sortRegions' - --sortUsing '$advancedOpt.sortUsing' - --averageTypeBins '$advancedOpt.averageTypeBins' - $advancedOpt.missingDataAsZero - $advancedOpt.skipZeros - $advancedOpt.binSize - - #if $advancedOpt.minThreshold: - --minThreshold $advancedOpt.minThreshold - #end if - #if $advancedOpt.maxThreshold: - --maxThreshold $advancedOpt.maxThreshold - #end if - #if $advancedOpt.scale: - --scale 
$advancedOpt.scale - #end if - - #end if - --numberOfProcessors 4 - </command> - <inputs> - <param name="regionsFile" format="bed,gff" type="data" label="Regions to plot" help="File, in BED or GFF format, containing the regions to plot."/> - <param name="scoreFile" format="bigwig,bam" type="data" label="Score file" help="Either a bigWig file (containing a score, usually covering the whole genome) or a BAM file. For this last case, coverage counts will be used for the heatmap."/> - - <conditional name="mode" > - <param name="mode_select" type="select" label="computeMatrix has two main output options" help="In the scale-regions mode, all regions in the BED/GFF file are stretched or shrunk to the same length (bp) that is indicated by the user. Reference-point refers to a position within the BED/GFF regions (e.g start of region). In the reference-point mode only those genomic positions before (downstream) and/or after (upstream) the reference point will be plotted."> - <option value="scale-regions" selected="true">scale-regions</option> - <option value="reference-point">reference-point</option> - </param> - - <when value="scale-regions" > - <param name="regionBodyLength" type="integer" value="500" label="Distance in bp to which all regions are going to be fitted"/> - <param name="startLabel" type="text" value="TSS" size="10" label="Label for the region start" help ="Label shown in the plot for the start of the region. Default is TSS (transcription start site), but could be changed to anything, e.g. "peak start"." /> - <param name="endLabel" type="text" value="TES" size="10" label="Label for the region end" help="Label shown in the plot for the region end. 
Default is TES (transcription end site)."/> - <conditional name="regionStartLength"> - <param name="regionStartLength_select" type="select" label="Set distance up- and downstream of the given regions"> - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="beforeRegionStartLength" type="integer" value="1000" min="1" optional="true" label="Distance upstream of the start site of the regions defined in the region file" help="If the regions are genes, this would be the distance upstream of the transcription start site."/> - - <param name="afterRegionStartLength" type="integer" value="1000" min="1" optional="true" label="Distance downstream of the end site of the given regions" help="If the regions are genes, this would be the distance downstream of the transcription end site."/> - </when> - </conditional> - </when> - - <when value="reference-point"> - <param name="referencePoint" type="select" label="The reference point for the plotting"> - <option value="TSS" selected="true">region start (TSS)</option> - <option value="TES" selected="true">region end (TES)</option> - <option value="center" selected="true">center of the region</option> - </param> - <param name="nanAfterEnd" type="boolean" truevalue="--nanAfterEnd" falsevalue="" label="Discard any values after the region end" help="This is useful to visualize the region end when not using the scale-regions mode and when the reference-point is set to the TSS."/> - <param name="beforeRegionStartLength" type="integer" value="1000" min="1" label="Distance upstream of the start site of the regions defined in the region file" help="If the regions are genes, this would be the distance upstream of the transcription start site."/> - - <param name="afterRegionStartLength" type="integer" value="1000" min="1" label="Distance downstream of the end site of the given regions" help="If the regions are genes, this would be the distance 
downstream of the transcription end site."/> - </when> - </conditional> - - <conditional name="output" > - <param name="showOutputSettings" type="select" label="Show additional output options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="saveData" type="boolean" label="Save the averages per matrix column into a text file" help="This corresponds to the underlying data used to plot a summary profile."/> - <param name="saveMatrix" type="boolean" label="Save the matrix of values underlying the heatmap" help="This matrix can easily be loaded into R or other programs."/> - <param name="saveSortedRegions" type="boolean" label="Save the regions after skiping zeros or min/max threshold values" help="The order of the regions in the file follows the sorting order selected. This is useful, for example, to generate other heatmaps keeping the sorting of the first heatmap."/> - </when> - </conditional> - - - <conditional name="advancedOpt" > - <param name="showAdvancedOpt" type="select" label="Show advanced options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - - <param name="binSize" type="integer" value="100" min="1" optional="true" label="Length, in base pairs, of the non-overlapping bin for averaging the score over the regions length" /> - - <param name="sortRegions" type="select" label="Sort regions" - help="Whether the output file should present the regions sorted."> - <option value="no" selected="true">no ordering</option> - <option value="descend">descending order</option> - <option value="ascend">ascending order</option> - </param> - - <param name="sortUsing" type="select" label="Method used for sorting." help="The value is computed for each row." 
> - <option value="mean" selected="true">mean</option> - <option value="median">median</option> - <option value="min">min</option> - <option value="max">max</option> - <option value="sum">sum</option> - <option value="region_length">region length</option> - </param> - - <param name="averageTypeBins" type="select" label="Define the type of statistic that should be used over the bin size range"> - <option value="mean" selected="true">mean</option> - <option value="median">median</option> - <option value="min">min</option> - <option value="max">max</option> - <option value="sum">sum</option> - <option value="std">std</option> - </param> - - <param name="missingDataAsZero" type="boolean" truevalue="--missingDataAsZero" falsevalue="" label="Indicate missing data as zero" help="Only for bigwig input! Set to "yes", if missing data should be indicated as zeros. Default is to ignore such cases which will be depicted as black areas in the heatmap. (see "Missing data color" options of the heatmapper for additional options)."/> - - <param name="skipZeros" type="boolean" truevalue="--skipZeros" falsevalue="" label="Skip zeros" help="Whether regions with only scores of zero should be included or not. Default is to include them."/> - - <param name="minThreshold" type="float" optional="true" label="Minimum threshold" help="Any region containing a value that is equal or less than this numeric value will be skipped. This is useful to skip, for example, genes where the read count is zero for any of the bins. This could be the result of unmappable areas and can bias the overall results."/> - <param name="maxThreshold" type="float" optional="true" label="Maximum threshold" help="Any region containing a value that is equal or higher that this numeric value will be skipped. The max threshold is useful to skip those few regions with very high read counts (e.g. 
major satellites) that may bias the average values."/> - <param name="scale" type="float" optional="true" label="Scale" help="If set, all values are multiplied by this number."/> - </when> - </conditional> - - </inputs> - <outputs> - <data format="bgzip" name="outFileName" label="${tool.name} on ${on_string}: matrix"> - </data> - <data format="tabular" name="outFileNameData" label="${tool.name} on ${on_string}: raw data"> - <filter>(output['showOutputSettings'] == 'yes' and output['saveData'] == True)</filter> - </data> - <data format="tabular" name="outFileNameMatrix" label="${tool.name} on ${on_string}: matrix of values"> - <filter>(output['showOutputSettings'] == 'yes' and output['saveMatrix'] == True)</filter> - </data> - <data format="bed" name="outFileSortedRegions" label="${tool.name} on ${on_string}: sorted/filtered regions"> - <filter>(output['showOutputSettings'] == 'yes' and output['saveSortedRegions'] == True)</filter> - </data> - </outputs> - <!-- - computeMatrix -S test.bw -R test2.bed -a 100 -b 100 -bs 1 - --> - <tests> - <test> - <param name="regionsFile" value="test2.bed" ftype="bed" /> - <param name="scoreFile" value="test.bw" ftype="bigwig" /> - <param name="advancedOpt.binSize" value="1" /> - <param name="mode.beforeRegionStartLength" value="100" /> - <param name="mode.afterRegionStartLength" value="100" /> - <output name="outFileName" file="master.mat.gz" ftype="bgzip" compare="sim_size" delta="100" /> - </test> - </tests> - <help> -**What it does** - -This tool summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile. Typically, these genomic regions are genes, but any other regions defined in a BED or GFF format can be used. This tool can also be used to filter and sort regions according to their score. - ------ - -.. 
class:: infomark - -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_. - -This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - - -.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ -.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de - </help> - -</tool>
--- a/correctGCBias.xml Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -<tool id="correctGCBias" name="correctGCBias" version="1.0"> - <description> - </description> - <requirements> - <requirement type="package" version="1.5.1_98e5d8a61431ea8605c0643d991a1a5d8999b4dc">deepTools</requirement> - <requirement type="package" version="0.1">ucsc_tools</requirement> - </requirements> - <command> - correctGCBias - --bamfile '$bamInput' - --species '$species' - --GCbiasFrequenciesFile $GCbiasFrequenciesFile - - #if $source.ref_source=="history": - --genome $source.input1 - #else: - --genome "${source.input1_2bit.fields.path}" - #end if - - #if $advancedOpt.showAdvancedOpt == "yes": - #if str($advancedOpt.region.value) != '': - --region '$advancedOpt.region' - #end if - - --binSize '$advancedOpt.binSize' - #end if - - #set newoutFileName="corrected."+str($outFileFormat) - - --correctedFile $newoutFileName; mv $newoutFileName $outFileName - - </command> - - <inputs> - - <param name="GCbiasFrequenciesFile" type="data" format="tabular" label="Output of computeGCBias" /> - - <param name="bamInput" format="bam" type="data" label="Input BAM file" help="The BAM file must be sorted and indexed."/> - - <param name="species" type="text" value="" label="Species name abbreviation" /> - - <conditional name="source"> - <param name="ref_source" type="select" label="Reference genome"> - <option value="cached">locally cached</option> - <option value="history">in your history</option> - </param> - <when value="cached"> - <param name="input1_2bit" type="select" label="Using reference genome" help="If your genome of interest is not listed, contact your Galaxy team"> - <options from_data_table="deepTools_seqs" /> - </param> - </when> - <when value="history"> - <param name="input1" type="data" format="twobit" label="Select a reference dataset in 2bit format" /> - </when> - </conditional> - - <param name="outFileFormat" type="select" label="File 
format of the output"> - <option value="bam">bam</option> - <option value="bw">bigwig</option> - <option value="bg">bedgraph</option> - </param> - - <conditional name="advancedOpt"> - <param name="showAdvancedOpt" type="select" label="Show advanced options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="region" type="text" value="" - label="Region of the genome to limit the operation to" - help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example "chr10" or "chr10:456700:891000"" /> - - <param name="binSize" type="integer" value="50" min="1" - label="Bin size in bp" - help="Size of the bins in bp for the ouput of the bigwig/bedgraph file."/> - </when> - </conditional> - </inputs> - - <outputs> - <data format="bam" name="outFileName"> - <change_format> - <when input="outFileFormat" value="bw" format="bigwig" /> - <when input="outFileFormat" value="bam" format="bam" /> - <when input="outFileFormat" value="bg" format="bedgraph" /> - </change_format> - </data> - </outputs> - <help> - -**What it does** - -Computes the GC bias using Benjamini's method [citation]. The resulting GC -bias can later be used to plot the bias or to correct the bias. - ------ - -.. class:: infomark - -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_. - -This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - - -.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ -.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. 
_Fidel Ramirez: ramirez@ie-freiburg.mpg.de - - </help> - -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Mon Aug 12 08:16:21 2013 -0400 @@ -0,0 +1,11 @@ +<?xml version="1.0"?> +<datatypes> + <datatype_files> + <datatype_file name="homer.py"/> + </datatype_files> + <registration> + <datatype extension="homer_tagdir" type="galaxy.datatypes.homer:TagDirectory" mimetype="text/html" display_in_upload="false"/> + </registration> + <sniffers> + </sniffers> +</datatypes>
--- a/heatmapper.xml Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,370 +0,0 @@ -<tool id="heatmapper" name="Heatmapper" version="1.0"> - <description>creates a heatmap for a score associated to genomic regions</description> - - <requirements> - <requirement type="package" version="0.1.18">samtools</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="1.2.1">matplotlib</requirement> - <requirement type="package" version="0.12.0">scipy</requirement> - <requirement type="package" version="0.1">ucsc_tools</requirement> - <requirement type="package" version="1.5.1_98e5d8a61431ea8605c0643d991a1a5d8999b4dc">deepTools</requirement> - </requirements> - - <command> - heatmapper - --matrixFile $matrixFile - #if $output.showOutputSettings == "yes" - #set newoutFileName=str($outFileName)+"."+str($output.outFileFormat) - --outFileName $newoutFileName - #if $outFileNameData: - --outFileNameData '$outFileNameData' - #end if - - #if $outFileNameMatrix: - --outFileNameMatrix '$outFileNameMatrix' - #end if - - #if $outFileSortedRegions: - --outFileSortedRegions '$outFileSortedRegions' - #end if - #else - #set newoutFileName=str($outFileName)+".png" - --outFileName $newoutFileName - #end if - - #if $advancedOpt.showAdvancedOpt == "yes" - #if $advancedOpt.sortRegions: - --sortRegions '$advancedOpt.sortRegions' - #end if - - #if $advancedOpt.sortUsing: - --sortUsing '$advancedOpt.sortUsing' - #end if - - #if $advancedOpt.averageTypeSummaryPlot: - --averageTypeSummaryPlot '$advancedOpt.averageTypeSummaryPlot' - #end if - - #if str($advancedOpt.missingDataColor.value) != "None": - --missingDataColor '$advancedOpt.missingDataColor' - #end if - - --colorMap '$advancedOpt.colorMap' - - #if $advancedOpt.zMin: - --zMin $advancedOpt.zMin - #end if - #if $advancedOpt.zMax: - --zMax $advancedOpt.zMax - #end if - - #if $advancedOpt.yMin: - --yMin $advancedOpt.yMin - #end if - #if $advancedOpt.yMax: 
- --yMax $advancedOpt.yMax - #end if - - --xAxisLabel '$advancedOpt.xAxisLabel' - --yAxisLabel '$advancedOpt.yAxisLabel' - - --heatmapWidth $advancedOpt.heatmapWidth - --heatmapHeight $advancedOpt.heatmapHeight - - --whatToShow '$advancedOpt.whatToShow' - - --startLabel '$advancedOpt.startLabel' - --endLabel '$advancedOpt.endLabel' - --refPointLabel '$advancedOpt.referencePointLabel' - --regionsLabel '$advancedOpt.regionsLabel' - - #if str($advancedOpt.plotTitle.value) != "None": - --plotTitle '$advancedOpt.plotTitle' - #end if - - $advancedOpt.onePlotPerGroup - #end if - ; mv $newoutFileName $outFileName - </command> - <inputs> - <param name="matrixFile" format="bgzip" type="data" label="Matrix file from the computeMatrix tool"/> - - <conditional name="output" > - <param name="showOutputSettings" type="select" label="Show advanced output settings" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="outFileFormat" type="select" label="Image file format"> - <option value="png" selected="true">png</option> - <option value="pdf">pdf</option> - <option value="svg">svg</option> - <option value="eps">eps</option> - <option value="emf">emf</option> - </param> - <param name="saveData" type="boolean" label="Save the data underlying data for the average profile"/> - <param name="saveMatrix" type="boolean" label="Save the the matrix of values underlying the heatmap"/> - <param name="saveSortedRegions" type="boolean" label="Save the regions after skipping zeros or min/max threshold values" help="The order of the regions in the file follows the sorting order selected. 
This is useful, for example, to generate other heatmaps keeping the sorting of the first heatmap."/> - </when> - </conditional> - - <conditional name="advancedOpt" > - <param name="showAdvancedOpt" type="select" label="Show advanced options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="sortRegions" type="select" label="Sort regions" - help="Whether the heatmap should present the regions sorted. The default is to sort in descending order based on the mean value per region."> - <option value="no" selected="true">no ordering</option> - <option value="descend">descending order</option> - <option value="ascend">ascending order</option> - </param> - - <param name="sortUsing" type="select" label="Method used for sorting" help="For each row the method is computed." > - <option value="mean" selected="true">mean</option> - <option value="median">median</option> - <option value="min">min</option> - <option value="max">max</option> - <option value="sum">sum</option> - <option value="region_length">region length</option> - </param> - - <param name="averageTypeSummaryPlot" type="select" label="Type of statistic that should be plotted in the summary image above the heatmap"> - <option value="mean" selected="true">mean</option> - <option value="median">median</option> - <option value="min">min</option> - <option value="max">max</option> - <option value="sum">sum</option> - <option value="std">std</option> - </param> - - <param name="missingDataColor" type="text" label="Missing data color" value="black" optional="true" help="If 'Represent missing data as zero' is not set, such cases will be colored in black by default. By using this parameter a different color can be set. A value between 0 and 1 will be used for a gray scale (black is 0). Also color names can be used, see a list here: http://packages.python.org/ete2/reference/reference_svgcolors.html. 
Alternatively colors can be specified using the #rrggbb notation." /> - - <param name="colorMap" type="select" label="Color map to use for the heatmap" help=" Available values can be seen here: http://www.astro.lsa.umich.edu/~msshin/science/code/matplotlib_cm/"> - <option value="RdYlBu" selected="true">RdYlBu</option> - <option value="Accent">Accent</option> - <option value="Spectral">Spectral</option> - <option value="Set1">Set1</option> - <option value="Set2">Set2</option> - <option value="Set3">Set3</option> - <option value="Dark2">Dark2</option> - <option value="Reds">Reds</option> - <option value="Oranges">Oranges</option> - <option value="Greens">Greens</option> - <option value="Blues">Blues</option> - <option value="Greys">Greys</option> - <option value="Purples">Purples</option> - <option value="Paired">Paired</option> - <option value="Pastel1">Pastel1</option> - <option value="Pastel2">Pastel2</option> - <option value="spring">spring</option> - <option value="summer">summer</option> - <option value="autumn">autumn</option> - <option value="winter">winter</option> - <option value="hot">hot</option> - <option value="coolwarm">coolwarm</option> - <option value="cool">cool</option> - <option value="seismic">seismic</option> - <option value="terrain">terrain</option> - <option value="ocean">ocean</option> - <option value="rainbow">rainbow</option> - <option value="bone">bone</option> - <option value="flag">flag</option> - <option value="prism">prism</option> - <option value="cubehelix">cubehelix</option> - <option value="binary">binary</option> - <option value="pink">pink</option> - <option value="gray">gray</option> - <option value="copper">copper</option> - <option value="BrBG">BrBG</option> - <option value="BuGn">BuGn</option> - <option value="BuPu">BuPu</option> - <option value="GnBu">GnBu</option> - <option value="OrRd">OrRd</option> - <option value="PiYG">PiYG</option> - <option value="PRGn">PRGn</option> - <option value="PuOr">PuOr</option> - <option 
value="PuRd">PuRd</option> - <option value="PuBu">PuBu</option> - <option value="RdBu">RdBu</option> - <option value="RdGy">RdGy</option> - <option value="RdPu">RdPu</option> - <option value="YlGn">YlGn</option> - <option value="PuBuGn">PuBuGn</option> - <option value="RdYlGn">RdYlGn</option> - <option value="YlGnBu">YlGnBu</option> - <option value="YlOrBr">YlOrBr</option> - <option value="YlOrRd">YlOrRd</option> - <option value="gist_gray">gist_gray</option> - <option value="gist_stern">gist_stern</option> - <option value="gist_earth">gist_earth</option> - <option value="gist_yarg">gist_yarg</option> - <option value="gist_ncar">gist_ncar</option> - <option value="gist_rainbow">gist_rainbow</option> - <option value="gist_heat">gist_heat</option> - <option value="gnuplot">gnuplot</option> - <option value="gnuplot2">gnuplot2</option> - <option value="CMRmap">CMRmap</option> - <option value="bwr">bwr</option> - <option value="hsv">hsv</option> - <option value="brg">brg</option> - <option value="jet">jet</option> - <option value="afmhot">afmhot</option> - - <option value="Accent_r">Accent reversed</option> - <option value="Spectral_r">Spectral reversed</option> - <option value="Set1_r">Set1 reversed</option> - <option value="Set2_r">Set2 reversed</option> - <option value="Set3_r">Set3 reversed</option> - <option value="Dark2_r">Dark2 reversed</option> - <option value="Reds_r">Reds reversed</option> - <option value="Oranges_r">Oranges reversed</option> - <option value="Greens_r">Greens reversed</option> - <option value="Blues_r">Blues reversed</option> - <option value="Greys_r">Greys reversed</option> - <option value="Purples_r">Purples reversed</option> - <option value="Paired_r">Paired reversed</option> - <option value="Pastel1_r">Pastel1 reversed</option> - <option value="Pastel2_r">Pastel2 reversed</option> - <option value="spring_r">spring reversed</option> - <option value="summer_r">summer reversed</option> - <option value="autumn_r">autumn reversed</option> - 
<option value="winter_r">winter reversed</option> - <option value="hot_r">hot reversed</option> - <option value="coolwarm_r">coolwarm reversed</option> - <option value="cool_r">cool reversed</option> - <option value="seismic_r">seismic reversed</option> - <option value="terrain_r">terrain reversed</option> - <option value="ocean_r">ocean reversed</option> - <option value="rainbow_r">rainbow reversed</option> - <option value="bone_r">bone reversed</option> - <option value="flag_r">flag reversed</option> - <option value="prism_r">prism reversed</option> - <option value="cubehelix_r">cubehelix reversed</option> - <option value="binary_r">binary reversed</option> - <option value="pink_r">pink reversed</option> - <option value="gray_r">gray reversed</option> - <option value="copper_r">copper reversed</option> - <option value="BrBG_r">BrBG reversed</option> - <option value="BuGn_r">BuGn reversed</option> - <option value="BuPu_r">BuPu reversed</option> - <option value="GnBu_r">GnBu reversed</option> - <option value="OrRd_r">OrRd reversed</option> - <option value="PiYG_r">PiYG reversed</option> - <option value="PRGn_r">PRGn reversed</option> - <option value="PuOr_r">PuOr reversed</option> - <option value="PuRd_r">PuRd reversed</option> - <option value="PuBu_r">PuBu reversed</option> - <option value="RdBu_r">RdBu reversed</option> - <option value="RdGy_r">RdGy reversed</option> - <option value="RdPu_r">RdPu reversed</option> - <option value="YlGn_r">YlGn reversed</option> - <option value="PuBuGn_r">PuBuGn reversed</option> - <option value="RdYlBu_r">RdYlBu reversed</option> - <option value="RdYlGn_r">RdYlGn reversed</option> - <option value="YlGnBu_r">YlGnBu reversed</option> - <option value="YlOrBr_r">YlOrBr reversed</option> - <option value="YlOrRd_r">YlOrRd reversed</option> - <option value="gist_gray_r">gist_gray reversed</option> - <option value="gist_stern_r">gist_stern reversed</option> - <option value="gist_earth_r">gist_earth reversed</option> - <option 
value="gist_yarg_r">gist_yarg reversed</option> - <option value="gist_ncar_r">gist_ncar reversed</option> - <option value="gist_rainbow_r">gist_rainbow reversed</option> - <option value="gist_heat_r">gist_heat reversed</option> - <option value="gnuplot_r">gnuplot reversed</option> - <option value="gnuplot2_r">gnuplot2 reversed</option> - <option value="CMRmap_r">CMRmap reversed</option> - <option value="bwr_r">bwr reversed</option> - <option value="hsv_r">hsv reversed</option> - <option value="brg_r">brg reversed</option> - <option value="jet_r">jet reversed</option> - <option value="afmhot_r">afmhot reversed</option> - </param> - - <param name="zMin" type="float" value="" size="3" label="Minimum value for the heatmap intensities. Leave empty for automatic values" optional="true"/> - <param name="zMax" type="float" value="" size="3" label="Maximum value for the heatmap intensities. Leave empty for automatic values" optional="true"/> - <param name="yMin" type="float" value="" size="3" label="Minimum value for the Y-axis of the summary plot. Leave empty for automatic values" optional="true"/> - <param name="yMax" type="float" value="" size="3" label="Maximum value for Y-axis of the summary plot. 
Leave empty for automatic values" optional="true"/> - - <param name="xAxisLabel" type="text" value="distance from TSS (bp)" size="200" label="Description for the x-axis label" /> - <param name="yAxisLabel" type="text" value="genes" size="30" label="Description for the y-axis label for the top panel" /> - - <param name="heatmapWidth" type="float" value="7.5" min="1" max="100" label="Heatmap width in cm" help="The minimum value is 1 and the maximum is 100."/> - - <param name="heatmapHeight" type="float" value="25" min="3" max="100" label="Heatmap height in cm" help="The minimum value is 3 and the maximum is 100."/> - - <param name="whatToShow" type="select" label="What to show" help ="The default is to include a summary or profile plot on top of the heatmap and a heatmap colorbar."> - <option value="plot, heatmap and colorbar" selected="true">summary plot, heatmap and colorbar</option> - <option value="plot only">summary plot only</option> - <option value="plot and heatmap">summary plot and heatmap (no colorbar)</option> - <option value="heatmap only">heatmap only</option> - <option value="heatmap and colorbar">heatmap and colorbar</option> - <option value="colorbar only">colorbar only</option> - </param> - - <param name="startLabel" type="text" value="TSS" size="10" label="Label for the region start" help ="[only for scale-regions mode] Label shown in the plot for the start of the region. Default is TSS (transcription start site), but could be changed to anything, e.g. "peak start"." /> - <param name="endLabel" type="text" value="TES" size="10" label="Label for the region end" help="[only for scale-regions mode] Label shown in the plot for the region end. Default is TES (transcription end site)."/> - - <param name="referencePointLabel" type="text" value="TSS" size="10" label="Reference point label" help ="[only for scale-regions mode] Label shown in the plot for the reference-point. Default is the same as the reference point selected (e.g. 
TSS), but could be anything, e.g. "peak start" etc." /> - - <param name="regionsLabel" type="text" value="genes" size="30" label="Labels for the regions plotted in the heatmap" help="If more than one region is being plotted a list of labels separated by comma and limited by quotes, is required. For example, "label1, label2"."/> - - <param name="plotTitle" type="text" value="" size="30" label="Title of the plot" help="Title of the plot, to be printed on top of the generated image. Leave blank for no title." /> - - <param name="onePlotPerGroup" type="boolean" truevalue="--onePlotPerGroup" falsevalue="" label="Do one plot per group" help="When the region file contains groups separated by "#", the default is to plot the averages for the distinct plots in one plot. If this option is set, each group will get its own plot, stacked on top of each other."/> - - - </when> - </conditional> - - </inputs> - <outputs> - <data format="png" name="outFileName" label="${tool.name} image"> - <change_format> - <when input="output.outFileFormat" value="pdf" format="pdf" /> - <when input="output.outFileFormat" value="svg" format="svg" /> - <when input="output.outFileFormat" value="eps" format="eps" /> - <when input="output.outFileFormat" value="emf" format="emf" /> - </change_format> - </data> - <data format="tabular" name="outFileNameData" label="${tool.name} raw plot data"> - <filter>(output['showOutputSettings'] == 'yes' and output['saveData'] == True)</filter> - </data> - <data format="tabular" name="outFileNameMatrix" label="${tool.name} matrix of heatmap values"> - <filter>(output['showOutputSettings'] == 'yes' and output['saveMatrix'] == True)</filter> - </data> - <data format="bed" name="outFileSortedRegions" label="${tool.name} sorted/filtered regions"> - <filter>(output['showOutputSettings'] == 'yes' and output['saveSortedRegions'] == True)</filter> - </data> - </outputs> - <tests> - <test> - <param name="matrixFile" value="master.mat.gz" ftype="bgzip" /> - <output 
name="outFileName" file="master.png" ftype="png" compare="sim_size" delta="100" /> - </test> - </tests> - <help> - -**What it does** - -HeatMapper visualizes scores associated with genomic regions, for example log2 fold change values obtained from ChIP-seq experiments. Those values can be visualized individually along each of the regions provided by the user. - ------ - -.. class:: infomark - -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_. - -This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - - -.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ -.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de - </help> - -</tool>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/homer.py Mon Aug 12 08:16:21 2013 -0400 @@ -0,0 +1,76 @@
"""
HOMER special datatypes
"""

from galaxy.datatypes.data import get_file_peek
from galaxy.datatypes.data import Text, Data
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes.images import Html


# The class must derive from a Galaxy datatype: the constructor invokes the
# base initializer and add_composite_file(), and generate_primary_file() calls
# get_composite_files(); none of these exist on a plain `object`.
class TagDirectory(Html):
    """Base class for HOMER's Tag Directory datatype.

    A composite datatype: the primary file is an HTML index produced by
    generate_primary_file(), and the component files of the tag directory
    are registered in __init__().
    """

    file_ext = 'homer_tagdir'
    composite_type = 'auto_primary_file'
    allow_datatype_change = False

    def __init__(self, **kwd):
        """Initialise the datatype and register the tag directory's files.

        Keyword arguments are forwarded unchanged to the base initializer.
        Files registered with optional=True may be absent from a dataset.
        """
        Html.__init__(self, **kwd)
        # Contains basic configuration information (registration disabled in the original).
        #self.add_composite_file('tagInfo.txt', description = 'basic configuration information', mimetype = 'text/html')
        # Histogram of read lengths used for alignment.
        self.add_composite_file('tagLengthDistribution.txt',
                                description='histogram of read lengths used for alignment',
                                mimetype='text/html')
        # Histogram of clonal read depth (number of reads per unique position).
        self.add_composite_file('tagCountDistribution.txt',
                                description='histogram of clonal read depth, showing the number of reads per unique position',
                                mimetype='text/html')
        # Distribution of distances between adjacent reads in the genome.
        self.add_composite_file('tagAutocorrelation.txt',
                                description='distribution of distances between adjacent reads in the genome',
                                mimetype='text/html')
        # Nucleotide/dinucleotide frequencies relative to the 5' end of all reads.
        self.add_composite_file('tagFreq.txt',
                                description="nucleotide and dinucleotide frequencies as a function of distance from the 5' end of all reads",
                                mimetype='text/html', optional=True)
        # Same as tagFreq.txt, but each genomic position is counted only once.
        self.add_composite_file('tagFreqUniq.txt',
                                description="nucleotide and dinucleotide frequencies as a function of distance from the 5' end of all reads (counted only once)",
                                mimetype='text/html', optional=True)
        # Distribution of fragment GC%-content.
        self.add_composite_file('tagGCcontent.txt',
                                description='Distribution of fragment GC%-content',
                                mimetype='text/html', optional=True)
        # Distribution of fragment GC%-content at each location in the genome.
        self.add_composite_file('genomeGCcontent.txt',
                                description='Distribution of fragment GC%-content at each location in the genome',
                                mimetype='text/html', optional=True)

    def generate_primary_file(self, dataset=None):
        """Return the HTML index page listing every registered composite file.

        Each entry links to the composite file by name; optional files are
        marked with " (optional)".
        """
        rval = ['<html><head><title>HOMER database files</title></head><ul>']
        # .items() instead of .iteritems(): same pairs, and not tied to Python 2.
        for composite_name, composite_file in self.get_composite_files(dataset=dataset).items():
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            rval.append('<li><a href="%s">%s</a>%s' % (composite_name, composite_name, opt_text))
        rval.append('</ul></html>')
        return "\n".join(rval)

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Apparently an old display method, but still gets called.

        This allows us to format the data shown in the central pane via the "eye" icon.
        All arguments are ignored; a fixed description string is returned.
        """
        return "This is a HOMER database."

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text on `dataset`.

        A purged dataset gets placeholder texts; otherwise both fields carry
        a fixed description, since the content spans multiple files.
        """
        if not dataset.dataset.purged:
            dataset.peek = "HOMER database (multiple files)"
            dataset.blurb = "HOMER database (multiple files)"
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Create HTML content, used for displaying peek.

        Falls back to a fixed description if the stored peek cannot be read.
        """
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback; `except Exception` (rather than a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            return "HOMER database (multiple files)"

    def get_mime(self):
        """Returns the mime type of the datatype (pretend it is text for peek)"""
        return 'text/plain'

    # No instance/class argument in the signature, so this has to be a
    # staticmethod; otherwise an instance call would bind the instance to
    # `split_files` and fail with an argument-count error.
    @staticmethod
    def merge(split_files, output_file):
        """Merge HOMER databases (not implemented).

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("Merging HOMER databases is not supported")

    # The first parameter is `cls`, so this is meant to be a classmethod.
    @classmethod
    def split(cls, input_datasets, subdir_generator_function, split_params):
        """Split a HOMER database (not implemented).

        Returns None when `split_params` is None (no split requested).

        Raises:
            NotImplementedError: when `split_params` is not None.
        """
        if split_params is None:
            return None
        raise NotImplementedError("Can't split HOMER databases")
--- a/profiler.xml Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,189 +0,0 @@ -<tool id="dt_profiler" name="profiler" version="1.0"> - <description> - creates a profile plot for a score associated to genomic regions - </description> - <requirements> - <requirement type="package" version="1.5.1_98e5d8a61431ea8605c0643d991a1a5d8999b4dc">deepTools</requirement> - </requirements> - <command> - profiler - --matrixFile $matrixFile - - #if $output.showOutputSettings == "yes" - #set newoutFileName=str($outFileName)+"."+str($output.outFileFormat) - --outFileName $newoutFilename - #if $output.outFileNameData: - --outFileNameData '$output.outFileNameData' - #end if - - #if $output.outFileNameMatrix: - --outFileNameMatrix '$output.outFileNameMatrix' - #end if - - #if $output.outFileSortedRegions: - --outFileSortedRegions '$output.outFileSortedRegions' - #end if - #else - #set newoutFileName=str($outFileName)+".png" - --outFileName $newoutFileName - #end if - - #if $scaleRegions.showScaleRegionsOpt == "yes": - --startLabel $scaleRegions.startLabel - --endLabel $scaleRegions.endLabel - --refPointLabel $scaleRegions.refPointLabel - #end if - - #if $advancedOpt.showAdvancedOpt == "yes" - #if $advancedOpt.averageType: - --averageType '$advancedOpt.averageType' - #end if - --plotHeight $advancedOpt.plotHeight - --plotWidth $advancedOpt.plotWidth - --plotType $advancedOpt.plotType - - --regionsLabel '$advancedOpt.regionsLabel' - - #if str($advancedOpt.plotTitle.value) != "None": - --plotTitle '$advancedOpt.plotTitle' - #end if - - $advancedOpt.onePlotPerGroup - - #if $advancedOpt.yMin: - --yMin $advancedOpt.yMin - #end if - #if $advancedOpt.yMax: - --yMax $advancedOpt.yMax - #end if - - --xAxisLabel '$advancedOpt.xAxisLabel' - #if str($advancedOpt.yAxisLabel.value) != "None": - --yAxisLabel '$advancedOpt.yAxisLabel' - #end if - #end if - ; mv $newoutFileName $outFileName - </command> - - <inputs> - <param name="matrixFile" format="bgzip" type="data" 
label="Matrix file from the computeMatrix tool"/> - <conditional name="scaleRegions"> - <param name="showScaleRegionsOpt" type="select" label="The input matrix was computed in scale-regions mode"> - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="startLabel" type="text" value="TSS" size="10" label="Label for the region start" help ="[only for scale-regions mode] Label shown in the plot for the start of the region. Default is TSS (transcription start site), but could be changed to anything, e.g. "peak start"." /> - <param name="endLabel" type="text" value="TES" size="10" label="Label for the region end" help="[only for scale-regions mode] Label shown in the plot for the region end. Default is TES (transcription end site)."/> - <param name="refPointLabel" type="text" value="TSS" size="10" label="Reference point label" help ="[only for scale-regions mode] Label shown in the plot for the reference-point. Default is the same as the reference point selected (e.g. TSS), but could be anything, e.g. "peak start" etc." 
/> - </when> - </conditional> - - <conditional name="output" > - <param name="showOutputSettings" type="select" label="Show advanced output settings" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="outFileFormat" type="select" label="Image file format"> - <option value="png" selected="true">png</option> - <option value="pdf">pdf</option> - <option value="svg">svg</option> - <option value="eps">eps</option> - <option value="emf">emf</option> - </param> - <param name="saveData" type="boolean" label="Save the data underlying data for the average profile"/> - <param name="saveMatrix" type="boolean" label="Save the the matrix of values underlying the heatmap"/> - <param name="saveSortedRegions" type="boolean" label="Save the regions after skipping zeros or min/max threshold values" help="The order of the regions in the file follows the sorting order selected. This is useful, for example, to generate other heatmaps keeping the sorting of the first heatmap."/> - </when> - </conditional> - - <conditional name="advancedOpt"> - <param name="showAdvancedOpt" type="select" label="Show advanced options" > - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="averageType" type="select" label="Define the type of statistic that should be used for the profile."> - <option value="mean" selected="true">mean</option> - <option value="median">median</option> - <option value="min">min</option> - <option value="max">max</option> - <option value="sum">sum</option> - <option value="std">std</option> - </param> - - <param name="plotHeight" type="integer" value="5" min="3" - label="Plot height" - help="Height in cm. The default for the plot height is 5 centimeters. The minimum value is 3 cm." 
/> - <param name="plotWidth" type="integer" value="8" min="1" - label="Plot width" - help="Width in cm. The default value is 8 centimeters. The minimum value is 1 cm." /> - - <param name="plotType" type="select" label="Plot type" - help="For the summary plot (profile) only. The "lines" option will plot the profile line based on the average type selected. The "fill" option fills the region between zero and the profile curve. The fill in color is semi transparent to distinguish different profiles. The "std" option colors the region between the profile and the standard deviation of the data. As in the case of fill, a semi-transparent color is used. The option "overlapped_lines" plots each region values, one on top of the other; this option only works if "one plot per proup" is set."> - <option value="lines" selected="true">lines</option> - <option value="fill">fill</option> - <option value="std">std</option> - <option value="overlapped_lines">overlapped lines</option> - </param> - - <param name="regionsLabel" type="text" value="genes" size="30" label="Labels for the regions plotted in the heatmap" help="If more than one region is being plotted a list of labels separated by comma and limited by quotes, is required. For example, "label1, label2"."/> - <param name="plotTitle" type="text" value="" size="30" label="Title of the plot" help="Title of the plot, to be printed on top of the generated image. Leave blank for no title." /> - <param name="onePlotPerGroup" type="boolean" truevalue="--onePlotPerGroup" falsevalue="" label="Do one plot per group" help="When the region file contains groups separated by "#", the default is to plot the averages for the distinct plots in one plot. If this option is set, each group will get its own plot, stacked on top of each other."/> - - <param name="yMin" type="float" value="" size="3" label="Minimum value for the Y-axis of the summary plot. 
Leave empty for automatic values" optional="true"/> - <param name="yMax" type="float" value="" size="3" label="Maximum value for Y-axis of the summary plot. Leave empty for automatic values" optional="true"/> - - <param name="xAxisLabel" type="text" value="gene distance (bp)" size="50" label="Description for the x-axis label" /> - <param name="yAxisLabel" type="text" value="" size="50" label="Description for the y-axis label for the top panel" /> - </when> - </conditional> - </inputs> - <outputs> - <data format="png" name="outFileName" label="${tool.name} image"> - <change_format> - <when input="output.outFileFormat" value="pdf" format="pdf" /> - <when input="output.outFileFormat" value="svg" format="svg" /> - <when input="output.outFileFormat" value="eps" format="eps" /> - <when input="output.outFileFormat" value="emf" format="emf" /> - </change_format> - </data> - <data format="tabular" name="outFileNameData" label="${tool.name} raw plot data"> - <filter>(output['showOutputSettings'] == 'yes' and output['saveData'] == True)</filter> - </data> - <data format="tabular" name="outFileNameMatrix" label="${tool.name} matrix of heatmap values"> - <filter>(output['showOutputSettings'] == 'yes' and output['saveMatrix'] == True)</filter> - </data> - <data format="bed" name="outFileSortedRegions" label="${tool.name} sorted/filtered regions"> - <filter>(output['showOutputSettings'] == 'yes' and output['saveSortedRegions'] == True)</filter> - </data> - </outputs> - <help> - -**What it does** - -This tool creates a profile plot for a score associated to genomic regions. -Typically, these regions are genes, but any other regions defined in a BED or -GFF format will work. A preprocessed matrix generated by the tool -computeMatrix is required. - ------ - -.. class:: infomark - -Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please sent an email to `Fidel Ramirez`_. 
- -This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_. - - -.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/ -.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de -.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de - - </help> - -</tool>
--- a/test-data/test2.bed Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -ch1 100 150 CG11023 0 + -ch2 150 175 cda5 0 - -ch3 100 125 cda8 0 + -#Group 1 -ch1 75 125 C11023 0 + -ch2 125 150 ca5 0 - -ch3 75 100 ca8 0 + -#Group 2
--- a/tool-data/deepTools_seqs.loc.sample Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of 2bit genome files for use with deepTools. You will -#need to supply these files and then create a deepTools_seqs.loc file -#similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The deepTools_seqs.loc -#file has this format (white space characters are TAB characters): -# -#<unique_build_id><display_name><file_path> -# -#So, for example, if your deepTools_seqs.loc began like this: -# -#hg18Human (Homo sapiens): hg18/depot/data2/galaxy/twobit/hg18.2bit -#hg19Human (Homo sapiens): hg19/depot/data2/galaxy/twobit/hg19.2bit -#mm9Mouse (Mus musculus): mm9/depot/data2/galaxy/twobit/mm9.2bit -# -#then your /depot/data2/galaxy/twobit/ directory -#would need to contain the following 2bit files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.2bit -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg19.2bit -#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 mm9.2bit -# -#Your deepTools_seqs.loc file should include an entry per line for -#each file you have stored that you want to be available. Note that -#your files should all have the extension '2bit'. -# -#Note that for backwards compatibility with workflows, the unique ID of -#an entry must be the path that was in the original loc file, because that -#is the value stored in the workflow for that parameter. -#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/homer.loc.sample Mon Aug 12 08:16:21 2013 -0400 @@ -0,0 +1,30 @@ +#This is a sample file distributed with Galaxy that is used to define a +#list of homer installations with associated databases, using three columns tab separated +#(longer whitespace are TAB characters): +# +#This file should enable the administrator to establish some kind of +#reproducibility of HOMER data. It is recommended to install HOMER from scratch +#in a new folder and define the location here. If you want to update your HOMER +#installation consider checking out a completely new HOMER version in a new PATH. +# +#The entries are as follows: +# +#<unique_id> <database_caption> <base_name_path> +# +#Your homer.loc file should include an entry per line for each "base name" +#you have stored. For example: +# +#homer_08_Aug_2013 HOMER 4.2 08 Aug 2013 /data/0/galaxy_data/homer/08_08_2013/ +#homer_02_July_2013 HOMER 4.2 02 July 2013 /data/0/galaxy_data/homer/02_07_2013/ +#homer_1_Jan_2013 HOMER 4.1 1 Jan 2013 /data/0/galaxy_data/homer/01_01_2013 + +#...etc... +# +#You can install and populate HOMER with all relevant data, following the instructions here: +#http://biowhat.ucsd.edu/homer/introduction/install.html +# +#The Galaxy Toolshed will take care to install all requirements, but they are only +#accessible during execution time. So feel free to ignore warnings during the installation, +#that you are missing weblogo, blat and Co. + +
--- a/tool_data_table_conf.xml.sample Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -<tables> - <!-- Locations of 2bit sequence files for use in deepTools --> - <table name="deepTools_seqs" comment_char="#"> - <columns>value, name, path</columns> - <file path="tool-data/deepTools_seqs.loc" /> - </table> -</tables>
--- a/tool_dependencies.xml Fri Aug 09 14:04:34 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="boost" version="1.53.0"> - <repository changeset_revision="f5b13e56a321" name="package_boost_1_53" owner="bgruening" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu" /> - </package> - <package name="blast+" version="2.2.26+"> - <install version="1.0"> - <actions> - <!-- populate the environment variables from the dependend repos --> - <action type="set_environment_for_install"> - <repository changeset_revision="f5b13e56a321" name="package_boost_1_53" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu"> - <package name="boost" version="1.53.0" /> - </repository> - </action> - <action type="download_by_url">ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.26/ncbi-blast-2.2.26+-src.tar.gz</action> - <action type="shell_command">cd c++ && ./configure --with-boost=$BOOST_ROOT_DIR --prefix=$INSTALL_DIR && make && make install</action> - <action type="set_environment"> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - </action> - </actions> - </install> - <readme> -Downloads and compiles BLAST+ from the NCBI, which assumes you have -all the required build dependencies installed. See: -http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download - </readme> - </package> -</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/README Mon Aug 12 08:16:21 2013 -0400 @@ -0,0 +1,15 @@ +Homer wrapper for Galaxy + +The homer tools will need to be accessible from the command line + +Code repo: https://bitbucket.org/gvl/homer + +=========================================: +LICENSE for this wrapper: +=========================================: +Kevin Ying +Garvan Institute: http://www.garvan.org.au +GVL: https://genome.edu.au/wiki/GVL + +http://opensource.org/licenses/mit-license.php +
<!-- File: tools/annotatePeaks.xml -->
<!--
    Galaxy wrapper for HOMER's annotatePeaks.pl.

    Inputs : a peak/BED file, a genome version and free-text extra options.
    Outputs: the annotation table (stdout of annotatePeaks.pl) and a log
             (stderr of annotatePeaks.pl).
-->
<tool id="homer_annotatePeaks" name="homer_annotatePeaks" version="0.0.5">
    <requirements>
        <requirement type="package" version="4.1">homer</requirement>
    </requirements>
    <description></description>
    <!--<version_command></version_command>-->
    <!--
        CDATA keeps the shell redirections (>, &) and quotes literal so the
        file stays well-formed XML.
        $options is now forwarded to annotatePeaks.pl; previously the
        "Extra options" field was collected but silently ignored.
        If annotatePeaks.pl exits non-zero, a short message is written to
        stderr so that Galaxy marks the job as failed.
    -->
    <command><![CDATA[
        annotatePeaks.pl $input_bed $genome_selector $options 1> $out_annotated
        2> $out_log || echo "Error running annotatePeaks." >&2
    ]]></command>
    <inputs>
        <param format="tabular,bed" name="input_bed" type="data" label="Homer peaks OR BED format"/>
        <param name="genome_selector" type="select" label="Genome version">
            <option value="hg19" selected="true">hg19</option>
        </param>
        <param type="text" name="options" label="Extra options" value="" help="See link below for more options">
            <!--
                The value is interpolated unquoted into a shell command line,
                so shell metacharacters are stripped in addition to the
                quote and slash that were already removed.
            -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                    <remove value="/"/>
                    <remove value=";"/>
                    <remove value="|"/>
                    <remove value="&amp;"/>
                    <remove value="`"/>
                    <remove value="$"/>
                    <remove value="&lt;"/>
                    <remove value="&gt;"/>
                </valid>
                <mapping initial="none">
                    <add source="'" target="__sq__"/>
                </mapping>
            </sanitizer>
        </param>
    </inputs>
    <outputs>
        <!--<data format="html" name="html_outfile" label="index" />-->
        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
        <data format="csv" name="out_annotated" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}" />
        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}.log" />
    </outputs>
    <tests>
        <test>
            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
        </test>
    </tests>

    <help><![CDATA[
.. class:: infomark

**Homer annotatePeaks**

More information on accepted formats and options

http://biowhat.ucsd.edu/homer/ngs/annotation.html

TIP: use homer_bed2pos and homer_pos2bed to convert between the homer peak positions and the BED format.

**Parameter list**

Command line options (not all of them are supported)::

    Usage: annotatePeaks.pl <peak file | tss> <genome version> [additional options...]

    Available Genomes (required argument): (name,org,directory,default promoter set)
            -- or --
        Custom: provide the path to genome FASTA files (directory or single file)

    User defined annotation files (default is UCSC refGene annotation):
        annotatePeaks.pl accepts GTF (gene transfer formatted) files to annotate positions relative
        to custom annotations, such as those from de novo transcript discovery or Gencode.
        -gtf <gtf format file> (-gff and -gff3 can work for those files, but GTF is better)

    Peak vs. tss/tts/rna mode (works with custom GTF file):
        If the first argument is "tss" (i.e. annotatePeaks.pl tss hg18 ...) then a TSS centric
        analysis will be carried out. Tag counts and motifs will be found relative to the TSS.
        (no position file needed) ["tts" now works too - e.g. 3' end of gene]
        ["rna" specifies gene bodies, will automatically set "-size given"]
        NOTE: The default TSS peak size is 4000 bp, i.e. +/- 2kb (change with -size option)
        -list <gene id list> (subset of genes to perform analysis [unigene, gene id, accession,
            probe, etc.], default = all promoters)
        -cTSS <promoter position file i.e. peak file> (should be centered on TSS)

    Primary Annotation Options:
        -mask (Masked repeats, can also add 'r' to end of genome name)
        -m <motif file 1> [motif file 2] ... (list of motifs to find in peaks)
            -mscore (reports the highest log-odds score within the peak)
            -nmotifs (reports the number of motifs per peak)
            -mdist (reports distance to closest motif)
            -mfasta <filename> (reports sites in a fasta file - for building new motifs)
            -fm <motif file 1> [motif file 2] (list of motifs to filter from above)
            -rmrevopp <#> (only count sites found within <#> on both strands once, i.e. palindromic)
            -matrix <prefix> (outputs a motif co-occurrence files:
                prefix.count.matrix.txt - number of peaks with motif co-occurrence
                prefix.ratio.matrix.txt - ratio of observed vs. expected co-occurrence
                prefix.logPvalue.matrix.txt - co-occurrence enrichment
                prefix.stats.txt - table of pair-wise motif co-occurrence statistics
                additional options:
                -matrixMinDist <#> (minimum distance between motif pairs - to avoid overlap)
                -matrixMaxDist <#> (maximum distance between motif pairs)
            -mbed <filename> (Output motif positions to a BED file to load at UCSC (or -mpeak))
            -mlogic <filename> (will output stats on common motif orientations)
        -d <tag directory 1> [tag directory 2] ... (list of experiment directories to show
            tag counts for) NOTE: -dfile <file> where file is a list of directories in first column
        -bedGraph <bedGraph file 1> [bedGraph file 2] ... (read coverage counts from bedGraph files)
        -wig <wiggle file 1> [wiggle file 2] ... (read coverage counts from wiggle files)
        -p <peak file> [peak file 2] ... (to find nearest peaks)
            -pdist to report only distance (-pdist2 gives directional distance)
            -pcount to report number of peaks within region
        -vcf <VCF file> (annotate peaks with genetic variation information, one col per individual)
            -editDistance (Computes the # bp changes relative to reference)
            -individuals <name1> [name2] ... (restrict analysis to these individuals)
        -gene <data file> ... (Adds additional data to result based on the closest gene.
            This is useful for adding gene expression data. The file must have a header,
            and the first column must be a GeneID, Accession number, etc. If the peak
            cannot be mapped to data in the file then the entry will be left empty.
        -go <output directory> (perform GO analysis using genes near peaks)
        -genomeOntology <output directory> (perform genomeOntology analysis on peaks)
            -gsize <#> (Genome size for genomeOntology analysis, default: 2e9)

    Annotation vs. Histogram mode:
        -hist <bin size in bp> (i.e 1, 2, 5, 10, 20, 50, 100 etc.)
        The -hist option can be used to generate histograms of position dependent features relative
        to the center of peaks. This is primarily meant to be used with -d and -m options to map
        distribution of motifs and ChIP-Seq tags. For ChIP-Seq peaks for a Transcription factor
        you might want to use the -center option (below) to center peaks on the known motif
        ** If using "-size given", histogram will be scaled to each region (i.e. 0-100%), with
        the -hist parameter being the number of bins to divide each region into.
            Histogram Mode specific Options:
            -nuc (calculated mononucleotide frequencies at each position,
                Will report by default if extracting sequence for other purposes like motifs)
            -di (calculated dinucleotide frequencies at each position)
            -histNorm <#> (normalize the total tag count for each region to 1, where <#> is the
                minimum tag total per region - use to avoid tag spikes from low coverage
            -ghist (outputs profiles for each gene, for peak shape clustering)
            -rm <#> (remove occurrences of same motif that occur within # bp)

    Peak Centering: (other options are ignored)
        -center <motif file> (This will re-center peaks on the specified motif, or remove peak
            if there is no motif in the peak. ONLY recentering will be performed, and all other
            options will be ignored. This will output a new peak file that can then be reanalyzed
            to reveal fine-grain structure in peaks (It is advised to use -size < 200) with this
            to keep peaks from moving too far (-mirror flips the position)
        -multi (returns genomic positions of all sites instead of just the closest to center)

    Advanced Options:
        -len <#> / -fragLength <#> (Fragment length, default=auto, might want to set to 0 for RNA)
        -size <#> (Peak size[from center of peak], default=inferred from peak file)
            -size #,# (i.e. -size -10,50 count tags from -10 bp to +50 bp from center)
            -size "given" (count tags etc. using the actual regions - for variable length regions)
        -log (output tag counts as log2(x+1+rand) values - for scatter plots)
        -sqrt (output tag counts as sqrt(x+rand) values - for scatter plots)
        -strand <+|-|both> (Count tags on specific strands relative to peak, default: both)
        -pc <#> (maximum number of tags to count per bp, default=0 [no maximum])
        -cons (Retrieve conservation information for peaks/sites)
        -CpG (Calculate CpG/GC content)
        -ratio (process tag values as ratios - i.e. chip-seq, or mCpG/CpG)
        -nfr (report nucleosome free region scores instead of tag counts, also -nfrSize <#>)
        -norevopp (do not search for motifs on the opposite strand [works with -center too])
        -noadj (do not adjust the tag counts based on total tags sequenced)
        -norm <#> (normalize tags to this tag count, default=1e7, 0=average tag count in all directories)
        -pdist (only report distance to nearest peak using -p, not peak name)
        -map <mapping file> (mapping between peak IDs and promoter IDs, overrides closest assignment)
        -noann, -nogene (skip genome annotation step, skip TSS annotation)
        -homer1/-homer2 (by default, the new version of homer [-homer2] is used for finding motifs)
    ]]></help>
</tool>
<!-- File: tools/bed2pos.xml -->
<!--
    Galaxy wrapper for HOMER's bed2pos.pl: converts a BED file into HOMER
    peak-position format. stdout becomes the converted dataset, stderr the log.
-->
<tool id="homer_bed2pos" name="homer_bed2pos" version="1.0.0">
    <requirements>
        <requirement type="package" version="4.1">homer</requirement>
    </requirements>
    <description></description>
    <!--<version_command></version_command>-->
    <!--
        CDATA keeps the shell redirections (>, &) and quotes literal so the
        file stays well-formed XML. If bed2pos.pl exits non-zero, a short
        message is written to stderr so that Galaxy marks the job as failed.
    -->
    <command><![CDATA[
        bed2pos.pl $input_bed 1> $out_pos
        2> $out_log || echo "Error running bed2pos." >&2
    ]]></command>
    <inputs>
        <param format="tabular,bed" name="input_bed" type="data" label="BED file" />
    </inputs>
    <outputs>
        <!--<data format="html" name="html_outfile" label="index" />-->
        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
        <data format="tabular" name="out_pos" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#" />
        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#.log" />
    </outputs>
    <tests>
        <test>
            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
        </test>
    </tests>

    <help><![CDATA[
.. class:: infomark

Converts: BED -(to)-> homer peak positions

**Homer bed2pos.pl**

http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
    ]]></help>
</tool>
<!-- File: tools/findPeaks.xml -->
<!--
    Galaxy wrapper for HOMER's findPeaks peak caller.

    Inputs : a HOMER tag directory, an optional control tag directory and
             free-text extra options.
    Outputs: the peak file written via "-o" and a log (stderr of findPeaks).
-->
<tool id="homer_findPeaks" name="homer_findPeaks" version="0.1.2">
    <requirements>
        <requirement type="package" version="4.1">homer</requirement>
    </requirements>
    <description>Homer's peakcaller. Requires tag directories (see makeTagDirectory)</description>
    <!--<version_command></version_command>-->
    <!--
        CDATA keeps the shell redirections (>, &) and quotes literal so the
        file stays well-formed XML. The tag directories live in the datasets'
        extra_files_path. The control directory is only passed when one was
        selected.
    -->
    <command><![CDATA[
        findPeaks $tagDir.extra_files_path $options -o $outputPeakFile

        #if $control_tagDir:
            -i $control_tagDir.extra_files_path
        #end if

        2> $out_log || echo "Error running findPeaks." >&2
    ]]></command>
    <inputs>
        <!--
            The format must be the datatype extension that makeTagDirectory
            emits and that datatypes_conf.xml registers ("homer_tagdir");
            with any other name no dataset is ever offered for selection.
        -->
        <param format="homer_tagdir" name="tagDir" type="data" label="tag directory" help="Must be made with homer_makeTagDirectory" />
        <param format="homer_tagdir" name="control_tagDir" type="data" optional="True" label="Control tag directory" help="Must be made with homer_makeTagDirectory" />
        <param type="text" name="options" label="Extra options" value="" help="See link below for more options">
            <!--
                The value is interpolated unquoted into a shell command line,
                so shell metacharacters are stripped in addition to the
                quote and slash that were already removed.
            -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                    <remove value="/"/>
                    <remove value=";"/>
                    <remove value="|"/>
                    <remove value="&amp;"/>
                    <remove value="`"/>
                    <remove value="$"/>
                    <remove value="&lt;"/>
                    <remove value="&gt;"/>
                </valid>
                <mapping initial="none">
                    <add source="'" target="__sq__"/>
                </mapping>
            </sanitizer>
        </param>
    </inputs>
    <outputs>
        <!--<data format="html" name="html_outfile" label="index" />-->
        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
        <data format="txt" name="outputPeakFile" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.txt" />
        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.log" />
    </outputs>
    <tests>
        <test>
            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
        </test>
    </tests>

    <help><![CDATA[
.. class:: infomark

**Homer findPeaks**

For more options, look under: "Command line options for findPeaks"

http://biowhat.ucsd.edu/homer/ngs/peaks.html

TIP: use homer_bed2pos and homer_pos2bed to convert between the homer peak positions and the BED format.

**Parameter list**

Command line options (not all of them are supported)::

    Usage: findPeaks <tag directory> [options]

    Finds peaks in the provided tag directory. By default, peak list printed to stdout

    General analysis options:
        -o <filename|auto> (file name to output peaks to, default: stdout)
            "-o auto" will send output to "<tag directory>/peaks.txt", ".../regions.txt",
            or ".../transcripts.txt" depending on the "-style" option
        -style <option> (Specialized options for specific analysis strategies)
            factor (transcription factor ChIP-Seq, uses -center, output: peaks.txt, default)
            histone (histone modification ChIP-Seq, region based, uses -region -size 500 -L 0, regions.txt)
            groseq (de novo transcript identification from GroSeq data, transcripts.txt)
            tss (TSS identification from 5' RNA sequencing, tss.txt)
            dnase (Hypersensitivity [crawford style (nicking)], peaks.txt)

    chipseq/histone options:
        -i <input tag directory> (Experiment to use as IgG/Input/Control)
        -size <#> (Peak size, default: auto)
        -minDist <#> (minimum distance between peaks, default: peak size x2)
        -gsize <#> (Set effective mappable genome size, default: 2e9)
        -fragLength <#|auto> (Approximate fragment length, default: auto)
        -inputFragLength <#|auto> (Approximate fragment length of input tags, default: auto)
        -tbp <#> (Maximum tags per bp to count, 0 = no limit, default: auto)
        -inputtbp <#> (Maximum tags per bp to count in input, 0 = no limit, default: auto)
        -strand <both|separate> (find peaks using tags on both strands or separate, default:both)
        -norm # (Tag count to normalize to, default 10000000)
        -region (extends start/stop coordinates to cover full region considered "enriched")
        -center (Centers peaks on maximum tag overlap and calculates focus ratios)
        -nfr (Centers peaks on most likely nucleosome free region [works best with mnase data])
            (-center and -nfr can be performed later with "getPeakTags"

    Peak Filtering options: (set -F/-L/-C to 0 to skip)
        -F <#> (fold enrichment over input tag count, default: 4.0)
            -P <#> (poisson p-value threshold relative to input tag count, default: 0.0001)
        -L <#> (fold enrichment over local tag count, default: 4.0)
            -LP <#> (poisson p-value threshold relative to local tag count, default: 0.0001)
        -C <#> (fold enrichment limit of expected unique tag positions, default: 2.0)
        -localSize <#> (region to check for local tag enrichment, default: 10000)
        -inputSize <#> (Size of region to search for control tags, default: 2x peak size)
        -fdr <#> (False discovery rate, default = 0.001)
        -poisson <#> (Set poisson p-value cutoff, default: uses fdr)
        -tagThreshold <#> (Set # of tags to define a peak, default: 25)
        -ntagThreshold <#> (Set # of normalized tags to define a peak, by default uses 1e7 for norm)
        -minTagThreshold <#> (Absolute minimum tags per peak, default: expected tags per peak)

    GroSeq Options: (Need to specify "-style groseq"):
        -tssSize <#> (size of region for initiation detection/artifact size, default: 250)
        -minBodySize <#> (size of region for transcript body detection, default: 1000)
        -maxBodySize <#> (size of region for transcript body detection, default: 10000)
        -tssFold <#> (fold enrichment for new initiation detection, default: 4.0)
        -bodyFold <#> (fold enrichment for new transcript detection, default: 4.0)
        -endFold <#> (end transcript when levels are this much less than the start, default: 10.0)
        -fragLength <#> (Approximate fragment length, default: 150)
        -uniqmap <directory> (directory of binary files specifying uniquely mappable locations)
            Download from http://biowhat.ucsd.edu/homer/groseq/
        -confPvalue <#> (confidence p-value: 1.00e-05)
        -minReadDepth <#> (Minimum initial read depth for transcripts, default: auto)
        -pseudoCount <#> (Pseudo tag count, default: 2.0)
        -gtf <filename> (Output de novo transcripts in GTF format)
            "-o auto" will produce <dir>/transcripts.txt and <dir>/transcripts.gtf
    ]]></help>
</tool>
# File: tools/makeTagDirectory.py
"""
Galaxy helper that runs HOMER's ``makeTagDirectory`` executable and writes a
small HTML fragment listing the files found in the resulting tag directory.

Command line::

    makeTagDirectory.py [-e EXECUTABLE] [-o HTMLOUTPUT] [-f FORMAT] <tag dir> <alignment file> ...

The positional arguments are forwarded to the executable unchanged; the first
one is taken to be the output tag directory.
"""
import re
import os
import sys
import subprocess
import optparse
import shutil
import tempfile


def getFileString(fpath, outpath):
    """
    Format a display string "<name> (<size>)" for a file.

    :param fpath: file name, relative to ``outpath``
    :param outpath: directory that contains the file
    :returns: ``'? ?'`` when ``outpath/fpath`` is not a regular file; otherwise
              the file name followed by its size in B, KB or MB (no size
              suffix for an empty file)
    """
    size = ''
    fp = os.path.join(outpath, fpath)
    s = '? ?'
    if os.path.isfile(fp):
        n = float(os.path.getsize(fp))
        if n > 2**20:
            size = ' (%1.1f MB)' % (n/2**20)
        elif n > 2**10:
            size = ' (%1.1f KB)' % (n/2**10)
        elif n > 0:
            size = ' (%d B)' % (int(n))
        s = '%s %s' % (fpath, size)
    return s


class makeTagDirectory():
    """Thin wrapper that runs the makeTagDirectory executable and reports on its output."""

    def __init__(self, opts=None, args=None):
        """
        :param opts: parsed options; ``executable`` and ``format`` are read
        :param args: positional arguments, the tag directory first
        """
        self.opts = opts
        self.args = args

    def run_makeTagDirectory(self):
        """
        Run: makeTagDirectory <Output Directory Name> [options] <alignment file1> [alignment file 2]

        :returns: ``(html, retval)`` - the HTML fragments from ``gen_html`` for
                  the tag directory, and the exit status of the executable
        :raises ValueError: when no positional argument (tag directory) was given
        """
        # Use the arguments this instance was constructed with rather than a
        # module-level global that only exists when the file runs as a script.
        args = list(self.args or [])
        if not args:
            raise ValueError('makeTagDirectory needs at least an output directory name')

        cl = [self.opts.executable] + args
        if self.opts.format != "bam":
            # An explicit -format is only passed for non-BAM input.
            cl = cl + ["-format", self.opts.format]
        print(cl)
        p = subprocess.Popen(cl)
        retval = p.wait()

        html = self.gen_html(args[0])
        return html, retval

    def gen_html(self, dr=None):
        """
        Build an HTML table listing the regular files in a tag directory.

        :param dr: directory to list; defaults to the working directory at
                   call time
        :returns: list of HTML fragments (join with ``''`` to get the markup)
        """
        if dr is None:
            # Resolved per call; a default of os.getcwd() in the signature
            # would be frozen at import time.
            dr = os.getcwd()
        # A failed run may not have created the directory at all; report an
        # empty listing so the caller can still surface the real exit status.
        flist = os.listdir(dr) if os.path.isdir(dr) else []
        print(flist)
        res = ['<div class="module"><h2>Files created by makeTagDirectory</h2><table cellspacing="2" cellpadding="2">\n']

        flist.sort()
        for f in flist:
            # Test the entry inside dr, not relative to the working directory.
            if not os.path.isdir(os.path.join(dr, f)):
                fn = os.path.split(f)[-1]
                res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn, getFileString(fn, dr)))

        # Close the <div> opened above as well as the table.
        res.append('</table></div>\n')

        return res


if __name__ == '__main__':
    op = optparse.OptionParser()
    op.add_option('-e', '--executable', default='makeTagDirectory')
    op.add_option('-o', '--htmloutput', default=None)
    op.add_option('-f', '--format', default="sam")
    opts, args = op.parse_args()
    if not args:
        op.error('an output tag directory name is required')

    wrapper = makeTagDirectory(opts, args)
    html, retval = wrapper.run_makeTagDirectory()

    # The HTML report is optional: only write it when -o was given.
    if opts.htmloutput is not None:
        with open(opts.htmloutput, 'w') as out:
            out.write(''.join(html))

    if retval != 0:
        # Indicate failure to Galaxy via stderr and a non-zero exit status.
        sys.stderr.write('## makeTagDirectory.py error - %s exited with status %d\n'
                         % (opts.executable, retval))
        sys.exit(retval)
<!-- File: tools/makeTagDirectory.xml -->
<!--
    Galaxy wrapper for HOMER's makeTagDirectory.

    Inputs : a HOMER installation chosen from homer.loc and one or more
             alignment files (SAM, BAM or BED).
    Output : a homer_tagdir dataset; the tag directory itself is written to
             the dataset's extra_files_path, which is where findPeaks reads it.
-->
<tool id="homer_makeTagDirectory" name="Make HOMER database" version="1.0.1">
    <requirements>
        <requirement type="package" version="35x1">blat</requirement>
        <requirement type="package" version="2.8.2">weblogo</requirement>
        <requirement type="package" version="9.07">ghostscript</requirement>
    </requirements>
    <description>(TagDirectory). Used by findPeaks</description>
    <!--<version_command></version_command>-->
    <!--
        \$PATH is escaped so that Cheetah leaves it for the shell to expand.
        The output variable is $tag_dir (the name declared in <outputs>).
        The repeat is iterated as $alignment_files, and each entry exposes
        its dataset as ".file" (the name of the param inside the repeat).
    -->
    <command><![CDATA[
        export PATH=\$PATH:$database.fields.path;

        makeTagDirectory $tag_dir.extra_files_path
        #for $infile in $alignment_files:
            $infile.file
        #end for
    ]]></command>
    <inputs>
        <param name="database" type="select" label="HOMER database">
            <options from_file="homer.loc">
                <column name="value" index="0"/>
                <column name="name" index="1"/>
                <column name="path" index="2"/>
            </options>
        </param>

        <repeat name="alignment_files" title="Alignment Files">
            <param name="file" label="Add file" type="data" format="sam,bed,bam" help="Alignments in SAM, BAM or BED format" />
        </repeat>

    </inputs>
    <outputs>
        <data format="homer_tagdir" name="tag_dir" label="HOMER tag directory" />
    </outputs>


    <tests>
    </tests>

    <help><![CDATA[
.. class:: infomark

**Homer makeTagDirectory**

For more options, look under: "Command line options"

http://biowhat.ucsd.edu/homer/ngs/tagDir.html

**Parameter list**

Command line options (not all of them are supported)::

    Usage: makeTagDirectory <directory> <alignment file 1> [file 2] ... [options]

    Creates a platform-independent 'tag directory' for later analysis.
    Currently BED, eland, bowtie, and sam files are accepted. The program will try to
    automatically detect the alignment format if not specified. Program will also
    unzip *.gz, *.bz2, and *.zip files and convert *.bam to sam files on the fly
    Existing tag directories can be added or combined to make a new one using -d/-t
    If more than one format is needed and the program cannot auto-detect it properly,
    make separate tag directories by running the program separately, then combine them.
    To perform QC/manipulations on an existing tag directory, add "-update"

    Options:
        -fragLength <# | given> (Set estimated fragment length - given: use read lengths)
            By default treats the sample as a single read ChIP-Seq experiment
        -format <X> where X can be: (with column specifications underneath)
            bed - BED format files:
                (1:chr,2:start,3:end,4:+/- or read name,5:# tags,6:+/-)
                -force5th (5th column of BED file contains # of reads mapping to position)
            sam - SAM formatted files (use samTools to convert BAMs into SAM if you have BAM)
                -unique (keep if there is a single best alignment based on mapq)
                    -mapq <#> (Minimum mapq for -unique, default: 10, set negative to use AS:i:/XS:i:)
                -keepOne (keep one of the best alignments even if others exist)
                -keepAll (include all alignments in SAM file)
                -mis (Maximum allowed mismatches, default: no limit, uses MD:Z: tag)
            bowtie - output from bowtie (run with --best -k 2 options)
                (1:read name,2:+/-,3:chr,4:position,5:seq,6:quality,7:NA,8:misInfo)
            eland_result - output from basic eland
                (1:read name,2:seq,3:code,4:#zeroMM,5:#oneMM,6:#twoMM,7:chr,
                8:position,9:F/R,10-:mismatches
            eland_export - output from illumina pipeline (22 columns total)
                (1-5:read name info,9:sequence,10:quality,11:chr,13:position,14:strand)
            eland_extended - output from illumina pipeline (4 columns total)
                (1:read name,2:sequence,3:match stats,4:positions[,])
            mCpGbed - encode style mCpG reporting in extended BED format, no auto-detect
                (1:chr,2:start,3:end,4:name,5:,6:+/-,7:,8:,9:,10:#C,11:#mC)
            allC - Lister style output files detailing the read information about all cytosines
                (1:chr,2:pos,3:strand,4:context,#mC,#totalC,#C
                -minCounts <#> (minimum number of reads to report mC/C ratios, default: 10)
                -mCcontext <CG|CHG|CHH|all> (only use C's in this context, default: CG)
            HiCsummary - minimal paired-end read mapping information
                (1:readname,2:chr1,3:5'pos1,4:strand1,5:chr2,6:5'pos2,7:strand2)
        -force5th (5th column of BED file contains # of reads mapping to position)
        -d <tag directory> [tag directory 2] ... (add Tag directory to new tag directory)
        -t <tag file> [tag file 2] ... (add tag file i.e. *.tags.tsv to new tag directory)
        -single (Create a single tags.tsv file for all "chromosomes" - i.e. if >100 chromosomes)
        -update (Use current tag directory for QC/processing, do not parse new alignment files)
        -tbp <#> (Maximum tags per bp, default: no maximum)
        -precision <1|2|3> (number of decimal places to use for tag totals, default: 1)

    GC-bias options:
        -genome <genome version> (To see available genomes, use "-genome list")
            -or- (for custom genomes):
        -genome <path-to-FASTA file or directory of FASTA files>

        -checkGC (check Sequence bias, requires "-genome")
            -freqStart <#> (offset to start calculating frequency, default: -50)
            -freqEnd <#> (distance past fragment length to calculate frequency, default: +50)
            -oligoStart <#> (oligo bias start)
            -oligoEnd <#> (oligo bias end)
        -normGC <target GC profile file> (i.e. tagGCcontent.txt file from control experiment)
            Use "-normGC default" to match the genomic GC distribution
        -normFixedOligo <oligoFreqFile> (normalize 5' end bias, "-normFixedOligo default" ok)
        -minNormRatio <#> (Minimum deflation ratio of tag counts, default: 0.25)
        -maxNormRatio <#> (Maximum inflation ratio of tag counts, default: 2.0)
        -iterNorm <#> (Sets -max/minNormRatio to 1 and 0, iteratively normalizes such that the
            resulting distribution is no more than #% different than target, i.e. 0.1,default: off)

    Paired-end/HiC options
        -illuminaPE (when matching PE reads, assumes last character of read name is 0 or 1)
        -removePEbg (remove paired end tags within 1.5x fragment length on same chr)
            -PEbgLength <#> (remove PE reads facing on another within this distance, default: 1.5x fragLen)
        -restrictionSite <seq> (i.e. AAGCTT for HindIII, assign data < 1.5x fragment length to sites)
            Must specify genome sequence directory too. (-rsmis <#> to specify mismatches, def: 0)
            -both, -one, -onlyOne, -none (Keeps reads near restriction sites, default: keep all)
            -removeSelfLigation (removes reads linking same restriction fragment)
            -removeRestrictionEnds (removes reads starting on a restriction fragment)
            -assignMidPoint (will place reads in the middle of HindIII fragments)
            -restrictionSiteLength <#> (maximum distance from restriction site, default: 1.5x fragLen)
        -removeSpikes <size bp> <#> (remove tags from regions with > than # times
            the average tags per size bp, suggest "-removeSpikes 10000 5")
    ]]></help>
</tool>
<!-- File: tools/pos2bed.xml -->
<!--
    Galaxy wrapper for HOMER's pos2bed.pl: converts HOMER peak positions into
    BED format. stdout becomes the BED dataset, stderr the log.
-->
<tool id="homer_pos2bed" name="homer_pos2bed" version="1.0.0">
    <requirements>
        <requirement type="package" version="4.1">homer</requirement>
    </requirements>
    <description></description>
    <!--<version_command></version_command>-->
    <!--
        CDATA keeps the shell redirections (>, &) and quotes literal so the
        file stays well-formed XML. If pos2bed.pl exits non-zero, a short
        message is written to stderr so that Galaxy marks the job as failed.
    -->
    <command><![CDATA[
        pos2bed.pl $input_peak 1> $out_bed
        2> $out_log || echo "Error running pos2bed." >&2
    ]]></command>
    <inputs>
        <param format="tabular" name="input_peak" type="data" label="Homer peak positions" />
    </inputs>
    <outputs>
        <!--<data format="html" name="html_outfile" label="index" />-->
        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
        <data format="bed" name="out_bed" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.bed" />
        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.log" />
    </outputs>
    <tests>
        <test>
            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
        </test>
    </tests>

    <help><![CDATA[
.. class:: infomark

Converts: homer peak positions -(to)-> BED format

**Homer pos2bed.pl**

http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
    ]]></help>
</tool>
<?xml version="1.0"?>
<!-- File: tools/tool_dependencies.xml -->
<!--
    Tool Shed install recipe for HOMER: downloads configureHomer.pl, moves it
    into $INSTALL_DIR/homer/, runs it with "-install", and prepends
    $INSTALL_DIR/homer/bin to PATH.
-->
<tool_dependency>
    <!--
        The version must equal the one the tool wrappers ask for
        (<requirement type="package" version="4.1">homer</requirement>),
        otherwise Galaxy cannot match this package to the requirement.
        NOTE(review): configureHomer.pl presumably installs whatever HOMER
        release is current upstream; confirm it really provides 4.1.
    -->
    <package name="homer" version="4.1">
        <install version="1.0">
            <actions>
                <action type="download_file">http://biowhat.ucsd.edu/homer/configureHomer.pl</action>

                <action type="make_directory">$INSTALL_DIR/homer/</action>
                <action type="move_file">
                    <source>configureHomer.pl</source>
                    <destination>$INSTALL_DIR/homer/</destination>
                </action>
                <!-- The script is addressed by its path under $INSTALL_DIR; a leading "." would break that path. -->
                <action type="shell_command">perl $INSTALL_DIR/homer/configureHomer.pl -install</action>

                <action type="set_environment">
                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/homer/bin</environment_variable>
                </action>
            </actions>
        </install>
        <readme>
            I'm sorry but this may not work

        </readme>
    </package>
</tool_dependency>