Mercurial > repos > bgruening > bedtools_test_bag
diff mergeBed.xml @ 11:e526617a6bb9 draft
Uploaded
author | bernhardlutz |
---|---|
date | Wed, 18 Jun 2014 15:07:04 -0400 |
parents | |
children | c782e0edc4f1 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mergeBed.xml Wed Jun 18 15:07:04 2014 -0400 @@ -0,0 +1,203 @@ +<tool id="bedtools_mergebed" name="Merge BED files" version="0.1.0"> + <description>(mergeBed)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command> + mergeBed + -i $input + $strandedness + $report_number + -d $distance + $nms + #if str($scores) != 'none' + -scores $scores + #end if + > $output + </command> + <inputs> + <param name="input" format="bed,gff,vcf" type="data" label="Sort the following BED/VCF/GFF file"/> + <param name="strandedness" type="boolean" label="Force strandedness." truevalue="-s" falsevalue="" checked="false" + help="That is, only merge features that are the same strand."/> + <param name="report_number" type="boolean" label="Report the number of BED entries that were merged." truevalue="-n" falsevalue="" checked="false" + help="1 is reported if no merging occurred."/> + <param name="nms" type="boolean" label="Report the names of the merged features separated by commas." truevalue="-nms" falsevalue="" checked="false" + help="1 is reported if no merging occurred."/> + + <param name="distance" type="integer" value="0" label="Maximum distance between features allowed for features to be merged." + help="That is, overlapping and/or book-ended features are merged."/> + <param name="scores" type="select" label="Report the scores of the merged features as"> + <option value="none" selected="True">Do not report at all</option> + <option value="sum">Sum</option> + <expand macro="math_options" /> + </param> + </inputs> + <outputs> + <data format="bed" name="output" metadata_source="input" label="Merged ${input.name}"/> + </outputs> + <tests> + <test> + <param name="input" value="0.bed" ftype="bed" /> + <output name="output" file="0_result.bed" ftype="bed" /> + </test> + <test> + <param name="input" value="1.bed" ftype="bed" /> + <param name="strandedness" value="-s" /> + <output name="output" file="1_result.bed" ftype="bed" /> + </test> + <test> + <param name="input" value="2.bed" ftype="bed" /> + <param name="report_number" value="-n" /> + <output name="output" file="2_result.bed" ftype="bed" /> + </test> + <test> + <param name="input" value="3.bed" ftype="bed" /> + <param name="distance" value="1000" /> + <output name="output" file="3_result.bed" ftype="bed" /> + </test> + </tests> + <help> + +**What it does** + +bedtools merge combines overlapping or "book-ended" features in an interval file into a single feature which spans all of the combined features. + + +.. image:: $PATH_TO_IMAGES/merge-glyph.png + + +.. class:: warningmark + +bedtools merge requires that you presort your data by chromosome and then by start position. + + +========================================================================== +Default behavior +========================================================================== +By default, ``bedtools merge`` combines overlapping (by at least 1 bp) and/or +bookended intervals into a single, "flattened" or "merged" interval. + +:: + + $ cat A.bed + chr1 100 200 + chr1 180 250 + chr1 250 500 + chr1 501 1000 + + $ bedtools merge -i A.bed + chr1 100 500 + chr1 501 1000 + + +========================================================================== +*-s* Enforcing "strandedness" +========================================================================== +The ``-s`` option will only merge intervals that are overlapping/bookended +*and* are on the same strand. + +:: + + $ cat A.bed + chr1 100 200 a1 1 + + chr1 180 250 a2 2 + + chr1 250 500 a3 3 - + chr1 501 1000 a4 4 + + + $ bedtools merge -i A.bed -s + chr1 100 250 + + chr1 501 1000 + + chr1 250 500 - + + +========================================================================== +*-n* Reporting the number of features that were merged +========================================================================== +The -n option will report the number of features that were combined from the +original file in order to make the newly merged feature. If a feature in the +original file was not merged with any other features, a "1" is reported. + +:: + + $ cat A.bed + chr1 100 200 + chr1 180 250 + chr1 250 500 + chr1 501 1000 + + $ bedtools merge -i A.bed -n + chr1 100 500 3 + chr1 501 1000 1 + + +========================================================================== +*-d* Controlling how close two features must be in order to merge +========================================================================== +By default, only overlapping or book-ended features are combined into a new +feature. However, one can force ``merge`` to combine more distant features +with the ``-d`` option. For example, were one to set ``-d`` to 1000, any +features that overlap or are within 1000 base pairs of one another will be +combined. + +:: + + $ cat A.bed + chr1 100 200 + chr1 501 1000 + + $ bedtools merge -i A.bed + chr1 100 200 + chr1 501 1000 + + $ bedtools merge -i A.bed -d 1000 + chr1 100 200 1000 + + +============================================================= +*-nms* Reporting the names of the features that were merged +============================================================= +Occasionally, one might like to know that names of the features that were +merged into a new feature. The ``-nms`` option will add an extra column to the +``merge`` output which lists (separated by semicolons) the names of the +merged features. + +:: + + $ cat A.bed + chr1 100 200 A1 + chr1 150 300 A2 + chr1 250 500 A3 + + $ bedtools merge -i A.bed -nms + chr1 100 500 A1,A2,A3 + + +=============================================================== +*-scores* Reporting the scores of the features that were merged +=============================================================== +Similarly, we might like to know that scores of the features that were +merged into a new feature. Enter the ``-scores`` option. One can specify +how the scores from each overlapping interval should be reported. + +:: + + $ cat A.bed + chr1 100 200 A1 1 + chr1 150 300 A2 2 + chr1 250 500 A3 3 + + $ bedtools merge -i A.bed -scores mean + chr1 100 500 2 + + $ bedtools merge -i A.bed -scores max + chr1 100 500 3 + + $ bedtools merge -i A.bed -scores collapse + chr1 100 500 1,2,3 + + +@REFERENCES@ + </help> +</tool>