diff mergeBed.xml @ 11:e526617a6bb9 draft

Uploaded
author bernhardlutz
date Wed, 18 Jun 2014 15:07:04 -0400
parents
children c782e0edc4f1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mergeBed.xml	Wed Jun 18 15:07:04 2014 -0400
@@ -0,0 +1,203 @@
+<tool id="bedtools_mergebed" name="Merge BED files" version="0.1.0">
+    <description>(mergeBed)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <!-- Runs mergeBed on the selected dataset and redirects its stdout to the output dataset.
+         Each boolean parameter expands to its flag or to an empty string; the scores flag is
+         only emitted when an operation other than 'none' is selected. Dataset paths are
+         quoted so that a path containing whitespace cannot split into several shell words. -->
+    <command>
+        mergeBed
+            -i "$input"
+            $strandedness
+            $report_number
+            -d $distance
+            $nms
+            #if str($scores) != 'none'
+                -scores $scores
+            #end if
+            &gt; "$output"
+    </command>
+    <!-- User-facing parameters. The boolean parameters carry the literal mergeBed flag in
+         truevalue, so the command template can interpolate them directly. -->
+    <inputs>
+        <param name="input" format="bed,gff,vcf" type="data" label="Merge the following BED/VCF/GFF file"
+            help="The file must be sorted by chromosome and then by start position."/>
+        <param name="strandedness" type="boolean" label="Force strandedness." truevalue="-s" falsevalue="" checked="false"
+            help="That is, only merge features that are the same strand."/>
+        <param name="report_number" type="boolean" label="Report the number of BED entries that were merged." truevalue="-n" falsevalue="" checked="false"
+            help="1 is reported if no merging occurred."/>
+        <param name="nms" type="boolean" label="Report the names of the merged features separated by commas." truevalue="-nms" falsevalue="" checked="false"
+            help="Adds an extra column listing the names of the features that were merged."/>
+
+        <param name="distance" type="integer" value="0" label="Maximum distance between features allowed for features to be merged."
+            help="With the default of 0, only overlapping and/or book-ended features are merged."/>
+        <!-- 'none' is a sentinel: the command template omits the scores flag when it is selected. -->
+        <!-- NOTE(review): math_options is defined in macros.xml (not visible here); confirm it
+             does not also declare a 'sum' option, which would duplicate the one below. -->
+        <param name="scores" type="select" label="Report the scores of the merged features as">
+            <option value="none" selected="true">Do not report at all</option>
+            <option value="sum">Sum</option>
+            <expand macro="math_options" />
+        </param>
+    </inputs>
+    <!-- One BED dataset receiving the redirected mergeBed output; its metadata_source
+         points at the input dataset and its label is derived from the input's name. -->
+    <outputs>
+        <data name="output"
+              format="bed"
+              metadata_source="input"
+              label="Merged ${input.name}"/>
+    </outputs>
+    <!-- Functional tests: defaults, strandedness, merge counts, and a 1000 bp merge distance.
+         Boolean parameters are set with the canonical "true" rather than their
+         command-line flag, so the tests do not depend on the truevalue strings. -->
+    <tests>
+        <test>
+            <param name="input" value="0.bed" ftype="bed" />
+            <output name="output" file="0_result.bed" ftype="bed" />
+        </test>
+        <test>
+            <param name="input" value="1.bed" ftype="bed" />
+            <param name="strandedness" value="true" />
+            <output name="output" file="1_result.bed" ftype="bed" />
+        </test>
+        <test>
+            <param name="input" value="2.bed" ftype="bed" />
+            <param name="report_number" value="true" />
+            <output name="output" file="2_result.bed" ftype="bed" />
+        </test>
+        <test>
+            <param name="input" value="3.bed" ftype="bed" />
+            <param name="distance" value="1000" />
+            <output name="output" file="3_result.bed" ftype="bed" />
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+bedtools merge combines overlapping or "book-ended" features in an interval file into a single feature which spans all of the combined features.
+
+
+.. image:: $PATH_TO_IMAGES/merge-glyph.png
+
+
+.. class:: warningmark
+
+bedtools merge requires that you presort your data by chromosome and then by start position.
+
+
+==========================================================================
+Default behavior
+==========================================================================
+By default, ``bedtools merge`` combines overlapping (by at least 1 bp) and/or
+bookended intervals into a single, "flattened" or "merged" interval.
+
+::
+
+  $ cat A.bed
+  chr1  100  200
+  chr1  180  250
+  chr1  250  500
+  chr1  501  1000
+
+  $ bedtools merge -i A.bed
+  chr1  100  500
+  chr1  501  1000
+
+
+==========================================================================
+*-s* Enforcing "strandedness" 
+==========================================================================
+The ``-s`` option will only merge intervals that are overlapping/bookended
+*and* are on the same strand.
+
+::
+
+  $ cat A.bed
+  chr1  100  200   a1  1 +
+  chr1  180  250   a2  2 +
+  chr1  250  500   a3  3 -
+  chr1  501  1000  a4  4 +
+
+  $ bedtools merge -i A.bed -s
+  chr1  100  250    +
+  chr1  501  1000   +
+  chr1  250  500    -
+
+
+==========================================================================
+*-n* Reporting the number of features that were merged 
+==========================================================================
+The ``-n`` option will report the number of features that were combined from the 
+original file in order to make the newly merged feature. If a feature in the 
+original file was not merged with any other features, a "1" is reported.
+
+::
+
+  $ cat A.bed
+  chr1  100  200
+  chr1  180  250
+  chr1  250  500
+  chr1  501  1000
+  
+  $ bedtools merge -i A.bed -n
+  chr1  100  500  3
+  chr1  501  1000 1
+
+
+==========================================================================
+*-d* Controlling how close two features must be in order to merge 
+==========================================================================
+By default, only overlapping or book-ended features are combined into a new 
+feature. However, one can force ``merge`` to combine more distant features 
+with the ``-d`` option. For example, were one to set ``-d`` to 1000, any 
+features that overlap or are within 1000 base pairs of one another will be 
+combined.
+
+::
+
+  $ cat A.bed
+  chr1  100  200
+  chr1  501  1000
+  
+  $ bedtools merge -i A.bed
+  chr1  100  200
+  chr1  501  1000
+
+  $ bedtools merge -i A.bed -d 1000
+  chr1  100  1000
+
+
+=============================================================
+*-nms* Reporting the names of the features that were merged
+=============================================================
+Occasionally, one might like to know the names of the features that were 
+merged into a new feature. The ``-nms`` option will add an extra column to the 
+``merge`` output which lists (separated by commas) the names of the
+merged features.
+
+::
+
+  $ cat A.bed
+  chr1  100  200  A1
+  chr1  150  300  A2
+  chr1  250  500  A3
+ 
+  $ bedtools merge -i A.bed -nms
+  chr1  100  500  A1,A2,A3
+  
+
+===============================================================
+*-scores* Reporting the scores of the features that were merged
+===============================================================
+Similarly, we might like to know the scores of the features that were 
+merged into a new feature. Enter the ``-scores`` option.  One can specify 
+how the scores from each overlapping interval should be reported.
+
+::
+
+  $ cat A.bed
+  chr1  100  200  A1 1
+  chr1  150  300  A2 2
+  chr1  250  500  A3 3
+ 
+  $ bedtools merge -i A.bed -scores mean
+  chr1  100  500  2
+  
+  $ bedtools merge -i A.bed -scores max
+  chr1  100  500  3
+
+  $ bedtools merge -i A.bed -scores collapse
+  chr1  100  500  1,2,3
+
+
+@REFERENCES@
+    </help>
+</tool>