# HG changeset patch # User bgruening # Date 1403187120 14400 # Node ID c782e0edc4f1726a773d45799104783128a2133e # Parent e526617a6bb9fbbebc9b30ecba48a817d01fa628 Uploaded diff -r e526617a6bb9 -r c782e0edc4f1 Bed12ToBed6.xml --- a/Bed12ToBed6.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/Bed12ToBed6.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -9,7 +9,6 @@ bed12ToBed16 -i '$input' > '$output' - diff -r e526617a6bb9 -r c782e0edc4f1 BedToBam.xml --- a/BedToBam.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/BedToBam.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -12,14 +12,12 @@ -mapq $mapq -i '$input' > '$output' - - diff -r e526617a6bb9 -r c782e0edc4f1 annotateBed.xml --- a/annotateBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/annotateBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -14,10 +14,10 @@ #end for #if $names.names_select == 'yes': - -names - #for $bed in $names.beds: - $bed.inputName - #end for + -names + #for $bed in $names.beds: + $bed.inputName + #end for #end if $strand $counts diff -r e526617a6bb9 -r c782e0edc4f1 bamToBed.xml --- a/bamToBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/bamToBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -12,7 +12,7 @@ -i '$input' > '$output' #if str($tag): - -tag $tag + -tag $tag #end if diff -r e526617a6bb9 -r c782e0edc4f1 bamToFastq.xml --- a/bamToFastq.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/bamToFastq.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 bedpeToBam.xml --- a/bedpeToBam.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/bedpeToBam.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -12,14 +12,12 @@ -i '$input' -g $genome > '$output' - - diff -r e526617a6bb9 -r c782e0edc4f1 closestBed.xml --- a/closestBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/closestBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -35,6 +35,7 @@ **What it does** Similar 
to intersectBed, closestBed searches for overlapping features in A and B. In the event that no feature in B overlaps the current feature in A, closestBed will report the closest (that is, least genomic distance from the start or end of A) feature in B. For example, one might want to find which is the closest gene to a significant GWAS polymorphism. Note that closestBed will report an overlapping feature as the closest—that is, it does not restrict to closest non-overlapping feature. + @REFERENCES@ diff -r e526617a6bb9 -r c782e0edc4f1 clusterBed.xml --- a/clusterBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/clusterBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -14,9 +14,11 @@ - - - + + diff -r e526617a6bb9 -r c782e0edc4f1 complementBed.xml --- a/complementBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/complementBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 coverageBed.xml --- a/coverageBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/coverageBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + of features in file A across the features in file B (coverageBed) macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 expandBed.xml --- a/expandBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/expandBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 flankbed.xml --- a/flankbed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/flankbed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -11,13 +11,15 @@ $strand -g $genome -i $inputA - > $output + #if $addition.addition_select == 'b': - -b $addition.b + -b $addition.b #else: - -l $addition.l - -r $addition.r + -l $addition.l + -r $addition.r #end if + + > $output @@ -25,8 +27,6 @@ - - diff -r e526617a6bb9 -r c782e0edc4f1 genomeCoverageBed_bedgraph.xml --- a/genomeCoverageBed_bedgraph.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/genomeCoverageBed_bedgraph.xml Thu Jun 19 10:12:00 2014 -0400 @@ 
-1,4 +1,4 @@ - + @@ -45,23 +45,6 @@ - -$ cat A.bed -chr1 10 20 -chr1 20 30 -chr2 0 500 - -$ cat my.genome -chr1 1000 -chr2 500 - -$ bedtools genomecov -i A.bed -g my.genome -chr1 0 980 1000 0.98 -chr1 1 20 1000 0.02 -chr2 1 500 500 1 -genome 0 980 1500 0.653333 -genome 1 520 1500 0.346667 - diff -r e526617a6bb9 -r c782e0edc4f1 genomeCoverageBed_histogram.xml --- a/genomeCoverageBed_histogram.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/genomeCoverageBed_histogram.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + diff -r e526617a6bb9 -r c782e0edc4f1 getfastaBed.xml --- a/getfastaBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/getfastaBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 groupbyBed.xml --- a/groupbyBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/groupbyBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -21,17 +21,12 @@ - - + + - diff -r e526617a6bb9 -r c782e0edc4f1 intersectBed.xml --- a/intersectBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/intersectBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 intersectBed_bam_obsolete.xml --- a/intersectBed_bam_obsolete.xml Wed Jun 18 15:07:04 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,135 +0,0 @@ - - reports overlaps between two feature files. 
- - bedtools - intersectBed - - intersectBed -#if $intype.inselect == "bam" --abam $intype.inputBam -b $input $intype.bed -#else --a $intype.inputBed -b $input -#end if -#if $outputopt.showoutputopt == "yes" -$outputopt.wa $outputopt.wb $outputopt.wo $outputopt.wao $outputopt.u $outputopt.c $outputopt.v -#end if -#if $overlapopt.showoverlapopt == "yes" - #if str($overlapopt.f.value) != "None" - -f $overlapopt.f - #end if -$overlapopt.r $overlapopt.s -#end if -$split -> $output - - - - - - - - - - - - - - - - - - - - - - - - s - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -By far, the most common question asked of two sets of genomic features is whether or not any of the -features in the two sets "overlap" with one another. This is known as feature intersection. intersectBed -allows one to screen for overlaps between two sets of genomic features. Moreover, it allows one to have -fine control as to how the intersections are reported. intersectBed works with both BED/GFF -and BAM files as input. - -By default, if an overlap is found, intersectBed reports the shared interval between the two -overlapping features. - - -**Default behavior when using BAM input** - -When comparing alignments in BAM format to features in BED format, intersectBed -will, by default, write the output in BAM format. That is, each alignment in the BAM file that meets -the user's criteria will be written in BAM format. This serves as a mechanism to -create subsets of BAM alignments are of biological interest, etc. Note that only the mate in the BAM -alignment is compared to the BED file. Thus, if only one end of a paired-end sequence overlaps with a -feature in B, then that end will be written to the BAM output. By contrast, the other mate for the -pair will not be written. One should use pairToBed if one wants each BAM alignment -for a pair to be written to BAM output. 
- - -**Output BED format when using BAM input** - -When comparing alignments in BAM format to features in BED format, intersectBed -will optionally write the output in BED format. That is, each alignment in the BAM file is converted -to a 6 column BED feature and if overlaps are found (or not) based on the user's criteria, the BAM -alignment will be reported in BED format. The BED "name" field is comprised of the RNAME field in -the BAM alignment. If mate information is available, the mate (e.g., "/1" or "/2") field will be -appended to the name. The "score" field is the mapping quality score from the BAM alignment. - - - - - diff -r e526617a6bb9 -r c782e0edc4f1 jaccardBed.xml --- a/jaccardBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/jaccardBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -35,6 +35,7 @@ By default, bedtools jaccard reports the length of the intersection, the length of the union (minus the intersection), the final Jaccard statistic reflecting the similarity of the two sets, as well as the number of intersections. Whereas the bedtools intersect tool enumerates each and every intersection between two sets of genomic intervals, one often needs a single statistic reflecting the similarity of the two sets based on the intersections between them. The Jaccard statistic is used in set theory to represent the ratio of the intersection of two sets to the union of the two sets. Similarly, Favorov et al [1] reported the use of the Jaccard statistic for genome intervals: specifically, it measures the ratio of the number of intersecting base pairs between two sets to the number of base pairs in the union of the two sets. The bedtools jaccard tool implements this statistic, yet modifies the statistic such that the length of the intersection is subtracted from the length of the union. As a result, the final statistic ranges from 0.0 to 1.0, where 0.0 represents no overlap and 1.0 represents complete overlap. + .. 
image:: $PATH_TO_IMAGES/jaccard-glyph.png .. class:: warningmark diff -r e526617a6bb9 -r c782e0edc4f1 linksBed.xml --- a/linksBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/linksBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 macros.xml --- a/macros.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/macros.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,11 @@ + + + bedtools + + bedtools --version + + 2.19 @@ -29,14 +36,9 @@ - - - - - - bedtools - - bedtools --version + @@ -51,8 +53,8 @@ - - + + @@ -71,9 +73,6 @@ - - - ------ diff -r e526617a6bb9 -r c782e0edc4f1 makewindowsBed.xml --- a/makewindowsBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/makewindowsBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 mapBed.xml --- a/mapBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/mapBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 maskFastaBed.xml --- a/maskFastaBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/maskFastaBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 mergeBed.xml --- a/mergeBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/mergeBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + (mergeBed) macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 multiCov.xml --- a/multiCov.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/multiCov.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 multiIntersectBed.xml --- a/multiIntersectBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/multiIntersectBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 nucBed.xml --- a/nucBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/nucBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 overlapBed.xml --- a/overlapBed.xml Wed Jun 18 15:07:04 2014 
-0400 +++ b/overlapBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 randomBed.xml --- a/randomBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/randomBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -22,7 +22,6 @@ - diff -r e526617a6bb9 -r c782e0edc4f1 reldist.xml --- a/reldist.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/reldist.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -14,7 +14,6 @@ - @@ -25,10 +24,9 @@ **What it does** Traditional approaches to summarizing the similarity between two sets of genomic intervals are based upon the number or proportion of intersecting intervals. However, such measures are largely blind to spatial correlations between the two sets where, despite consistent spacing or proximity, intersections are rare (for example, enhancers and transcription start sites rarely overlap, yet they are much closer to one another than two sets of random intervals). Favorov et al [1] proposed a relative distance metric that describes distribution of relative distances between each interval in one set and the two closest intervals in another set (see figure above). If there is no spatial correlation between the two sets, one would expect the relative distances to be uniformly distributed among the relative distances ranging from 0 to 0.5. If, however, the intervals tend to be much closer than expected by chance, the distribution of observed relative distances would be shifted towards low relative distance values (e.g., the figure below). + .. image:: $PATH_TO_IMAGES/reldist-glyph.png - -.. image:: $PATH_TO_IMAGES/reldist-plot.png .. 
class:: infomark @REFERENCES@ diff -r e526617a6bb9 -r c782e0edc4f1 shuffleBed.xml --- a/shuffleBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/shuffleBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -47,15 +47,14 @@ - - + - - + + - diff -r e526617a6bb9 -r c782e0edc4f1 slopBed.xml --- a/slopBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/slopBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -20,7 +20,6 @@ $header > $output - @@ -29,7 +28,6 @@ - @@ -39,6 +37,7 @@ **What it does** bedtools slop will increase the size of each feature in a feature file by a user-defined number of bases. While something like this could be done with an awk '{OFS="\t" print $1,$2-<slop>,$3+<slop>}', bedtools slop will restrict the resizing to the size of the chromosome (i.e. no start < 0 and no end > chromosome size). + .. image:: $PATH_TO_IMAGES/slop-glyph.png .. class:: warningmark diff -r e526617a6bb9 -r c782e0edc4f1 sortBed.xml --- a/sortBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/sortBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 subtractBed.xml --- a/subtractBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/subtractBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -42,6 +42,7 @@ bedtools subtract searches for features in B that overlap A. If an overlapping feature is found in B, the overlapping portion is removed from A and the remaining portion of A is reported. If a feature in B overlaps all of a feature in A, the A feature will not be reported. .. 
image:: $PATH_TO_IMAGES/subtract-glyph.png + @REFERENCES@ diff -r e526617a6bb9 -r c782e0edc4f1 tagBed.xml --- a/tagBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/tagBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -35,7 +35,6 @@ - diff -r e526617a6bb9 -r c782e0edc4f1 unionBedGraphs.xml --- a/unionBedGraphs.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/unionBedGraphs.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml diff -r e526617a6bb9 -r c782e0edc4f1 windowBed.xml --- a/windowBed.xml Wed Jun 18 15:07:04 2014 -0400 +++ b/windowBed.xml Thu Jun 19 10:12:00 2014 -0400 @@ -1,4 +1,4 @@ - + macros.xml @@ -68,6 +68,7 @@ Similar to bedtools intersect, window searches for overlapping features in A and B. However, window adds a specified number (1000, by default) of base pairs upstream and downstream of each feature in A. In effect, this allows features in B that are “near” features in A to be detected. .. image:: $PATH_TO_IMAGES/window-glyph.png + @REFERENCES@