Mercurial > repos > bgruening > bedtools_test_bag
changeset 6:75d323631dce draft
Uploaded
author | bernhardlutz |
---|---|
date | Wed, 18 Jun 2014 12:40:28 -0400 |
parents | b78d20957e7f |
children | 1e2f23214cf4 |
files | Bed12ToBed6.xml BedToBam.xml annotateBed.xml bamToBed.xml bamToFastq.xml bedpeToBam.xml closestBed.xml clusterBed.xml complementBed.xml coverageBed.xml expandBed.xml flankbed.xml getfastaBed.xml groupbyBed.xml intersectBed.xml jaccardBed.xml linksBed.xml macros.xml makewindowsBed.xml mapBed.xml maskFastaBed.xml mergeBed.xml multiCov.xml nucBed.xml overlapBed.xml randomBed.xml reldist.xml shuffleBed.xml slopBed.xml static/images/reldist-glyph.png static/images/reldist-plot.png static/images/shuffle-glyph.png static/images/slop-glyph.png static/images/subtract-glyph.png static/images/window-glyph.png subtractBed.xml tagBed.xml test-data/A.bed test-data/expandInput.bed test-data/groupbyinput.bed test-data/mygenome.bed windowBed.xml |
diffstat | 42 files changed, 890 insertions(+), 56 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Bed12ToBed6.xml Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,28 @@ +<tool id="bedtools_bed12tobed6" name="Convert from BED12 to BED6" version="0.1.0"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command> + bedtools bed12tobed6 + -i '$input' + > '$output' + + </command> + <inputs> + <param format="bed" name="input" type="data" label="Convert the following BED12 file to BED6"/> + </inputs> + <outputs> + <data format="bed" name="output" metadata_source="input" label="${input.name} (as BED6)"/> + </outputs> + <help> + +**What it does** + +bed12ToBed6 is a convenience tool that converts BED features in BED12 (a.k.a. “blocked” BED features such as genes) to discrete BED6 features. For example, in the case of a gene with six exons, bed12ToBed6 would create six separate BED6 features (i.e., one for each exon). + +@REFERENCES@ + </help> +</tool>
--- a/BedToBam.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/BedToBam.xml Wed Jun 18 12:40:28 2014 -0400 @@ -6,7 +6,7 @@ <expand macro="requirements" /> <expand macro="stdio" /> <command> - bedToBam + bedtools bedtobam $ubam $bed12 -mapq $mapq @@ -30,5 +30,6 @@ bedToBam converts features in a feature file to BAM format. This is useful as an efficient means of storing large genome annotations in a compact, indexed format for visualization purposes. +@REFERENCES@ </help> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/annotateBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,66 @@ +<tool id="bedtools_annotatebed" name="AnnotateBed" version="0.2.0"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command> + bedtools annotate + -i $inputA + -files + #for $bed in $names.beds: + $bed.input + #end for + + #if $names.names_select == 'yes': + -names + #for $bed in $names.beds: + $bed.inputName + #end for + #end if + $strand + $counts + $both + > $output + </command> + <inputs> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> + <!-- Additional files, if the user needs more --> + <conditional name="names"> + <param name="names_select" type="select" label="Specify names for each file"> + <option value="no" selected="True">No</option> + <option value="yes">Yes</option> + </param> + <when value="yes"> + <repeat name="beds" title="Add BED files and names" > + <param name="input" format="bed" type="data" label="BED file" /> + <param name="inputName" type="text" label="Name of the file" /> + </repeat> + </when> + <when value="no"> + <repeat name="beds" title="Add BED files" > + <param name="input" format="bed" type="data" label="BED file" /> + </repeat> + </when> + </conditional> + + <expand macro="strand2" /> + <param name="counts" type="boolean" checked="false" truevalue="-counts" falsevalue="" label="Report the count of features in each file that overlap -i. Default is to report solely the fraction of -i covered by each file." /> + <param name="both" type="boolean" checked="false" truevalue="-both" falsevalue="" label="Report the count of features followed by the % coverage for each annotation file. Default is to report solely the fraction of the input file covered by each file." 
/> + </inputs> + + + <outputs> + <data format="bed" name="output" label="" /> + </outputs> + <help> + +**What it does** + +bedtools annotate, well, annotates one BED/VCF/GFF file with the coverage and number of overlaps observed from multiple other BED/VCF/GFF files. In this way, it allows one to ask to what degree one feature coincides with multiple other feature types with a single command. + +@REFERENCES@ + + </help> +</tool>
--- a/bamToBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/bamToBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -6,7 +6,7 @@ <expand macro="requirements" /> <expand macro="stdio" /> <command> - bamToBed + bedtools bamtobed $option $ed_score -i '$input'
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamToFastq.xml Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,32 @@ +<tool id="bedtools_bamtofastq" name="Convert from BAM to FastQ" version="0.2.0"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command> + bedtools bamtofastq + $tags + $fq2 + -i '$input' + -fq '$output' + </command> + <inputs> + <param format="bam" name="input" type="data" label="Convert the following BAM file to FASTQ"/> + <param name="tags" type="boolean" truevalue="-tags" falsevalue="" checked="false" label="Create FASTQ based on the mate info in the BAM R2 and Q2 tags."/> + <param name="fq2" type="boolean" truevalue="-fq2" falsevalue="" checked="false" label="FASTQ for second end. Used if BAM contains paired-end data. BAM should be sorted by query name if creating paired FASTQ with this option."/> + </inputs> + <outputs> + <data format="fastq" name="output" metadata_source="input" label="${input.name} (as FASTQ)"/> + </outputs> +<help> + +**What it does** + +bedtools bamtofastq is a conversion utility for extracting FASTQ records from sequence alignments in BAM format. + +@REFERENCES@ + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bedpeToBam.xml Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,39 @@ +<tool id="bedtools_bedpetobam" name="Convert from BEDPE to BAM" version="0.1.0"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command> + bedtools bedpetobam + $ubam + -mapq $mapq + -i '$input' + -g $genome + > '$output' + + </command> + <inputs> + <param format="bed,gff,vcf" name="input" type="data" label="BED/VCF/GFF file"/> + <expand macro="genome" /> + <param name="mapq" type="integer" label="Set a mapping quality (SAM MAPQ field) value for all BED entries" value="255" /> + <param name="ubam" type="boolean" label="Write uncompressed BAM output." truevalue="-ubam" falsevalue="" checked="false"/> + <!-- The command template substitutes these params directly: mapq follows the -mapq flag, ubam expands to -ubam or to an empty string, and the genome param comes from the "genome" macro in macros.xml. --> + </inputs> + <outputs> + <data format="bam" name="output" metadata_source="input" label="${input.name} (as BAM)"/> + </outputs> + <help> + +**What it does** + +Converts feature records to BAM format. + +.. class:: warningmark + +BED files must be at least BED4 to create BAM (needs name field). + +@REFERENCES@ + </help> +</tool>
--- a/closestBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/closestBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -16,8 +16,8 @@ > $output </command> <inputs> - <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED or BAM file"/> - <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="overlap intervals in this BED file?"/> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> + <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="overlap intervals in this BED/VCF/GFF file?"/> <param name="ties" type="select" label="How ties for closest feature should be handled" help="This occurs when two features in B have exactly the same overlap with a feature in A."> <option value="all" selected="True">all- Report all ties (default)</option> @@ -36,6 +36,6 @@ **What it does** Similar to intersectBed, closestBed searches for overlapping features in A and B. In the event that no feature in B overlaps the current feature in A, closestBed will report the closest (that is, least genomic distance from the start or end of A) feature in B. For example, one might want to find which is the closest gene to a significant GWAS polymorphism. Note that closestBed will report an overlapping feature as the closest—that is, it does not restrict to closest non-overlapping feature. - +@REFERENCES@ </help> </tool>
--- a/clusterBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/clusterBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -6,14 +6,14 @@ <expand macro="requirements" /> <expand macro="stdio" /> <command> - closestBed + bedtools cluster $strand -d $distance -i $inputA > $output </command> <inputs> - <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED or BAM file"/> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> <param name="strand" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Force strandedness." help="That is, only cluster features that are the same strand. By default, this is disabled." /> <param name="distance" type="integer" value="0" label="Maximum distance between features allowed for features to be clustered" help="Default is 0. That is, overlapping and/or book-ended features are clustered." /> @@ -33,5 +33,6 @@ bedtools cluster requires that you presort your data by chromosome and then by start position (e.g., sort -k1,1 -k2,2n in.bed > in.sorted.bed for BED files). +@REFERENCES@ </help> </tool>
--- a/complementBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/complementBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -12,7 +12,7 @@ > $output </command> <inputs> - <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED or BAM file"/> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> <expand macro="genome" /> </inputs> <outputs> @@ -26,5 +26,6 @@ .. image:: $PATH_TO_IMAGES/complement-glyph.png +@REFERENCES@ </help> </tool>
--- a/coverageBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/coverageBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -19,7 +19,7 @@ > '$output' </command> <inputs> - <param format="bed,bam,gff,gg3,vcf" name="inputA" type="data" label="Count how many intervals in this BED or BAM file (source)"> + <param format="bed,bam,gff,gff3,vcf" name="inputA" type="data" label="Count how many intervals in this BED/VCF/GFF/BAM file (source)"> <validator type="unspecified_build" /> </param> <param format="bed,gff,gff3,vcf" name="inputB" type="data" label="overlap the intervals in this BED file (target)">
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/expandBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,29 @@ +<tool id="bedtools_expandbed" name="ExpandBed" version="0.2.0"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command> + bedtools expand + -c $cols + -i $inputA + > $output + </command> + <inputs> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> + <param name="cols" type="text" value="" label="Specify the column(s) (comma separated) that should be expanded" /> + </inputs> + <outputs> + <data format_source="inputA" name="output" metadata_source="inputA" label=""/> + </outputs> + <help> + +**What it does** + +Replicate lines in a file based on columns of comma-separated values. + +@REFERENCES@ + </help> +</tool>
--- a/flankbed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/flankbed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -20,28 +20,16 @@ #end if </command> <inputs> - <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED or BAM file"/> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> <expand macro="genome" /> <param name="pct" type="boolean" checked="false" truevalue="-pct" falsevalue="" label="Define -l and -r as a fraction of the feature’s length" help="E.g. if used on a 1000bp feature, -l 0.50, will add 500 bp “upstream”" /> <param name="strand" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Define -l and -r based on strand" help="For example. if used, -l 500 for a negative-stranded feature, it will add 500 bp to the end coordinate" /> - + <expand macro="addition" /> - <conditional name="addition"> - <param name="addition_select" type="select" label="Choose what you want to do"> - <option value="b" selected="True">Increase the BED/GFF/VCF entry by the same number base pairs in each direction.</option> - <option value="lr">Increase by Start Coordinate and End Coordinate</option> - </param> - <when value="b"> - <param name="b" label="Give Value" type="integer" value="0" /> - </when> - <when value="lr"> - <param name="l" label="The number of base pairs to subtract from the start coordinate" type="integer" value="0" /> - <param name="r" label="The number of base pairs to add to the end coordinate" type="integer" value="0" /> - </when> - </conditional> + </inputs> <outputs> - <data format_source="inputA" name="output" metadata_source="inputA" label=""/> + <data format="bed" name="output" label=""/> </outputs> <help> @@ -55,5 +43,6 @@ In order to prevent creating intervals that violate chromosome boundaries, bedtools flank requires a genome file defining the length of each chromosome or contig. +@REFERENCES@ </help> </tool>
--- a/getfastaBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/getfastaBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -17,7 +17,7 @@ -fo $output </command> <inputs> - <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED or BAM file"/> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> <param format="fasta" name="fasta" type="data" label="Fasta file"/> <param name="name" type="boolean" checked="false" truevalue="-name" falsevalue="" label="Use the “name” column in the BED file for the FASTA headers in the output FASTA file" /> @@ -41,5 +41,7 @@ 1. The headers in the input FASTA file must exactly match the chromosome column in the BED file. 2. You can use the UNIX fold command to set the line width of the FASTA output. For example, fold -w 60 will make each line of the FASTA file have at most 60 nucleotides for easy viewing. + +@REFERENCES@ </help> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/groupbyBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,48 @@ +<tool id="bedtools_groupbybed" name="GroupByBed" version="0.2.0"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command> + bedtools groupby + -c $cols + -g $group + -o $operation + -i $inputA + > $output + </command> + <inputs> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> + <param name="cols" type="text" value="" label="Specify the column(s) (comma separated) that should be summarized" /> + <param name="group" type="text" value="1,2,3" label="Specifies which column(s) (1-based) should be used to group the input. Columns may be comma-separated, with each column explicitly listed. Ranges (e.g. 1-4) are also allowed." /> + <param name="operation" type="select" label="Specify the operation"> + <option value="sum" selected="True">Sum - numeric only</option> + <option value="stdev">Stdev - numeric only</option> + <option value="sstdev">Sstdev - numeric only</option> + <option value="freqasc">Freqasc - print a comma separated list of values observed and the number of times they were observed. +Reported in ascending order of frequency* +</option> + <option value="freqdesc">Freqdesc - print a comma separated list of values observed and the number of times they were observed. +Reported in descending order of frequency* +</option> + <option value="first">First - numeric or text</option> + <option value="last">Last - numeric or text</option> + <expand macro="math_options" /> + <expand macro="additional_math_options" /> + + </param> + </inputs> + <outputs> + <data format_source="inputA" name="output" metadata_source="inputA" label=""/> + </outputs> + <help> + +**What it does** + +Summarizes a dataset column based upon common column groupings. Akin to the SQL "group by" command. + +@REFERENCES@ + </help> +</tool>
--- a/intersectBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/intersectBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -27,7 +27,7 @@ > $output </command> <inputs> - <param format="bed,bam,vcf,gff,gff3" name="inputA" type="data" label="BED or BAM file"/> + <param format="bed,bam,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF/BAM file"/> <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="overlap intervals in this BED file?"/> <param name="strand" type="select" label="Calculate the intersection based on strandedness?">
--- a/jaccardBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/jaccardBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -17,8 +17,8 @@ > $output </command> <inputs> - <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED or BAM file"/> - <param format="bed,vcf,gff,gff3" name="inputB" type="data" label="BED or BAM file"/> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> + <param format="bed,vcf,gff,gff3" name="inputB" type="data" label="BED/VCF/GFF file"/> <param name="overlap" type="float" value="0.000000001" label="Minimum overlap required as a fraction of A" /> <param name="reciprocal" type="boolean" checked="false" truevalue="-f" falsevalue="" label="Require that the fraction of overlap be reciprocal for A and B. In other words, if -f is 0.90 and -r is used, this requires that B overlap at least 90% of A and that A also overlaps at least 90% of B" /> @@ -40,5 +40,7 @@ .. class:: warningmark The jaccard tool requires that your data is pre-sorted by chromosome and then by start position (e.g., sort -k1,1 -k2,2n in.bed > in.sorted.bed for BED files). + +@REFERENCES@ </help> </tool>
--- a/linksBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/linksBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -14,7 +14,7 @@ > $output </command> <inputs> - <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED or BAM file"/> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> <param name="basename" type="text" value="http://genome.ucsc.edu" label="The “basename” for the UCSC browser" /> <param name="org" type="text" value="human" label="The organism (e.g. mouse, human)" /> <param name="db" type="text" value="hg18" label="The genome build" /> @@ -27,5 +27,7 @@ **What it does** Creates an HTML file with links to an instance of the UCSC Genome Browser for all features / intervals in a file. This is useful for cases when one wants to manually inspect through a large set of annotations or features. + +@REFERENCES@ </help> </tool>
--- a/macros.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/macros.xml Wed Jun 18 12:40:28 2014 -0400 @@ -20,6 +20,18 @@ <option value="-S">Only overlaps occurring on the **opposite** strand.</option> </param> </xml> + <xml name="seed"> + <conditional name="seed"> + <param name="choose" type="boolean" label="Choose Seed?" checked="false" truevalue="True" falsevalue="False" /> + <when value="True"> + <param name="seed" type="integer" value="12345" label="Enter Seed" /> + </when> + </conditional> + </xml> + <xml name="split"> + <param name="split" type="boolean" checked="true" truevalue="-split" falsevalue="" label="Treat split/spliced BAM or BED12 entries as distinct BED intervals when computing coverage." help="If set, the coverage will be calculated based on the spliced intervals only. For BAM files, this inspects the CIGAR N operation to infer the blocks for computing coverage. For BED12 files, this inspects the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12). If this option is not set, coverage will be calculated based on the interval's START/END coordinates, and would include introns in the case of RNAseq data." 
/> + + </xml> <xml name="requirements"> <requirements> <requirement type="package" version="2.19.1">bedtools</requirement> @@ -29,6 +41,39 @@ <xml name="genome"> <param format="bed,vcf,gff,gff3" name="genome" type="data" label="Genome file" /> </xml> + <xml name="addition"> + <conditional name="addition"> + <param name="addition_select" type="select" label="Choose what you want to do"> + <option value="b" selected="True">Increase the BED/GFF/VCF entry by the same number base pairs in each direction.</option> + <option value="lr">Increase by Start Coordinate and End Coordinate</option> + </param> + <when value="b"> + <param name="b" label="Give Value" type="integer" value="0" /> + </when> + <when value="lr"> + <param name="l" label="The number of base pairs to subtract from the start coordinate" type="integer" value="0" /> + <param name="r" label="The number of base pairs to add to the end coordinate" type="integer" value="0" /> + </when> + </conditional> + </xml> + <xml name="math_options"> + <option value="min">Min - numeric only</option> + <option value="max">Max - numeric only</option> + <option value="mean">Mean - numeric only</option> + <option value="median">Median - numeric only</option> + <option value="mode">Mode - numeric only</option> + <option value="antimode">Antimode - numeric only</option> + <option value="collapse">collapse (i.e., print a comma separated list) - numeric or text</option> + </xml> + <xml name="additional_math_options"> + <option value="count">Count - numeric or text</option> + <option value="count_distinct">Count Distinct - numeric or text</option> + <option value="distinct">distinct (i.e., print a comma separated list) - numeric or text</option> + <option value="concat">concat (i.e., print a comma separated list) - numeric or text</option> + </xml> + <!-- math_options and additional_math_options are expanded inside "operation" selects; groupbyBed.xml hands the chosen value to bedtools as the -o argument, so each value must be spelled exactly as bedtools names the operation. --> + + <token name="@REFERENCES@"> ------ @@ -38,5 +83,6 @@ .. __: https://github.com/arq5x/bedtools2 .. __: http://cphg.virginia.edu/quinlan/ .. 
__: http://bioinformatics.oxfordjournals.org/content/26/6/841.short +.. __: http://bedtools.readthedocs.org/en/latest/content/bedtools-suite.html </token> </macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/makewindowsBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,78 @@ +<tool id="bedtools_makewindowsbed" name="MakeWindowsBed" version="0.2.0"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command> + bedtools makewindows + #if $type.type_select == 'genome': + -g $type.genome + #else: + -b $type.inputA + #end if + #if $action.action_select == 'windowsize': + -w $action.windowsize + #if $action.step_size.step_size_select == 'yes': + -s $action.step_size.step_size + #end if + #else: + -n $action.number + #end if + $sourcename + > $output + </command> + <inputs> + <conditional name="type"> + <param name="type_select" type="select" label="Work with"> + <option value="bed" selected="True">Bed File</option> + <option value="genome">Genome File</option> + </param> + <when value="bed"> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> + </when> + <when value="genome"> + <expand macro="genome" /> + </when> + </conditional> + <conditional name="action"> + <param name="action_select" type="select" label="Work with"> + <option value="windowsize" selected="True">Set WindowSize</option> + <option value="number">Give Number of Windows</option> + </param> + <when value="windowsize"> + <param name="windowsize" type="integer" value="1" label="Divide each input interval (either a chromosome or a BED interval) to fixed-sized windows (i.e. same number of nucleotide in each window)." /> + <conditional name="step_size"> + <param name="step_size_select" type="select" label="Specify Step size? i.e. how many base pairs to step before creating a new window. Used to create 'sliding' windows. 
Defaults to window size (non-sliding windows)."> + <option value="yes">Yes</option> + <option value="no" selected="True">No</option> + </param> + <when value="yes"> + <param name="step_size" type="integer" value="100" label="Specify it" /> + </when> + </conditional> + </when> + <when value="number"> + <param name="number" type="integer" value="1" label="Divide each input interval (either a chromosome or a BED interval) to fixed number of windows (i.e. same number of windows, with varying window sizes)." /> + </when> + </conditional> + <param name="sourcename" type="select" label="ID Naming Options"> + <option value="" selected="True">Default</option> + <option value="-i src">use the source interval's name</option> + <option value="-i winnum">use the window number as the ID (e.g. 1,2,3,4...)</option> + <option value="-i srcwinnum">use the source interval's name with the window number.</option> + </param> + </inputs> + <outputs> + <data format="bed" name="output" label=""/> + </outputs> + <help> + +**What it does** + +Makes adjacent or sliding windows across a genome or BED file. + +@REFERENCES@ + </help> +</tool>
--- a/mapBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/mapBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -22,25 +22,18 @@ > $output </command> <inputs> - <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED File A"/> - <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="BED File B"/> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file A"/> + <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="BED/VCF/GFF file B"/> <param name="col" type="integer" value="5" label="Specify the column from the B file to map onto intervals in A" /> <expand macro="overlap" /> <param name="reciprocal" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Require reciprocal overlap." help="If set, the overlap between the BAM alignment and the BED interval must affect the above fraction of both the alignment and the BED interval." /> <expand macro="strand2" /> <param name="operation" type="select" label="Specify the operation"> - <option value="sum" selected="True">Sum - numeric only</option> - <option value="count">Count - numeric or text</option> - <option value="count_disctinct">Count Distinct - numeric or text</option> - <option value="min">Min - numeric only</option> - <option value="max">Max - numeric only</option> - <option value="mean">Mean - numeric only</option> - <option value="median">Median - numeric only</option> - <option value="mode">Mode - numeric only</option> - <option value="antimode">Antimode - numeric only</option> - <option value="collapse">collapse (i.e., print a comma separated list) - numeric or text</option> - <option value="distinct">distinct (i.e., print a comma separated list) - numeric or text</option> - <option value="concat">concat (i.e., print a comma separated list) - numeric or text</option> + <option value="sum">Sum - numeric only</option> + <option value="absmin">AbsMin - numeric only</option> + <option value="absmax">AbsMax - numeric only</option> + <expand 
macro="math_options" /> + <expand macro="additional_math_options" /> </param> <param name="split" type="boolean" checked="true" truevalue="-split" falsevalue="" label="Treat split/spliced BAM or BED12 entries as distinct BED intervals when computing coverage." help="If set, the coverage will be calculated based the spliced intervals only. For BAM files, this inspects the CIGAR N operation to infer the blocks for computing coverage. For BED12 files, this inspects the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12). If this option is not set, coverage will be calculated based on the interval's START/END coordinates, and would include introns in the case of RNAseq data." /> <param name="header" type="boolean" checked="false" truevalue="-header" falsevalue="" label="Print the header from the A file prior to results." /> @@ -70,6 +63,7 @@ The map tool is substantially faster in versions 2.19.0 and later. The plot below demonstrates the increased speed when, for example, counting the number of exome alignments that align to each exon. The bedtools times are compared to the bedops bedmap utility as a point of reference. +@REFERENCES@ </help> </tool>
--- a/maskFastaBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/maskFastaBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -14,7 +14,7 @@ -fo $output </command> <inputs> - <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED or BAM file"/> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> <param format="fasta" name="fasta" type="data" label="Fasta file"/> <param name="soft" type="boolean" checked="false" truevalue="-name" falsevalue="" label="Soft-mask (that is, convert to lower-case bases) the FASTA sequence. By default, hard-masking (that is, conversion to Ns) is performed" /> @@ -31,5 +31,6 @@ .. image:: $PATH_TO_IMAGES/maskfasta-glyph.png +@REFERENCES@ </help> </tool>
--- a/mergeBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/mergeBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -18,7 +18,7 @@ > $output </command> <inputs> - <param name="input" format="bed,gff,vcf" type="data" label="Sort the following BED file"/> + <param name="input" format="bed,gff,vcf" type="data" label="Sort the following BED/VCF/GFF file"/> <param name="strandedness" type="boolean" label="Force strandedness." truevalue="-s" falsevalue="" checked="false" help="That is, only merge features that are the same strand."/> <param name="report_number" type="boolean" label="Report the number of BED entries that were merged." truevalue="-n" falsevalue="" checked="false" @@ -29,15 +29,9 @@ <param name="distance" type="integer" value="0" label="Maximum distance between features allowed for features to be merged." help="That is, overlapping and/or book-ended features are merged."/> <param name="scores" type="select" label="Report the scores of the merged features as"> - <option value="none">Do not report at all</option> + <option value="none" selected="True">Do not report at all</option> <option value="sum">Sum</option> - <option value="min">Min</option> - <option value="max">Max</option> - <option value="mean">Mean</option> - <option value="median">Median</option> - <option value="mode">Mode</option> - <option value="antimode">Antimode</option> - <option value="collapse">Semicolon separated list</option> + <expand macro="math_options" /> </param> </inputs> <outputs>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multiCov.xml Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,57 @@ +<tool id="bedtools_multicovtbed" name="MultiCovBed" version="0.2.0"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command> + bedtools multicov + -bed $input1 + -bams + #for $bam in $bams: + $bam.input + #end for + $strand + -f $overlap + $reciprocal + $split + -q $mapq + $duplicate + $failed + $proper + > $output + </command> + <inputs> + <param name="input1" format="bed" type="data" label="First sorted BED file" /> + <!-- Additional files, if the user needs more --> + <repeat name="bams" title="Add BAM files" > + <param name="input" format="bam" type="data" label="BAM file" /> + </repeat> + <expand macro="strand2" /> + <expand macro="overlap" /> + <param name="reciprocal" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Require that the fraction overlap be reciprocal for A and B. In other words, if -f is 0.90 and -r is used, this requires that B overlap 90% of A and A _also_ overlaps 90% of B." /> + <expand macro="split" /> + <param name="mapq" type="integer" value="0" label="Minimum mapping quality (MAPQ) allowed" help="Default is 0." /> + <param name="duplicate" type="boolean" checked="false" truevalue="-D" falsevalue="" label="Include duplicate reads. Default counts non-duplicates only" /> + <param name="failed" type="boolean" checked="false" truevalue="-F" falsevalue="" label=" Include failed-QC reads. Default counts pass-QC reads only" /> + <param name="proper" type="boolean" checked="false" truevalue="-p" falsevalue="" label="Only count proper pairs. Default counts all alignments with MAPQ > -q argument, regardless of the BAM FLAG field." /> + </inputs> + + <outputs> + <data format="bed" name="output" label="" /> + </outputs> + <help> + +**What it does** + +bedtools multicov, reports the count of alignments from multiple position-sorted and indexed BAM files that overlap intervals in a BED file. 
Specifically, for each BED interval provided, it reports a separate count of overlapping alignments from each BAM file. + +.. class:: infomark + +bedtools multicov depends upon indexed BAM files in order to count the number of overlaps in each BAM file. As such, each BAM file should be position sorted (samtools sort aln.bam aln.sort) and indexed (samtools index aln.sort.bam) with either samtools or bamtools. + +@REFERENCES@ + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nucBed.xml Wed Jun 18 12:40:28 2014 -0400
@@ -0,0 +1,48 @@
+<tool id="bedtools_nucbed" name="NucBed" version="0.2.0">
+    <!--
+        Galaxy wrapper around "bedtools nuc": profiles the nucleotide content of the
+        intervals of a BED/VCF/GFF file against a FASTA file.
+    -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools nuc
+        $strand
+        $seq
+        ## -pattern takes the sequence to count as its argument, so the flag is only
+        ## emitted together with a non-empty sequence.
+        #if str($pattern) and str($pattern_sequence).strip():
+            $pattern '$pattern_sequence'
+        #end if
+        $case
+        -fi $fasta
+        -bed $inputA
+        > $output
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <param format="fasta" name="fasta" type="data" label="Fasta file"/>
+
+        <param name="strand" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Profile the sequence according to strand." />
+        <param name="seq" type="boolean" checked="false" truevalue="-seq" falsevalue="" label="Print the extracted sequence." />
+        <param name="pattern" type="boolean" checked="false" truevalue="-pattern" falsevalue="" label="Report the number of times a user-defined sequence is observed (case-sensitive)." />
+        <param name="pattern_sequence" type="text" value="" label="Sequence to count" help="Only used when the option above is enabled." />
+        <param name="case" type="boolean" checked="false" truevalue="-C" falsevalue="" label="Ignore case when matching -pattern." />
+    </inputs>
+    <outputs>
+        <!-- bedtools nuc writes a tab-delimited table, not sequences. -->
+        <data format="tabular" name="output" />
+    </outputs>
+    <help>
+
+**What it does**
+
+Profiles the nucleotide content of intervals in a fasta file.
+
+@REFERENCES@
+    </help>
+</tool>
--- a/overlapBed.xml Thu Jun 05 15:25:18 2014 -0400 +++ b/overlapBed.xml Wed Jun 18 12:40:28 2014 -0400 @@ -12,8 +12,8 @@ > $output </command> <inputs> - <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED or BAM file"/> - <param name="cols" type="text" value="" area="True" size="40x10" label="Specify the columns (1-based) for the starts and ends of the features for which you’d like to compute the overlap/distance. The columns must be listed in the following order: start1,end1,start2,end2" /> + <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/> + <param name="cols" type="text" value="" area="True" size="20x10" label="Specify the columns (1-based) for the starts and ends of the features for which you’d like to compute the overlap/distance. The columns must be listed in the following order: start1,end1,start2,end2" /> </inputs> <outputs> <data format_source="inputA" name="output" metadata_source="inputA" label="Overlap of ${inputA.name}"/> @@ -24,5 +24,6 @@ overlap computes the amount of overlap (in the case of positive values) or distance (in the case of negative values) between feature coordinates occurring on the same input line and reports the result at the end of the same line. In this way, it is a useful method for computing custom overlap scores from the output of other BEDTools. +@REFERENCES@ </help> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/randomBed.xml Wed Jun 18 12:40:28 2014 -0400
@@ -0,0 +1,39 @@
+<tool id="bedtools_randombed" name="RandomBed" version="0.2.0">
+    <!-- Galaxy wrapper around "bedtools random": generates random intervals on the
+         chromosomes of a genome file. The genome and seed inputs are expanded from
+         macros.xml. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools random
+        -g $genome
+        -l $length
+        -n $intervals
+        #if $seed.choose:
+            -seed $seed.seed
+        #end if
+        ## bedtools random prints to stdout; capture it in the declared output dataset.
+        > $output
+    </command>
+    <inputs>
+        <expand macro="genome" />
+        <param name="length" type="integer" value="100" label="The length of the intervals to generate." />
+        <param name="intervals" type="integer" value="1000000" label="The number of intervals to generate." />
+        <expand macro="seed" />
+    </inputs>
+    <outputs>
+        <data format="bed" name="output" />
+    </outputs>
+    <help>
+
+**What it does**
+
+bedtools random will generate a random set of intervals in BED6 format. One can specify both the number (-n) and the size (-l) of the intervals that should be generated.
+
+@REFERENCES@
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reldist.xml Wed Jun 18 12:40:28 2014 -0400
@@ -0,0 +1,43 @@
+<tool id="bedtools_reldistbed" name="ReldistBed" version="0.2.0">
+    <!-- Galaxy wrapper around "bedtools reldist": summarises the relative distances
+         between the intervals of file A and the closest intervals of file B. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools reldist
+        -a $inputA
+        -b $inputB
+        $detail
+        ## The report goes to stdout; capture it in the declared output dataset.
+        > $output
+    </command>
+    <inputs>
+        <param format="bed,bam,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF/BAM file"/>
+        <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="BED/VCF/GFF file"/>
+
+        <param name="detail" type="boolean" checked="false" truevalue="-detail" falsevalue="" label="Instead of a summary, report the relative distance for each interval in A" help="" />
+    </inputs>
+    <outputs>
+        <!-- The result is a table of relative distances, so it inherits neither the format nor the metadata of input A. -->
+        <data format="tabular" name="output" label="Relative distance of ${inputA.name} and ${inputB.name}"/>
+    </outputs>
+    <help>
+
+**What it does**
+
+Traditional approaches to summarizing the similarity between two sets of genomic intervals are based upon the number or proportion of intersecting intervals. However, such measures are largely blind to spatial correlations between the two sets where, despite consistent spacing or proximity, intersections are rare (for example, enhancers and transcription start sites rarely overlap, yet they are much closer to one another than two sets of random intervals). Favorov et al [1] proposed a relative distance metric that describes the distribution of relative distances between each interval in one set and the two closest intervals in another set (see the first figure below). If there is no spatial correlation between the two sets, one would expect the relative distances to be uniformly distributed among the relative distances ranging from 0 to 0.5. If, however, the intervals tend to be much closer than expected by chance, the distribution of observed relative distances would be shifted towards low relative distance values (e.g., the second figure below).
+
+.. image:: $PATH_TO_IMAGES/reldist-glyph.png
+
+.. image:: $PATH_TO_IMAGES/reldist-plot.png
+
+.. class:: infomark
+
+@REFERENCES@
+
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/shuffleBed.xml Wed Jun 18 12:40:28 2014 -0400
@@ -0,0 +1,78 @@
+<tool id="bedtools_shufflebed" name="ShuffleBed" version="0.2.0">
+    <!--
+        Galaxy wrapper around "bedtools shuffle": randomly relocates the features of a
+        BED/VCF/GFF file within a genome. The genome, seed and overlap inputs are
+        expanded from macros.xml.
+    -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        ## Every input feature is relocated exactly once; bedtools shuffle has no -n
+        ## option, so no interval count is passed.
+        bedtools shuffle
+        -g $genome
+        -i $inputA
+        $bedpe
+        #if $seed.choose:
+            -seed $seed.seed
+        #end if
+        ## The test parameters of both conditionals render as "True" or "False".
+        #if str($excl.choose) == 'True':
+            -excl $excl.excl
+        #end if
+        #if str($incl.choose) == 'True':
+            -incl $incl.incl
+        #end if
+        $chrom
+        -f $overlap
+        $chromfirst
+        $nooverlap
+        $allowBeyond
+        -maxTries $maxtries
+        > $output
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <param name="bedpe" type="boolean" label="The file is in BEDPE format" checked="false" truevalue="-bedpe" falsevalue="" />
+        <expand macro="genome" />
+        <param name="chrom" type="boolean" label="Keep features in the input file on the same chromosome. Solely permute their location on the chromosome. By default, both the chromosome and position are randomly chosen" checked="false" truevalue="-chrom" falsevalue="" />
+        <expand macro="seed" />
+        <conditional name="excl">
+            <param name="choose" type="boolean" label="Choose a BED file of coordinates in which features from -i should not be placed?" checked="false" truevalue="True" falsevalue="False" />
+            <when value="True">
+                <param name="excl" type="data" format="bed" label="Choose File" />
+            </when>
+            <when value="False" />
+        </conditional>
+        <conditional name="incl">
+            <param name="choose" type="boolean" label="Choose a BED file of coordinates in which features from -i should be placed?" checked="false" truevalue="True" falsevalue="False" />
+            <when value="True">
+                <param name="incl" type="data" format="bed" label="Choose File" />
+            </when>
+            <when value="False" />
+        </conditional>
+        <expand macro="overlap" />
+        <param name="chromfirst" type="boolean" label="Instead of choosing a position randomly among the entire genome (the default), first choose a chrom randomly, and then choose a random start coordinate on that chrom. This leads to features being ~uniformly distributed among the chroms, as opposed to features being distributed as a function of chrom size." checked="false" truevalue="-chromFirst" falsevalue="" />
+
+        <param name="maxtries" type="integer" value="1000" label="Max. number of attempts to find a home for a shuffled interval in the presence of -incl or -excl. Default = 1000." />
+        <param name="nooverlap" type="boolean" label="Don’t allow shuffled intervals to overlap" checked="false" truevalue="-noOverlapping" falsevalue="" />
+        <param name="allowBeyond" type="boolean" label="Allow the original length of the records to extend beyond the length of the chromosome." checked="false" truevalue="-allowBeyondChromEnd" falsevalue="" />
+    </inputs>
+    <outputs>
+        <data format="bed" name="output" />
+    </outputs>
+    <help>
+
+**What it does**
+
+bedtools shuffle will randomly permute the genomic locations of a feature file among a genome defined in a genome file. One can also provide an “exclusions” BED/GFF/VCF file that lists regions where you do not want the permuted features to be placed. For example, one might want to prevent features from being placed in known genome gaps. shuffle is useful as a null basis against which to test the significance of associations of one feature with another.
+
+.. image:: $PATH_TO_IMAGES/shuffle-glyph.png
+
+@REFERENCES@
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/slopBed.xml Wed Jun 18 12:40:28 2014 -0400
@@ -0,0 +1,55 @@
+<tool id="bedtools_slopbed" name="SlopBed" version="0.2.0">
+    <!--
+        Galaxy wrapper around "bedtools slop": grows every feature of a BED/VCF/GFF
+        file by a number of bases, bounded by the chromosome sizes of a genome file.
+        The genome and addition inputs are expanded from macros.xml.
+    -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools slop
+        $pct
+        $strand
+        -g $genome
+        -i $inputA
+        ## NOTE(review): assumes the "addition" macro declares a conditional named
+        ## "addition" holding addition_select, b, l and r; confirm in macros.xml.
+        #if str($addition.addition_select) == 'b':
+            -b $addition.b
+        #else:
+            -l $addition.l
+            -r $addition.r
+        #end if
+        $header
+        > $output
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <expand macro="genome" />
+        <param name="pct" type="boolean" checked="false" truevalue="-pct" falsevalue="" label="Define -l and -r as a fraction of the feature’s length" help="E.g. if used on a 1000bp feature, -l 0.50, will add 500 bp “upstream”" />
+        <param name="strand" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Define -l and -r based on strand" help="For example. if used, -l 500 for a negative-stranded feature, it will add 500 bp to the end coordinate" />
+        <expand macro="addition" />
+        <param name="header" type="boolean" checked="true" truevalue="-header" falsevalue="" label="Print the header from the input file prior to results." />
+    </inputs>
+    <outputs>
+        <data format="bed" name="output" label=""/>
+    </outputs>
+    <help>
+
+**What it does**
+
+bedtools slop will increase the size of each feature in a feature file by a user-defined number of bases. While something like this could be done with an awk '{OFS="\t" print $1,$2-&lt;slop&gt;,$3+&lt;slop&gt;}', bedtools slop will restrict the resizing to the size of the chromosome (i.e. no start &lt; 0 and no end &gt; chromosome size).
+
+.. image:: $PATH_TO_IMAGES/slop-glyph.png
+
+.. class:: warningmark
+
+In order to prevent the extension of intervals beyond chromosome boundaries, bedtools slop requires a genome file defining the length of each chromosome or contig.
+
+@REFERENCES@
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/subtractBed.xml Wed Jun 18 12:40:28 2014 -0400
@@ -0,0 +1,53 @@
+<tool id="bedtools_subtractbed" name="SubtractBed" version="0.2.0">
+    <!--
+        Galaxy wrapper around "bedtools subtract": removes from the features of file A
+        the portions that are overlapped by features of file B.
+    -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools subtract
+        $strand
+        -a $inputA
+        -b $inputB
+        -f $overlap
+        $removeIfOverlap
+        > $output
+    </command>
+    <inputs>
+        <param format="bed,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF file"/>
+        <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="BED/VCF/GFF file"/>
+        <expand macro="overlap" />
+
+        <!-- "strand" is declared once, here. NOTE(review): the strand2 macro presumably declares a
+             parameter of the same name, so it is not expanded in this tool; confirm in macros.xml. -->
+        <param name="strand" type="select" label="Calculation based on strandedness?">
+            <option value="" selected="True">Overlaps on either strand</option>
+            <option value="-s">Only overlaps occurring on the **same** strand.</option>
+            <option value="-S">Only overlaps occurring on the **opposite** strand.</option>
+        </param>
+
+        <param name="removeIfOverlap" type="select" label="Remove the entire feature when it overlaps?">
+            <option value="" selected="True">Do not remove the entire feature on overlap</option>
+            <option value="-A">Remove entire feature if any overlap. That is, by default, only subtract the portion of A that overlaps B. Here, if any overlap is found (or -f amount), the entire feature is removed.</option>
+            <option value="-N">Same as -A except when used with -f, the amount is the sum of all features (not any single feature).</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format_source="inputA" name="output" metadata_source="inputA" label=""/>
+    </outputs>
+    <help>
+
+**What it does**
+
+bedtools subtract searches for features in B that overlap A. If an overlapping feature is found in B, the overlapping portion is removed from A and the remaining portion of A is reported. If a feature in B overlaps all of a feature in A, the A feature will not be reported.
+
+.. image:: $PATH_TO_IMAGES/subtract-glyph.png
+
+@REFERENCES@
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tagBed.xml Wed Jun 18 12:40:28 2014 -0400
@@ -0,0 +1,59 @@
+<tool id="bedtools_tagbed" name="TagBed" version="0.2.0">
+    <!--
+        Galaxy wrapper around "bedtools tag": annotates the alignments of a BAM file
+        with tags derived from overlaps with one or more BED/VCF/GFF files.
+        The strand2 and overlap inputs are expanded from macros.xml.
+    -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools tag
+        -i $inputA
+        -files
+        #for $bed in $beds:
+            $bed.input
+        #end for
+        -f $overlap
+        $strand
+        -tag $tag
+        ## NOTE(review): bedtools documents -labels as taking one label per annotation
+        ## file; the "-labels" choices below pass the bare flag only. Confirm and extend.
+        $field
+        > $output
+    </command>
+    <inputs>
+        <param format="bam" name="inputA" type="data" label="BAM file"/>
+
+        <repeat name="beds" title="Add files" >
+            <param name="input" format="bed,gff,vcf" type="data" label="BED/VCF/GFF file" />
+        </repeat>
+        <expand macro="strand2" />
+        <expand macro="overlap" />
+
+        <param name="tag" type="text" value="YB" label="Dictate what the tag should be." />
+        <param name="field" type="select" label="Use which field from the annotation files to populate tags?">
+            <option value="-labels" selected="True">labels</option>
+            <option value="-scores">Scores</option>
+            <option value="-names">Names</option>
+            <option value="-labels -intervals">Intervals</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <!-- bedtools tag emits the annotated alignments as BAM. -->
+        <data format="bam" name="output" label="" />
+    </outputs>
+    <help>
+
+**What it does**
+
+Annotates a BAM file based on overlaps with multiple BED/GFF/VCF files on the intervals in an input bam file
+
+@REFERENCES@
+
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/A.bed Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,5 @@ +chr1 100 200 +chr1 180 250 +chr1 250 500 +chr1 501 1000 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/expandInput.bed Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,2 @@ +chr1 10 20 1,2,3 10,20,30 +chr1 40 50 4,5,6 40,50,60
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/groupbyinput.bed Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,14 @@ +chr21 9719758 9729320 variant1 chr21 9719768 9721892 ALR/Alpha 1004 + +chr21 9719758 9729320 variant1 chr21 9721905 9725582 ALR/Alpha 1010 + +chr21 9719758 9729320 variant1 chr21 9725582 9725977 L1PA3 3288 + +chr21 9719758 9729320 variant1 chr21 9726021 9729309 ALR/Alpha 1051 + +chr21 9729310 9757478 variant2 chr21 9729320 9729809 L1PA3 3897 - +chr21 9729310 9757478 variant2 chr21 9729809 9730866 L1P1 8367 + +chr21 9729310 9757478 variant2 chr21 9730866 9734026 ALR/Alpha 1036 - +chr21 9729310 9757478 variant2 chr21 9734037 9757471 ALR/Alpha 1182 - +chr21 9795588 9796685 variant3 chr21 9795589 9795713 (GAATG)n 308 + +chr21 9795588 9796685 variant3 chr21 9795736 9795894 (GAATG)n 683 + +chr21 9795588 9796685 variant3 chr21 9795911 9796007 (GAATG)n 345 + +chr21 9795588 9796685 variant3 chr21 9796028 9796187 (GAATG)n 756 + +chr21 9795588 9796685 variant3 chr21 9796202 9796615 (GAATG)n 891 + +chr21 9795588 9796685 variant3 chr21 9796637 9796824 (GAATG)n 621 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mygenome.bed Wed Jun 18 12:40:28 2014 -0400 @@ -0,0 +1,2 @@ +chr1 1000 +chr2 800
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/windowBed.xml Wed Jun 18 12:40:28 2014 -0400
@@ -0,0 +1,81 @@
+<tool id="bedtools_windowbed" name="WindowBed" version="0.2.0">
+    <!--
+        Galaxy wrapper around "bedtools window": reports the features of file B that
+        fall within a window around each feature of file A.
+    -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command>
+        bedtools window
+        #if $inputA.ext == "bam":
+            -abam $inputA
+        #else:
+            -a $inputA
+        #end if
+        -b $inputB
+        $ubam
+        $bed
+        $strandB
+        ## -b already names file B, so the symmetric window size is passed with -w.
+        ## Members of the conditional are addressed through their "addition" prefix.
+        #if str($addition.addition_select) == 'b':
+            -w $addition.b
+        #elif str($addition.addition_select) == 'lr':
+            -l $addition.l
+            -r $addition.r
+        #end if
+        $original
+        $number
+        $nooverlaps
+        $header
+        > $output
+    </command>
+    <inputs>
+        <param format="bed,bam,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF/BAM file"/>
+        <param format="bed,gff,vcf,gff3" name="inputB" type="data" label="BED/VCF/GFF file"/>
+        <param name="ubam" type="boolean" checked="false" truevalue="-ubam" falsevalue="" label="Write uncompressed BAM output. The default is write compressed BAM output." />
+        <param name="bed" type="boolean" checked="false" truevalue="-bed" falsevalue="" label="When using BAM input, write output as BED. The default is to write output in BAM when using BAM input." />
+        <conditional name="addition">
+            <param name="addition_select" type="select" label="Choose what you want to do">
+                <option value="huhn" selected="True">Do not change added base pairs</option>
+                <option value="b">Add Base pairs for **both** upstream and downstream of each entry in A when searching for overlaps in B</option>
+                <option value="lr">Add Base pairs **separately** for upstream and downstream of each entry in A when searching for overlaps in B</option>
+            </param>
+            <!-- The default option adds no parameters but still needs a matching case. -->
+            <when value="huhn" />
+            <when value="b">
+                <param name="b" label="Give Value" type="integer" value="1000" />
+            </when>
+            <when value="lr">
+                <param name="l" label="Base pairs added upstream (left of) of each entry in A when searching for overlaps in B. Allows one to create asymmetrical “windows”. Default is 1000bp." type="integer" value="1000" />
+                <param name="r" label="Base pairs added downstream (right of) of each entry in A when searching for overlaps in B. Allows one to create asymmetrical “windows”. Default is 1000bp." type="integer" value="1000" />
+            </when>
+        </conditional>
+        <param name="strandB" type="select" label="Calculation based on strandedness?">
+            <option value="" selected="True">Report any hit in B</option>
+            <option value="-sm">Only report hits in B that overlap A on the **same** strand</option>
+            <option value="-Sm">Only report hits in B that overlap A on the **opposite** strand.</option>
+        </param>
+        <param name="original" type="boolean" checked="false" truevalue="-u" falsevalue="" label="Write original A entry once if any overlaps found in B. In other words, just report the fact at least one overlap was found in B." />
+        <param name="number" type="boolean" checked="false" truevalue="-c" falsevalue="" label="For each entry in A, report the number of hits in B within the window. Reports 0 for A entries that have no overlap with B." />
+        <param name="nooverlaps" type="boolean" checked="false" truevalue="-v" falsevalue="" label="Only report those entries in A that have no overlaps with B." />
+        <param name="header" type="boolean" checked="false" truevalue="-header" falsevalue="" label="Print the header from the A file prior to results." />
+    </inputs>
+    <outputs>
+        <data format_source="inputA" name="output" metadata_source="inputA" label=""/>
+    </outputs>
+    <help>
+
+**What it does**
+
+Similar to bedtools intersect, window searches for overlapping features in A and B. However, window adds a specified number (1000, by default) of base pairs upstream and downstream of each feature in A. In effect, this allows features in B that are “near” features in A to be detected.
+
+.. image:: $PATH_TO_IMAGES/window-glyph.png
+
+@REFERENCES@
+    </help>
+</tool>