Mercurial > repos > devteam > bamtools_filter
view bamtools-filter.xml @ 7:255d1b5c962e draft
planemo upload commit 72e0040a5fce69961b107fe5ade9f5a935402a1a
author | devteam |
---|---|
date | Wed, 26 Aug 2015 17:20:39 -0400 |
parents | 23a1c1f66b47 |
children | 709d8669e8d6 |
line wrap: on
line source
<tool id="bamFilter" name="Filter" version="0.0.1">
    <description>BAM datasets on a variety of attributes</description>
    <requirements>
        <requirement type="package" version="2.3.0_2d7685d2ae">bamtools</requirement>
    </requirements>
    <!--
        Command template (Cheetah):
          1. copy the generated JSON filter script to the second output so the user can inspect it;
          2. symlink every input BAM and its index under a predictable local name;
          3. run "bamtools filter" with the script over all linked BAMs.
        Wrapped in CDATA so that the shell operators need no XML escaping.
    -->
    <command><![CDATA[
        cat "$script_file" > "$out_file2" &&
        #for $bam_count, $input_bam in enumerate( $input_bams ):
            ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &&
            ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &&
        #end for
        bamtools filter
            -script "$script_file"
            #for $bam_count, $input_bam in enumerate( $input_bams ):
                -in "localbam_${bam_count}.bam"
            #end for
            -out "$out_file1"
    ]]></command>
    <inputs>
        <!-- One or more BAM datasets; all of them are passed to a single bamtools invocation. -->
        <repeat name="input_bams" title="BAM dataset(s) to filter" min="1">
            <param name="input_bam" type="data" format="bam" label="BAM dataset" />
        </repeat>
        <!-- Each condition becomes one JSON object in the filter script; each filter is one key/value pair in it. -->
        <repeat name="conditions" title="Condition" min="1">
            <repeat name="filters" title="Filter" min="1">
                <conditional name="bam_property">
                    <param name="bam_property_selector" type="select" label="Select BAM property to filter on">
                        <option value="alignmentFlag"/>
                        <option value="cigar"/>
                        <option value="insertSize"/>
                        <option value="isDuplicate"/>
                        <option value="isFailedQC"/>
                        <option value="isFirstMate"/>
                        <option value="isMapped"/>
                        <option value="isMateMapped"/>
                        <option value="isMateReverseStrand"/>
                        <option value="isPaired"/>
                        <option value="isPrimaryAlignment"/>
                        <option value="isProperPair"/>
                        <option value="isReverseStrand"/>
                        <option value="isSecondMate"/>
                        <option selected="True" value="mapQuality"/>
                        <option value="matePosition"/>
                        <option value="mateReference"/>
                        <option value="name"/>
                        <option value="position"/>
                        <option value="queryBases"/>
                        <option value="reference"/>
                        <option value="tag"/>
                    </param>
                    <!-- would be fantastic to have AND and OR constructs in when statements -->
                    <when value="alignmentFlag">
                        <param name="bam_property_value" type="integer" value="3" label="Filter on this alignment flag" help="Default (3) is for a paired read mapped in a proper pair"/>
                    </when>
                    <when value="cigar">
                        <param name="bam_property_value" type="text" size="10" value="101M" label="Filter on this CIGAR string" help="Default (101M) is for 101 continuously matched bases"/>
                    </when>
                    <when value="insertSize">
                        <param name="bam_property_value" type="text" size="10" value="&gt;=250" label="Filter on insert size" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select pairs with insert size of at least 250 nt use &quot;&gt;=250&quot;">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="isDuplicate">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads marked as duplicates" help="Checked = Read IS Duplicate, Empty = Read is NOT Duplicate" />
                    </when>
                    <when value="isFailedQC">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads failing QC" help="Checked = Failed QC, Empty = Passed QC"/>
                    </when>
                    <when value="isFirstMate">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select first mate in a read pair" help="Checked = is first mate, Empty = is NOT first mate"/>
                    </when>
                    <when value="isMapped">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select mapped reads" help="Checked = Mapped, Empty = NOT mapped"/>
                    </when>
                    <when value="isMateMapped">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mapped mate" help="Checked = Mate IS mapped, Empty = Mate is NOT mapped"/>
                    </when>
                    <when value="isMateReverseStrand">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mate on the reverse strand" help="Checked = Mate IS on reverse strand, Empty = Mate is NOT on the reverse strand"/>
                    </when>
                    <when value="isPaired">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select paired reads" help="Checked = Read IS paired, Empty = Read is NOT paired"/>
                    </when>
                    <when value="isPrimaryAlignment">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select BAM records for primary alignments" help="Checked = Alignment IS primary, Empty = Alignment is NOT primary"/>
                    </when>
                    <when value="isProperPair">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select properly paired reads" help="Checked = Read IS in proper pair, Empty = Read is NOT in the proper pair"/>
                    </when>
                    <when value="isReverseStrand">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads in the reverse strand only" help="Checked = Read IS on the reverse strand, Empty = Read is NOT on the reverse strand"/>
                    </when>
                    <when value="isSecondMate">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select second mate in a read pair" help="Checked = Read IS second mate, Empty = Read is NOT second mate"/>
                    </when>
                    <when value="mapQuality">
                        <param name="bam_property_value" type="text" value="20" label="Filter on read mapping quality (phred scale)" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads with mapping quality of at least 30 use &quot;&gt;=30&quot;">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="matePosition">
                        <param name="bam_property_value" type="text" value="1000000" label="Filter on the position of the mate" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads with mate (second end) mapping after position 1,000,000 use &quot;&gt;1000000&quot;">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="mateReference">
                        <param name="bam_property_value" type="text" value="chr22" label="Filter on reference name for the mate" help="You can use = and ! (not) in your expression. E.g., to select reads with mates mapping to chr22 use &quot;chr22&quot;">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="name">
                        <param name="bam_property_value" type="text" label="Filter on read name" help="You can use = and ! (not) in your expression.">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="position">
                        <param name="bam_property_value" type="text" value="500000" label="Filter on the position of the read" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads mapping after position 5,000 use &quot;&gt;5000&quot;">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="queryBases">
                        <param name="bam_property_value" type="text" value="ttagggttagg" label="Filter on a sequence motif" help="You can use ! (not) in your expression">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="reference">
                        <param name="bam_property_value" type="text" value="chr22" label="Filter on the reference name for the read" help="You can use ! (not) in your expression">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="tag">
                        <param name="bam_property_value" type="text" value="NM:&gt;1" label="Filter on a particular tag" help="You can use &gt;, &lt;, =, and ! (not). Tag name and its value must be separated by &quot;:&quot;. E.g., to obtain reads with more than one mismatch use &quot;NM:&gt;1&quot;">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value=":!="/></valid>
                            </sanitizer>
                        </param>
                    </when>
                </conditional>
            </repeat>
        </repeat>
        <!-- Optional boolean rule combining the numbered conditions; only written to the script when there are two or more conditions. -->
        <conditional name="rule_configuration">
            <param name="rules_selector" type="boolean" truevalue="true" falsevalue="false" label="Would you like to set rules?" help="Allows complex logical constructs. See Example 4 below." />
            <when value="true">
                <param name="rules" type="text" size="20" label="Enter rules here" help="This option can only be used with at least two conditions. Read help below (Example 4) to understand how it works.">
                    <sanitizer invalid_char="">
                        <valid initial="string.printable"/>
                    </sanitizer>
                </param>
            </when>
            <when value="false"/>
        </conditional>
    </inputs>
    <configfiles>
        <configfile name="script_file">
##Sets up a json configfile for bamtools filter
##If there is more than one condition prints brackets and "filters:"
#if len( $conditions ) > 1
{ "filters":
  [
#end if
#for $i, $c in enumerate( $conditions, start=1 )
    { "id": "$i",
    #for $j, $s in enumerate( $c.filters, start=1 )
        ##The if below takes care of the comma at the end of last filter within a condition
        #if $j != len( $c.filters)
      "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}",
        #else
      "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}"
        #end if
    #end for
    ##The if below takes care of the comma at the end of last condition
    #if $i != len( $conditions )
    },
    #else
    }
    #end if
#end for
#if len( $conditions ) > 1
    ##rules_selector renders as its truevalue/falsevalue ("true"/"false"), so compare against lowercase "true"
    #if str( $rule_configuration.rules_selector ) == "true":
  ],
  "rule" : "${rule_configuration.rules}"
    #else
  ]
    #end if
}
#end if
        </configfile>
    </configfiles>
    <outputs>
        <!-- out_file2 receives a copy of the JSON filter script; out_file1 receives the filtered BAM. -->
        <data format="txt" name="out_file2" />
        <data format="bam" name="out_file1" />
    </outputs>
    <tests>
        <test>
            <param name="input_bam" ftype="bam" value="bamtools-input1.bam"/>
            <param name="bam_property_selector" value="mapQuality"/>
            <param name="bam_property_value" value="&gt;20"/>
            <output name="out_file1" file="bamtools-test1.bam" ftype="bam"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

BAMTools filter is a very powerful utility to perform complex filtering of BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).

-----

**How it works**

The tool's logic relies on three concepts: (1) input BAM, (2) groups, and (3) filters.

*Input BAM(s)*

The input BAM is self-explanatory. This is the dataset you will be filtering. The tool can accept just one or multiple BAM files. To filter on multiple BAMs just add them by clicking **Add new BAM dataset(s) to filter**

*Conditions and Filters*

Conditions for filtering BAM files can be arranged in **Groups and Filters**. While it can be confusing at first this is what gives ultimate power to this tool. So try to look at the examples we are supplying below.

-----

**Example 1. Using a single filter**

When filtering on a single condition there is no need to worry about filters and conditions. Just choose a filter from the **Select BAM property to filter on:** dropdown and enter a value (or click a checkbox for binary filters).
For example, for retaining reads with mapping quality of at least 20 one would set the tool interface as shown below:

.. image:: single-filter.png

-----

**Example 2. Using multiple filters**

Now suppose one needs to extract reads that (1) have mapping quality of at least 20, (2) contain at least 1 mismatch, and (3) are mapping onto forward strand only.
To do so we will use three filters as shown below (multiple filters are added to the interface by clicking on the **Add new Filter** button):

.. image:: multiple-filters.png

In this case (you can see that the three filters are grouped within a single Condition - **Condition 1**) the filter tool uses logical **AND** to perform filtering.
In other words only reads that (1) have mapping quality of at least 20 **AND** (2) contain at least 1 mismatch **AND** (3) are mapping onto forward strand will be returned in this example.

-----

**Example 3. Complex filtering with multiple conditions**

Suppose now you would like to select **either** reads that (**1**) have (*1.1*) no mismatches and (*1.2*) are on the forward strand **OR** (**2**) reads that have (*2.1*)
at least one mismatch and (*2.2*) are on the reverse strand. In this scenario we have to set up two conditions: (**1**) and (**2**) each with two filters: *1.1* and *1.2* as well as *2.1* and *2.2*.
The following screenshot explains how this can be done:

.. image:: complex-filters.png

-----

**Example 4. Even more complex filtering with Rules**

In the above example we have used two conditions (Condition 1 and Condition 2). Using multiple conditions allows combining them in a variety of ways to enable even more powerful filtering.
For example, suppose you want to get all reads that (**1**) do NOT map to mitochondria and either (**2**) have mapping quality over 20, or (**3**) are in properly mapped pairs. The logical rule to enable such
filtering will look like this::

 !(1) & (2 | 3)

Here, numbers 1, 2, and 3 represent conditions. The following screenshot illustrates how to do this in Galaxy:

.. image:: rule.png

There are three conditions here, each with a single filter. A text entry area that can be opened by clicking on the **Would you like to set rules?** checkbox enables you to enter a rule.
Here numbers correspond to numbers of conditions as they are shown in the interface. E.g., 1 corresponds to condition 1, 2 to condition 2 and so on... In human language this means::

 NOT condition 1 AND (condition 2 OR condition 3)

-----

**JSON script file**

This tool produces two outputs. One of them is a BAM file containing filtered reads. The other is a JSONified script. It can help you to see how your instructions are sent to BAMTools.
For instance, Example 3 looks like this in the JSON form::

    {
      "filters":
        [
          { "id": "1",
            "tag":"NM:=0",
            "isReverseStrand":"false"
          },
          { "id": "2",
            "tag":"NM:>0",
            "isReverseStrand":"true"
          }
        ]
    }

-----

**More information**

.. class:: infomark

Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
    ]]></help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btr174</citation>
    </citations>
</tool>