view bamtools-filter.xml @ 15:ab1de07c641b draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/bamtools/bamtools_filter commit 8d5ae1d04c43988fdcc458f4f08376a15e72db8e"
author iuc
date Thu, 20 Feb 2020 22:22:59 +0000
parents ab0a7a622ff6
children
line wrap: on
line source

<tool id="bamFilter" name="Filter" version="2.4.1">
    <description>BAM datasets on a variety of attributes</description>
    <macros>
          <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Command outline, executed as a single && chain so any failing step aborts the job:
          1. copy the generated JSON filter script to the out_file2 output;
          2. symlink the input BAM and its index (from the dataset metadata) to the fixed
             local names localbam.bam / localbam.bam.bai (presumably so bamtools finds
             the index next to the BAM; confirm);
          3. write the script to stdout with cat;
          4. run "bamtools filter" with that script, writing the result to out_file1.
    -->
    <command detect_errors="exit_code">
        <![CDATA[
            cp '$script_file' '$out_file2' &&
            ln -s '${input_bam}' localbam.bam &&
            ln -s '${input_bam.metadata.bam_index}' localbam.bam.bai &&
            cat '$script_file' &&
            bamtools filter -script '$script_file' -in localbam.bam -out '$out_file1'
        ]]>
    </command>
    <!--
        script_file is a Cheetah template that builds the bamtools filter script as a
        dict and serializes it with json.dumps. Each Condition becomes one object in
        "filters" and the optional rule expression is stored under "rule".
    -->
    <configfiles>
        <configfile name="script_file">
<![CDATA[
<% import json %>
#set $config = dict()
#if $conditions:
    #set $config[ 'filters' ] = []
    ## One filter object per Condition; its "id" is the 1-based position of the
    ## Condition, which is the number the optional rule expression refers to.
    #for $i, $condition in enumerate( $conditions, start=1 ):
        #set $filter = dict( id=str( $i ) )
        ## Property name maps to its value. Selecting the same property twice in one
        ## Condition keeps only the last value because dict keys are unique.
        #for $s in $condition.filters:
            #set $filter[ $str( s[ 'bam_property' ][ 'bam_property_selector' ] ) ] = $str( s[ 'bam_property' ][ 'bam_property_value' ] )
        #end for
        ## The silent directive evaluates the call without writing its return
        ## value (None) into the generated script.
        #silent $config[ 'filters' ].append( $filter )
    #end for
    #if str( $rule_configuration.rules_selector ) == "true":
        #set $config[ 'rule' ] = str( $rule_configuration.rules )
    #end if
#end if
$json.dumps( $config, indent=4 )
]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="input_bam" type="data" format="bam" label="BAM dataset(s) to filter" />
        <repeat name="conditions" title="Condition" min="1">
            <repeat name="filters" title="Filter" min="1">
                <conditional name="bam_property">
                    <!--
                        Property selector. The chosen option value is used verbatim as the key
                        of the JSON filter entry built in the script_file configfile, and
                        mapQuality is preselected. Each "when" case below supplies the matching
                        bam_property_value input for that property.
                    -->
                    <param name="bam_property_selector" type="select" label="Select BAM property to filter on">
                        <option value="alignmentFlag"/>
                        <option value="cigar"/>
                        <option value="insertSize"/>
                        <option value="isDuplicate"/>
                        <option value="isFailedQC"/>
                        <option value="isFirstMate"/>
                        <option value="isMapped"/>
                        <option value="isMateMapped"/>
                        <option value="isMateReverseStrand"/>
                        <option value="isPaired"/>
                        <option value="isPrimaryAlignment"/>
                        <option value="isProperPair"/>
                        <option value="isReverseStrand"/>
                        <option value="isSecondMate"/>
                        <option selected="True" value="mapQuality"/>
                        <option value="matePosition"/>
                        <option value="mateReference"/>
                        <option value="name"/>
                        <option value="position"/>
                        <option value="queryBases"/>
                        <option value="reference"/>
                        <option value="tag"/>
                    </param>
                    <!-- would be fantastic to have AND and OR constructs in when statements -->
                    <!-- alignmentFlag takes an integer; cigar takes a free-text CIGAR string. -->
                    <when value="alignmentFlag">
                        <param
                            name="bam_property_value"
                            type="integer"
                            value="3"
                            label="Filter on this alignment flag"
                            help="Default (3) is for a paired read mapped in a proper pair"
                        />
                    </when>
                    <when value="cigar">
                        <param
                            name="bam_property_value"
                            type="text"
                            value="101M"
                            label="Filter on this CIGAR string"
                            help="Default (101M) is for 101 continuously matched bases"
                        />
                    </when>
                    <!-- Insert size expression. Besides the comparison characters, the sanitizer also lets "-" through (presumably for negative insert sizes). -->
                    <when value="insertSize">
                        <param name="bam_property_value" type="text" value="&gt;=250" label="Filter on insert size" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select pairs with insert size of at least 250 nt use &quot;&gt;=250&quot;">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/><add value="-"/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <!-- Boolean flag filters: each checkbox is written to the JSON script as the string "true" (checked) or "false" (empty). -->
                    <when value="isDuplicate">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads marked as duplicates" help="Checked = Read IS Duplicate, Empty = Read is NOT Duplicate" />
                    </when>
                    <when value="isFailedQC">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads failing QC" help="Checked = Failed QC, Empty = Passed QC"/>
                    </when>
                    <when value="isFirstMate">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select first mate in a read pair" help="Checked = is first mate, Empty = is NOT first mate"/>
                    </when>
                    <when value="isMapped">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select mapped reads" help="Checked = Mapped, Empty = NOT mapped"/>
                    </when>
                    <when value="isMateMapped">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mapped mate" help="Checked = Mate IS mapped, Empty = Mate is NOT mapped"/>
                    </when>
                    <when value="isMateReverseStrand">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mate on the reverse strand" help="Checked = Mate IS on reverse strand, Empty = Mate is NOT on the reverse strand"/>
                    </when>
                    <when value="isPaired">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select paired reads" help="Checked = Read IS paired, Empty = Read is NOT paired"/>
                    </when>
                    <when value="isPrimaryAlignment">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select BAM records for primary alignments" help="Checked = Alignment IS primary, Empty = Alignment is NOT primary"/>
                    </when>
                    <when value="isProperPair">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select properly paired reads" help="Checked = Read IS in proper pair, Empty = Read is NOT in the proper pair"/>
                    </when>
                    <when value="isReverseStrand">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads in the reverse strand only" help="Checked = Read IS on the reverse strand, Empty = Read is NOT on the reverse strand"/>
                    </when>
                    <when value="isSecondMate">
                        <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select second mate in a read pair" help="Checked = Read IS second mate, Empty = Read is NOT second mate"/>
                    </when>
                    <!-- Comparison expressions: the sanitizer keeps letters, digits and the characters &gt; &lt; ! = and drops everything else. -->
                    <when value="mapQuality">
                        <param
                            name="bam_property_value"
                            type="text"
                            value="20"
                            label="Filter on read mapping quality (phred scale)"
                            help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads with mapping quality of at least 30 use &quot;&gt;=30&quot;"
                        >
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits">
                                    <add value="&gt;"/>
                                    <add value="&lt;"/>
                                    <add value="!="/>
                                </valid>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="matePosition">
                        <param
                            name="bam_property_value"
                            type="text"
                            value="1000000"
                            label="Filter on the position of the mate"
                            help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads with mate (second end) mapping after position 1,000,000 use &quot;&gt;1000000&quot;"
                        >
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits">
                                    <add value="&gt;"/>
                                    <add value="&lt;"/>
                                    <add value="!="/>
                                </valid>
                            </sanitizer>
                        </param>
                    </when>
                    <!-- Reference names often contain "_", "." or "-" (e.g. chrUn_gl000211); these are allowed so the sanitizer does not silently strip them from the filter value. -->
                    <when value="mateReference">
                        <param name="bam_property_value" type="text" value="chr22" label="Filter on reference name for the mate" help="You can use = and ! (not) in your expression. E.g., to select reads with mates mapping to chr22 use &quot;chr22&quot;">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/><add value="_.-"/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <!-- Read names commonly contain "_", ".", ":" or "-"; these are allowed so the sanitizer does not silently strip them from the filter value. -->
                    <when value="name">
                        <param name="bam_property_value" type="text" label="Filter on read name" help="You can use = and ! (not) in your expression.">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/><add value="_.:-"/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <!-- Read position and sequence motif filters; both share the letters/digits plus &gt; &lt; ! = sanitizer. -->
                    <when value="position">
                        <param
                            name="bam_property_value"
                            type="text"
                            value="500000"
                            label="Filter on the position of the read"
                            help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads mapping after position 5,000 use &quot;&gt;5000&quot;"
                        >
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits">
                                    <add value="&gt;"/>
                                    <add value="&lt;"/>
                                    <add value="!="/>
                                </valid>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="queryBases">
                        <param
                            name="bam_property_value"
                            type="text"
                            value="ttagggttagg"
                            label="Filter on a sequence motif"
                            help="You can use ! (not) in your expression"
                        >
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits">
                                    <add value="&gt;"/>
                                    <add value="&lt;"/>
                                    <add value="!="/>
                                </valid>
                            </sanitizer>
                        </param>
                    </when>
                    <!-- Reference names often contain "_", "." or "-" (e.g. chrUn_gl000211); these are allowed so the sanitizer does not silently strip them from the filter value. -->
                    <when value="reference">
                        <param name="bam_property_value" type="text" value="chr22" label="Filter on the reference name for the read" help="You can use ! (not) in your expression">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/><add value="_.-"/></valid>
                            </sanitizer>
                        </param>
                    </when>
                    <!-- Tag filter written as TAG:expression; the sanitizer additionally allows ":", "+" and "-". -->
                    <when value="tag">
                        <param name="bam_property_value" type="text" value="NM:&gt;1" label="Filter on a particular tag" help="You can use +, -, &gt;, &lt;, =, and ! (not).  Tag name and its value must be separated by &quot;:&quot;. E.g., to obtain reads with more than one mismatch use &quot;NM:&gt;1&quot;">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value=":!=+-"/></valid>
                            </sanitizer>
                        </param>
                    </when>
                </conditional>
            </repeat>
        </repeat>
        <!--
            Optional rule expression. When the checkbox is ticked, the text entered here is
            stored under the "rule" key of the generated JSON script; any printable
            character is accepted by the sanitizer.
        -->
        <conditional name="rule_configuration">
            <param
                name="rules_selector"
                type="boolean"
                truevalue="true"
                falsevalue="false"
                label="Would you like to set rules?"
                help="Allows complex logical constructs. See Example 4 below."
            />
            <when value="false"/>
            <when value="true">
                <param
                    name="rules"
                    type="text"
                    label="Enter rules here"
                    help="This option can only be used with at least two conditions. Read help below (Example 4) to understand how it works."
                >
                    <sanitizer invalid_char="">
                        <valid initial="string.printable"/>
                    </sanitizer>
                </param>
            </when>
        </conditional>
    </inputs>
    <!-- out_file2 receives a copy of the generated JSON filter script; out_file1 is the BAM written by bamtools filter. -->
    <outputs>
        <data
            name="out_file2"
            format="txt"
            label="${tool.name} on ${on_string}: JSON filter rules"
        />
        <data
            name="out_file1"
            format="bam"
            label="${tool.name} on ${on_string}: Filtered BAM"
        />
    </outputs>
    <!-- Single condition with a single filter (mapQuality "&gt;20"); only the filtered BAM output is compared. -->
    <tests>
        <test>
            <param
                name="input_bam"
                value="bamtools-input1.bam"
                ftype="bam"
            />
            <param
                name="bam_property_selector"
                value="mapQuality"
            />
            <param
                name="bam_property_value"
                value="&gt;20"
            />
            <output
                name="out_file1"
                file="bamtools-test1.bam"
                ftype="bam"
            />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

BAMTools filter is a very powerful utility to perform complex filtering of BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).

-----

**How it works**

The tool's logic relies on three concepts: (1) the input BAM, (2) conditions (groups of filters), and (3) filters.

*Input BAM(s)*

The input BAM is self-explanatory. This is the dataset you will be filtering. The tool can accept just one or multiple BAM files. To filter on multiple BAMs just add them by clicking **Add new BAM dataset(s) to filter**

*Conditions and Filters*

Conditions for filtering BAM files can be arranged in **Groups and Filters**. While it can be confusing at first, this is what gives ultimate power to this tool. So try to look at the examples we are supplying below.

-----

**Example 1. Using a single filter**

When filtering on a single condition there is no need to worry about filters and conditions. Just choose a filter from the **Select BAM property to filter on:** dropdown and enter a value (or click a checkbox for binary filters).
For example, for retaining reads with mapping quality of at least 20 one would set the tool interface as shown below:

.. image:: single-filter.png

-----

**Example 2. Using multiple filters**

Now suppose one needs to extract reads that (1) have mapping quality of at least 20, (2) contain at least 1 mismatch, and (3) are mapping onto forward strand only.
To do so we will use three filters as shown below (multiple filters are added to the interface by clicking on the **Add new Filter** button):

.. image:: multiple-filters.png

In this case (you can see that the three filters are grouped within a single Condition - **Condition 1**) the filter tool uses logical **AND** to perform filtering.
In other words, only reads that (1) have mapping quality of at least 20 **AND** (2) contain at least 1 mismatch **AND** (3) are mapping onto the forward strand will be returned in this example.

-----

**Example 3. Complex filtering with multiple conditions**

Suppose now you would like to select **either** reads that (**1**) have (*1.1*) no mismatches and (*1.2*) are on the forward strand **OR** (**2**) reads that have (*2.1*)
at least one mismatch and (*2.2*) are on the reverse strand. In this scenario we have to set up two conditions: (**1**) and (**2**) each with two filters: *1.1* and *1.2* as well as *2.1* and *2.2*.
The following screenshot explains how this can be done:

.. image:: complex-filters.png

-----

**Example 4. Even more complex filtering with Rules**

In the above example we have used two conditions (Condition 1 and Condition 2). Using multiple conditions allows you to combine them in a variety of ways to enable even more powerful filtering.
For example, suppose you want to get all reads that (**1**) do NOT map to mitochondria and either (**2**) have mapping quality over 20, or (**3**) are in properly mapped pairs. The logical rule to enable such
filtering will look like this::

 !(1) & (2 | 3)

Here, numbers 1, 2, and 3 represent conditions. The following screenshot illustrates how to do this in Galaxy:

.. image:: rule.png

There are three conditions here, each with a single filter. A text entry area that can be opened by clicking on the **Would you like to set rules?** checkbox enables you to enter a rule.
Here numbers correspond to numbers of conditions as they are shown in the interface. E.g., 1 corresponds to condition 1, 2 to condition 2 and so on... In human language this means::

 NOT condition 1 AND (condition 2 OR condition 3)

-----

**JSON script file**

This tool produces two outputs. One of them is a BAM file containing filtered reads. The other is a JSONified script. It can help you to see how your instructions are sent to BAMTools.
For instance, Example 3 looks like this in JSON form::

       {
        "filters":
        [
          { "id": "1",
            "tag":"NM:=0",
            "isReverseStrand":"false"
          },
          { "id": "2",
            "tag":"NM:>0",
            "isReverseStrand":"true"
          }
        ]
      }


-----

**More information**

.. class:: infomark

Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btr174</citation>
    </citations>
</tool>