Mercurial > repos > devteam > bamtools_filter
annotate bamtools-filter.xml @ 9:c20a4a4acf3f draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
author | devteam |
---|---|
date | Mon, 09 Nov 2015 12:04:29 -0500 |
parents | 709d8669e8d6 |
children | 4089f1be1a71 |
rev | line source |
---|---|
9
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
1 <tool id="bamFilter" name="Filter" version="0.0.2"> |
0 | 2 <description>BAM datasets on a variety of attributes</description> |
3 <requirements> | |
4 <requirement type="package" version="2.3.0_2d7685d2ae">bamtools</requirement> | |
5 </requirements> | |
6 <command> | |
1
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
7 cat $script_file > $out_file2; |
0 | 8 |
9 #for $bam_count, $input_bam in enumerate( $input_bams ): | |
9
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
10 ln -s "${input_bam}" "localbam_${bam_count}.bam" && |
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
11 ln -s "${input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" && |
0 | 12 #end for |
9
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
13 |
0 | 14 bamtools |
15 filter | |
16 -script $script_file | |
9
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
17 |
0 | 18 #for $bam_count, $input_bam in enumerate( $input_bams ): |
19 -in "localbam_${bam_count}.bam" | |
20 #end for | |
21 -out $out_file1 | |
22 </command> | |
23 <inputs> | |
9
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
24 <param name="input_bams" type="data" format="bam" label="BAM dataset(s) to filter" min="1" multiple="True"/> |
0 | 25 <repeat name="conditions" title="Condition" min="1"> |
26 <repeat name="filters" title="Filter" min="1"> | |
27 <conditional name="bam_property"> | |
28 <param name="bam_property_selector" type="select" label="Select BAM property to filter on"> | |
29 <option value="alignmentFlag"/> | |
30 <option value="cigar"/> | |
31 <option value="insertSize"/> | |
32 <option value="isDuplicate"/> | |
33 <option value="isFailedQC"/> | |
34 <option value="isFirstMate"/> | |
35 <option value="isMapped"/> | |
36 <option value="isMateMapped"/> | |
37 <option value="isMateReverseStrand"/> | |
38 <option value="isPaired"/> | |
39 <option value="isPrimaryAlignment"/> | |
40 <option value="isProperPair"/> | |
41 <option value="isReverseStrand"/> | |
42 <option value="isSecondMate"/> | |
43 <option selected="True" value="mapQuality"/> | |
44 <option value="matePosition"/> | |
45 <option value="mateReference"/> | |
46 <option value="name"/> | |
47 <option value="position"/> | |
48 <option value="queryBases"/> | |
49 <option value="reference"/> | |
50 <option value="tag"/> | |
51 </param> | |
52 <!-- would be fantastic to have AND and OR constructs in when statements --> | |
53 <when value="alignmentFlag"> | |
54 <param name="bam_property_value" type="integer" value="3" label="Filter on this alignment flag" help="Default (3) is for a paired read mapped in a proper pair"/> | |
55 </when> | |
56 <when value="cigar"> | |
8
709d8669e8d6
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
5
diff
changeset
|
57 <param name="bam_property_value" type="text" value="101M" label="Filter on this CIGAR string" help="Default (101M) is for 101 continuously matched bases"/> |
0 | 58 </when> |
59 <when value="insertSize"> | |
8
709d8669e8d6
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
5
diff
changeset
|
60 <param name="bam_property_value" type="text" value=">=250" label="Filter on insert size" help="You can use >, <, =, and ! (not) in your expression. E.g., to select pairs with insert size of at least 250 nt use ">=250""> |
0 | 61 <sanitizer invalid_char=""> |
62 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
63 </sanitizer> | |
64 </param> | |
65 </when> | |
66 <when value="isDuplicate"> | |
67 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads marked as duplicates" help="Checked = Read IS Duplicate, Empty = Read is NOT Duplicate" /> | |
68 </when> | |
69 <when value="isFailedQC"> | |
70 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads failing QC" help="Checked = Failed QC, Empty = Passed QC"/> | |
71 </when> | |
72 <when value="isFirstMate"> | |
73 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select first mate in a read pair" help="Checked = is first mate, Empty = is NOT first mate"/> | |
74 </when> | |
75 <when value="isMapped"> | |
76 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select mapped reads" help="Checked = Mapped, Empty = NOT mapped"/> | |
77 </when> | |
78 <when value="isMateMapped"> | |
79 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mapped mate" help="Checked = Mate IS mapped, Empty = Mate is NOT mapped"/> | |
80 </when> | |
81 <when value="isMateReverseStrand"> | |
82 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mate on the reverse strand" help="Checked = Mate IS on reverse strand, Empty = Mate is NOT on the reverse strand"/> | |
83 </when> | |
84 <when value="isPaired"> | |
85 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select paired reads" help="Checked = Read IS paired, Empty = Read is NOT paired"/> | |
86 </when> | |
87 <when value="isPrimaryAlignment"> | |
88 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select BAM records for primary alignments" help="Checked = Alignment IS primary, Empty = Alignment is NOT primary"/> | |
89 </when> | |
90 <when value="isProperPair"> | |
91 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select properly paired reads" help="Checked = Read IS in proper pair, Empty = Read is NOT in the proper pair"/> | |
92 </when> | |
93 <when value="isReverseStrand"> | |
94 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads in the reverse strand only" help="Checked = Read IS on the reverse strand, Empty = Read is NOT on the reverse strand"/> | |
95 </when> | |
96 <when value="isSecondMate"> | |
97 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select second mate in a read pair" help="Checked = Read IS second mate, Empty = Read is NOT second mate"/> | |
98 </when> | |
99 <when value="mapQuality"> | |
100 <param name="bam_property_value" type="text" value="20" label="Filter on read mapping quality (phred scale)" help="You can use >, <, =, and ! (not) in your expression. E.g., to select reads with mapping quality of at least 30 use ">=30""> | |
101 <sanitizer invalid_char=""> | |
102 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
103 </sanitizer> | |
104 </param> | |
105 </when> | |
106 <when value="matePosition"> | |
107 <param name="bam_property_value" type="text" value="1000000" label="Filter on the position of the mate" help="You can use >, <, =, and ! (not) in your expression. E.g., to select reads with mate (second end) mapping after position 1,000,000 use ">1000000""> | |
108 <sanitizer invalid_char=""> | |
109 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
110 </sanitizer> | |
111 </param> | |
112 </when> | |
113 <when value="mateReference"> | |
114 <param name="bam_property_value" type="text" value="chr22" label="Filter on reference name for the mate" help="You can use = and ! (not) in your expression. E.g., to select reads with mates mapping to chr22 use "chr22""> | |
115 <sanitizer invalid_char=""> | |
116 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
117 </sanitizer> | |
118 </param> | |
119 </when> | |
120 <when value="name"> | |
121 <param name="bam_property_value" type="text" label="Filter on read name" help="You can use = and ! (not) in your expression."> | |
122 <sanitizer invalid_char=""> | |
123 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
124 </sanitizer> | |
125 </param> | |
126 </when> | |
127 <when value="position"> | |
128 <param name="bam_property_value" type="text" value="500000" label="Filter on the position of the read" help="You can use >, <, =, and ! (not) in your expression. E.g., to select reads mapping after position 5,000 use ">5000""> | |
129 <sanitizer invalid_char=""> | |
130 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
131 </sanitizer> | |
132 </param> | |
133 </when> | |
134 <when value="queryBases"> | |
135 <param name="bam_property_value" type="text" value="ttagggttagg" label="Filter on a sequence motif" help="You can use ! (not) in your expression"> | |
136 <sanitizer invalid_char=""> | |
137 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
138 </sanitizer> | |
139 </param> | |
140 </when> | |
141 <when value="reference"> | |
142 <param name="bam_property_value" type="text" value="chr22" label="Filter on the reference name for the read" help="You can use ! (not) in your expression"> | |
143 <sanitizer invalid_char=""> | |
144 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
145 </sanitizer> | |
146 </param> | |
147 </when> | |
148 <when value="tag"> | |
149 <param name="bam_property_value" type="text" value="NM:>1" label="Filter on a particular tag" help="You can use >, <, =, and ! (not). | |
150 Tag name and its value must be separated by ":". E.g., to obtain reads with more than one mismatch use "NM:>1""> | |
151 <sanitizer invalid_char=""> | |
152 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value=":!="/></valid> | |
153 </sanitizer> | |
154 </param> | |
155 </when> | |
156 </conditional> | |
157 </repeat> | |
158 </repeat> | |
159 <conditional name="rule_configuration"> | |
160 <param name="rules_selector" type="boolean" truevalue="true" falsevalue="false" label="Would you like to set rules?" help="Allows complex logical constructs. See Example 4 below." /> | |
161 <when value="true"> | |
8
709d8669e8d6
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
5
diff
changeset
|
162 <param name="rules" type="text" label="Enter rules here" help="This option can only be used with at least two conditions. Read help below (Example 4) to understand how it works." > |
0 | 163 <sanitizer invalid_char=""> |
164 <valid initial="string.printable"/> | |
165 </sanitizer> | |
166 </param> | |
167 </when> | |
168 </conditional> | |
169 </inputs> | |
170 | |
171 <configfiles> | |
172 <configfile name="script_file"> | |
173 ##Sets up a json configfile for bamtools filter | |
174 ##If there is more than one condition prints brackets and "filters:" | |
175 #if len( $conditions ) > 1 | |
176 { | |
177 "filters": | |
178 [ | |
179 #end if | |
180 #for $i, $c in enumerate( $conditions, start=1 ) | |
181 { "id": "$i", | |
182 #for $j, $s in enumerate( $c.filters, start=1 ) | |
9
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
183 ##The if below takes care of the comma at the end of last condition within group |
0 | 184 #if $j != len( $c.filters) |
185 "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}", | |
186 #else | |
187 "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}" | |
188 #end if | |
189 #end for | |
9
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
190 ##The if below takes care of the comma at the end of last condition within group |
0 | 191 #if $i != len( $conditions ) |
192 }, | |
193 #else | |
194 } | |
1
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
195 #end if |
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
196 #end for |
0 | 197 #if len( $conditions ) > 1 |
198 #if str( $rule_configuration.rules_selector ) == "true": | |
199 ], | |
200 "rule" : "${rule_configuration.rules}" | |
201 #else | |
202 ] | |
203 #end if | |
204 } | |
205 #end if | |
206 </configfile> | |
207 </configfiles> | |
208 | |
209 <outputs> | |
210 <data format="txt" name="out_file2" /> | |
211 <data format="bam" name="out_file1" /> | |
212 </outputs> | |
213 <tests> | |
214 <test> | |
9
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
215 <param name="input_bams" ftype="bam" value="bamtools-input1.bam"/> |
0 | 216 <param name="bam_property_selector" value="mapQuality"/> |
217 <param name="bam_property_value" value=">20"/> | |
9
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
218 <output name="out_file1" file="bamtools-test1.bam" ftype="bam"/> |
0 | 219 </test> |
220 </tests> | |
221 <help> | |
222 **What it does** | |
223 | |
224 BAMTools filter is a very powerful utility to perform complex filtering of BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). | |
225 | |
226 ----- | |
227 | |
228 **How it works** | |
229 | |
230 The tool's logic relies on three concepts: (1) input BAM, (2) groups, and (3) filters. | |
231 | |
232 *Input BAM(s)* | |
233 | |
234 The input BAM is self-explanatory. This is the dataset you will be filtering. The tool can accept just one or multiple BAM files. To filter on multiple BAMs just add them by clicking **Add new BAM dataset(s) to filter** | |
235 | |
236 *Conditions and Filters* | |
237 | |
238 Conditions for filtering BAM files can be arranged in **Groups and Filters**. While it can be confusing at first, this is what gives ultimate power to this tool. So try to look at the examples we are supplying below. | |
239 | |
240 ----- | |
241 | |
242 **Example 1. Using a single filter** | |
243 | |
244 When filtering on a single condition there is no need to worry about filters and conditions. Just choose a filter from the **Select BAM property to filter on:** dropdown and enter a value (or click a checkbox for binary filters). | |
245 For example, for retaining reads with mapping quality of at least 20 one would set the tool interface as shown below: | |
246 | |
5
23a1c1f66b47
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
4
diff
changeset
|
247 .. image:: single-filter.png |
0 | 248 |
249 ----- | |
250 | |
251 **Example 2. Using multiple filters** | |
252 | |
253 Now suppose one needs to extract reads that (1) have mapping quality of at least 20, (2) contain at least 1 mismatch, and (3) are mapping onto forward strand only. | |
254 To do so we will use three filters as shown below (multiple filters are added to the interface by clicking on the **Add new Filter** button): | |
255 | |
5
23a1c1f66b47
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
4
diff
changeset
|
256 .. image:: multiple-filters.png |
0 | 257 |
258 In this case (you can see that the three filters are grouped within a single Condition - **Condition 1**) the filter tool uses logical **AND** to perform filtering. | |
259 In other words, only reads that (1) have mapping quality of at least 20 **AND** (2) contain at least 1 mismatch **AND** (3) are mapping onto the forward strand will be returned in this example. | |
260 | |
261 ----- | |
262 | |
263 **Example 3. Complex filtering with multiple conditions** | |
264 | |
9
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
265 Suppose now you would like to select **either** reads that (**1**) have (*1.1*) no mismatches and (*1.2*) are on the forward strand **OR** (**2**) reads that have (*2.1*) |
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
266 at least one mismatch and (*2.2*) are on the reverse strand. In this scenario we have to set up two conditions: (**1**) and (**2**) each with two filters: *1.1* and *1.2* as well as *2.1* and *2.2*. |
0 | 267 The following screenshot explains how this can be done: |
268 | |
5
23a1c1f66b47
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
4
diff
changeset
|
269 .. image:: complex-filters.png |
0 | 270 |
271 ----- | |
272 | |
273 **Example 4. Even more complex filtering with Rules** | |
274 | |
275 In the above example we have used two conditions (Condition 1 and Condition 2). Using multiple conditions allows you to combine them in a variety of ways to enable even more powerful filtering. | |
276 For example, suppose you want to get all reads that (**1**) do NOT map to mitochondria and either (**2**) have mapping quality over 20, or (**3**) are in properly mapped pairs. The logical rule to enable such | |
277 filtering will look like this:: | |
278 | |
279 !(1) & (2 | 3) | |
9
c20a4a4acf3f
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/bamtools/bamtools_filter commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
devteam
parents:
8
diff
changeset
|
280 |
0 | 281 Here, numbers 1, 2, and 3 represent conditions. The following screenshot illustrates how to do this in Galaxy: |
282 | |
5
23a1c1f66b47
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
4
diff
changeset
|
283 .. image:: rule.png |
0 | 284 |
285 There are three conditions here, each with a single filter. A text entry area that can be opened by clicking on the **Would you like to set rules?** checkbox enables you to enter a rule. | |
286 Here numbers correspond to numbers of conditions as they are shown in the interface. E.g., 1 corresponds to condition 1, 2 to condition 2 and so on... In human language this means:: | |
287 | |
288 NOT condition 1 AND (condition 2 OR condition 3) | |
289 | |
290 ----- | |
291 | |
292 **JSON script file** | |
293 | |
294 This tool produces two outputs. One of them is a BAM file containing filtered reads. The other is a JSONified script. It can help you to see how your instructions are sent to BAMTools. | |
295 For instance, Example 3 looks like this in the JSON form:: | |
296 | |
297 { | |
298 "filters": | |
299 [ | |
300 { "id": "1", | |
301 "tag":"NM:=0", | |
302 "isReverseStrand":"false" | |
303 }, | |
304 { "id": "2", | |
305 "tag":"NM:>0", | |
306 "isReverseStrand":"true" | |
307 } | |
308 ] | |
309 } | |
1
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
310 |
0 | 311 |
312 ----- | |
313 | |
314 **More information** | |
315 | |
316 .. class:: infomark | |
317 | |
318 Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki | |
319 | |
320 | |
321 </help> | |
322 <citations> | |
323 <citation type="doi">10.1093/bioinformatics/btr174</citation> | |
324 </citations> | |
325 </tool> |