<tool id="samtools_filter" name="Filter SAM or BAM" version="1.1.1">
    <!--
      Galaxy wrapper around "samtools view" that filters a SAM or BAM dataset by
      header handling, minimum MAPQ, required/skipped FLAG bits, read group,
      library, a BED file of intervals, or explicit regions (BAM input only),
      and writes the result as SAM or BAM.
    -->
    <description>files on FLAG MAPQ RG LB or by region</description>
    <requirements>
        <requirement type="package" version="0.1.18">samtools</requirement>
    </requirements>
    <!--
    samtools view [-bchuHS] [-t in.refList] [-o output] [-f reqFlag] [-F skipFlag] [-q minMapQ] [-l library] [-r readGroup] [-R rgFile] <in.bam>|<in.sam> [region1 [...]]
    Usage:   samtools view [options] <in.bam>|<in.sam> [region1 [...]]

    Options: -b       output BAM
             -h       print header for the SAM output
             -H       print header only (no alignments)
             -S       input is SAM
             -u       uncompressed BAM output (force -b)
             -1       fast compression (force -b)
             -x       output FLAG in HEX (samtools-C specific)
             -X       output FLAG in string (samtools-C specific)
             -c       print only the count of matching records
             -L FILE  output alignments overlapping the input BED FILE [null]
             -t FILE  list of reference names and lengths (force -S) [null]
             -T FILE  reference sequence file (force -S) [null]
             -o FILE  output file name [stdout]
             -R FILE  list of read groups to be outputted [null]
             -f INT   required flag, 0 for unset [0]
             -F INT   filtering flag, 0 for unset [0]
             -q INT   minimum mapping quality [0]
             -l STR   only output reads in library STR [null]
             -r STR   only output reads in read group STR [null]
             -?       longer help
    -->
    <!-- The template is wrapped in CDATA so that shell/Cheetah characters such
         as the ampersand in the stderr redirect and the greater-than sign in
         comparisons do not have to be entity-escaped. -->
    <command><![CDATA[
        ##set up input files, regions requires input.bam and input.bai
        #if isinstance($input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('bam').__class__):
            #set $input = 'input.bam'
            ln -s "$input1" $input &&
            ln -s "$input1.metadata.bam_index" input.bai &&
        #elif isinstance($input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('sam').__class__):
            #set $input = 'input.sam'
            ln -s "$input1" $input &&
        #end if
        samtools view -o "$output1" $header

        #if $input1.datatype.file_ext == 'sam':
            -S
        #end if

        #if $outputtype.__str__ == "bam":
            -b
        #end if

        #if $mapq.__str__ != '':
            -q $mapq
        #end if
        #if $flag.filter.__str__ == 'yes':
            ## The selected checkbox values are hex bit masks; sum them into one mask.
            #if $flag.reqBits.__str__ != 'None':
                #set $reqs = $flag.reqBits.__str__.split(',')
                #set $reqFlag = 0
                #for $xn in $reqs:
                    #set $reqFlag += int(xn,16)
                #end for
                -f $hex($reqFlag)
            #end if
            #if $flag.skipBits.__str__ != 'None':
                #set $skips = $flag.skipBits.__str__.split(',')
                #set $skipFlag = 0
                #for $xn in $skips:
                    #set $skipFlag += int(xn,16)
                #end for
                -F $hex($skipFlag)
            #end if
        #end if
        ## Quote free-text values so a name containing a space stays one argument;
        ## the value is stripped first, matching what the unquoted form passed.
        #if $read_group.__str__.strip() != '':
            -r "${read_group.__str__.strip()}"
        #end if
        #if $library.__str__.strip() != '':
            -l "${library.__str__.strip()}"
        #end if
        #if $bed_file.__str__ != "None" and len($bed_file.__str__) > 0:
            -L "$bed_file"
        #end if
        $input
        ## Regions are deliberately left unquoted: several space-separated
        ## regions must reach samtools as separate arguments.
        #if $regions.__str__.strip() != '' and $input1.datatype.file_ext == 'bam':
            $regions.__str__.strip()
        #end if
        ## need to redirect stderr message so galaxy does not think this failed
        2>&1
    ]]></command>
    <inputs>
        <param name="input1" type="data" format="sam,bam" label="SAM or BAM File to Filter" />
        <param name="header" type="select" label="Header in output">
            <option value="-h">Include Header</option>
            <option value="">Exclude Header</option>
            <option value="-H">Only the Header</option>
        </param>
        <param name="mapq" type="integer" value="" optional="true" label="Minimum MAPQ quality score">
            <validator type="in_range" message="The MAPQ quality score can't be negative" min="0"/>
        </param>
        <conditional name="flag">
            <param name="filter" type="select" label="Filter on bitwise flag">
                <option value="no">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <!-- Option values are hexadecimal FLAG bit masks; the command template sums them. -->
                <param name="reqBits" type="select" multiple="true" display="checkboxes" label="Only output alignments with all of these flag bits set" >
                    <option value="0x0001">Read is paired</option>
                    <option value="0x0002">Read is mapped in a proper pair</option>
                    <option value="0x0004">The read is unmapped</option>
                    <option value="0x0008">The mate is unmapped</option>
                    <option value="0x0010">Read strand</option>
                    <option value="0x0020">Mate strand</option>
                    <option value="0x0040">Read is the first in a pair</option>
                    <option value="0x0080">Read is the second in a pair</option>
                    <option value="0x0100">The alignment of this read is not primary</option>
                    <option value="0x0200">The read fails platform/vendor quality checks</option>
                    <option value="0x0400">The read is a PCR or optical duplicate</option>
                </param>
                <param name="skipBits" type="select" multiple="true" display="checkboxes" label="Skip alignments with any of these flag bits set" >
                    <option value="0x0001">Read is paired</option>
                    <option value="0x0002">Read is mapped in a proper pair</option>
                    <option value="0x0004">The read is unmapped</option>
                    <option value="0x0008">The mate is unmapped</option>
                    <option value="0x0010">Read strand</option>
                    <option value="0x0020">Mate strand</option>
                    <option value="0x0040">Read is the first in a pair</option>
                    <option value="0x0080">Read is the second in a pair</option>
                    <option value="0x0100">The alignment of this read is not primary</option>
                    <option value="0x0200">The read fails platform/vendor quality checks</option>
                    <option value="0x0400">The read is a PCR or optical duplicate</option>
                </param>
            </when>
        </conditional>
        <param name="library" type="text" value="" size="20" label="Select alignments from Library"
               help="Requires headers in the input SAM or BAM, otherwise no alignments will be output."/>
        <param name="read_group" type="text" value="" size="20" label="Select alignments from Read Group"
               help="Requires headers in the input SAM or BAM, otherwise no alignments will be output."/>
        <param name="bed_file" type="data" format="bed" optional="true" label="Output alignments overlapping the regions in the BED FILE"/>
        <param name="regions" type="text" value="" size="180" label="Select regions (only used when the input is in BAM format)"
               help="region should be presented in one of the following formats: `chr1', `chr2:1,000' and `chr3:1000-2,000'"/>
        <param name="outputtype" type="select" label="Select the output format">
            <option value="bam">bam</option>
            <option value="sam">sam</option>
        </param>
    </inputs>
    <outputs>
        <!-- The datatype follows the requested output format rather than the
             input: SAM by default, switched to BAM when "bam" is selected.
             Inheriting the input format would register SAM text produced from
             a BAM input as a BAM dataset. -->
        <data name="output1" format="sam" label="${tool.name} on ${on_string}: ${outputtype}">
            <change_format>
                <when input="outputtype" value="bam" format="bam" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Every test requests SAM output so the text assertions run against
             plain text instead of compressed BAM (the first, default option). -->
        <test>
            <param name="input1" value="bam_to_sam_in2.sam" ftype="sam" />
            <param name="header" value=""/>
            <param name="filter" value="yes"/>
            <param name="reqBits" value="0x0080"/>
            <param name="outputtype" value="sam"/>
            <output name="output1" >
                <assert_contents>
                    <has_text text="141" />
                    <not_has_text text="77" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input1" value="bam_to_sam_in2.sam" ftype="sam" />
            <param name="header" value=""/>
            <param name="filter" value="no"/>
            <param name="read_group" value="rg1"/>
            <param name="outputtype" value="sam"/>
            <output name="output1" >
                <assert_contents>
                    <has_text text="rg1" />
                    <not_has_text text="rg2" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input1" value="bam_to_sam_in1.sam" ftype="sam" />
            <param name="header" value=""/>
            <param name="filter" value="yes"/>
            <param name="skipBits" value="0x0008"/>
            <param name="mapq" value="250"/>
            <param name="outputtype" value="sam"/>
            <output name="output1" >
                <assert_contents>
                    <has_text text="both_reads_align_clip_marked" />
                    <not_has_text text="both_reads_present_only_first_aligns" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>

**What it does**

This tool uses the samtools view command in SAMTools_ toolkit to filter a SAM or BAM file on the MAPQ (mapping quality), FLAG bits, Read Group, Library, or region.

**Input**

Input is either a SAM or BAM file.

**Output**

The output file will be SAM or BAM (depending on the chosen option), filtered by the selected options.

**Options**

Filtering by read group or library requires headers in the input SAM or BAM file.

If regions are specified, only alignments overlapping the specified regions will be output. An alignment may be given multiple times if it is overlapping several regions.
A region can be presented, for example, in the following format::

    chr2 (the whole chr2)
    chr2:1000000 (region starting from 1,000,000bp)
    chr2:1,000,000-2,000,000 (region between 1,000,000 and 2,000,000bp including the end points).

Note: The coordinate is 1-based.

Multiple regions may be specified, separated by a space character::

    chr2:1000000-2000000 chr2:1,000,000-2,000,000 chrX

.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml

    </help>
</tool>