1
|
<tool id="snpSift_filter" name="SnpSift Filter" version="4.0.0">
    <!--
        Galaxy wrapper for "SnpSift filter": keeps, drops or re-labels VCF entries
        according to a user-supplied boolean expression.

        The expression is written to a config file (exprFile) and handed to SnpSift
        with the exprFile option, so it never appears on the shell command line.
        Shared requirements, stdio handling and the help tokens
        (@EXTERNAL_DOCUMENTATION@, @CITATION_SECTION@) are imported from
        snpEff_macros.xml, which is not part of this file.
    -->
    <description>Filter variants using arbitrary expressions</description>
    <macros>
        <import>snpEff_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <command><![CDATA[
## Read variants from the input VCF, take the expression from the generated
## config file, and redirect SnpSift's stdout into the output dataset.
## Paths and free-text values are single-quoted so the shell sees them as literals.
java -Xmx6G -jar "\$SNPEFF_JAR_PATH/SnpSift.jar" filter -f '$input' -e '$exprFile' $inverse
#if $filtering.mode == 'field':
    ## "field" mode keeps every entry and only rewrites the FILTER column.
    #if $filtering.replace.pass:
        --pass
        ## The custom ID is only offered (and only meaningful) together with --pass.
        #if $filtering.replace.filterId and str($filtering.replace.filterId).strip():
            --filterId '$filtering.replace.filterId'
        #end if
    #end if
    #if $filtering.addFilter and str($filtering.addFilter).strip():
        --addFilter '$filtering.addFilter'
    #end if
    #if $filtering.rmFilter and str($filtering.rmFilter).strip():
        --rmFilter '$filtering.rmFilter'
    #end if
#end if
> '$output'
    ]]></command>
    <configfiles>
        <!-- Holds the raw filter expression; #slurp drops the trailing newline. -->
        <configfile name="exprFile">
$expr#slurp
</configfile>
    </configfiles>
    <inputs>
        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
        <!--
            Filter expressions need characters such as quotes, brackets, and the
            ampersand and comparison operators, which Galaxy's default sanitizer
            would rewrite. Sanitization is therefore disabled for this parameter
            only; its value reaches SnpSift through the config file, not the shell.
            All other text parameters keep the default sanitizer.
        -->
        <param name="expr" type="text" label="Filter criteria" size="160" help="Need help? See below a few examples.">
            <sanitizer sanitize="False"/>
        </param>
        <param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false" label="Inverse filter" help="Show lines that do not match filter expression" />
        <conditional name="filtering">
            <param name="mode" type="select" label="Filter mode">
                <option value="entries" selected="true">Retain entries that pass filter, remove other entries</option>
                <option value="field">Change the FILTER field, but retain all entries</option>
            </param>
            <when value="entries"/>
            <when value="field">
                <conditional name="replace">
                    <param name="pass" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Set matching entry FILTER to 'PASS'"
                           help="appends an ID tag to non-matching entry FILTER "/>
                    <when value="no"/>
                    <when value="yes">
                        <param name="filterId" type="text" value="" optional="true" label="ID appended to non-matching (##FILTER tag in header and FILTER VCF field)." size="10"
                               help="Default ID is 'SnpSift'"/>
                    </when>
                </conditional>
                <param name="addFilter" type="text" value="" optional="true" label="Add a string to FILTER VCF field if 'expression' is true." size="10"/>
                <param name="rmFilter" type="text" value="" optional="true" label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)." size="10"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data format="vcf" name="output" />
    </outputs>
    <tests>
        <!-- Numeric comparison on the QUAL column. -->
        <test>
            <param name="input" ftype="vcf" value="test01.vcf"/>
            <param name="expr" value="QUAL &gt;= 50"/>
            <param name="mode" value="entries"/>
            <output name="output">
                <assert_contents>
                    <has_text text="28837706" />
                    <not_has_text text="NT_166464" />
                </assert_contents>
            </output>
        </test>

        <!-- String equality on the CHROM column. -->
        <test>
            <param name="input" ftype="vcf" value="test01.vcf"/>
            <param name="expr" value="(CHROM = '19')"/>
            <param name="mode" value="entries"/>
            <output name="output">
                <assert_contents>
                    <has_text text="3205820" />
                    <not_has_text text="NT_16" />
                </assert_contents>
            </output>
        </test>

        <!-- Two conditions joined with a logical AND; both bounds are inclusive. -->
        <test>
            <param name="input" ftype="vcf" value="test01.vcf"/>
            <param name="expr" value="(POS &gt;= 20175) &amp; (POS &lt;= 35549)"/>
            <param name="mode" value="entries"/>
            <output name="output">
                <assert_contents>
                    <has_text text="20175" />
                    <has_text text="35549" />
                    <has_text text="22256" />
                    <not_has_text text="18933" />
                    <not_has_text text="37567" />
                </assert_contents>
            </output>
        </test>

        <!-- Comparison on an INFO field (DP). -->
        <test>
            <param name="input" ftype="vcf" value="test01.vcf"/>
            <param name="expr" value="( DP &gt;= 5 )"/>
            <param name="mode" value="entries"/>
            <output name="output">
                <assert_contents>
                    <has_text text="DP=5;" />
                    <has_text text="DP=6;" />
                    <not_has_text text="DP=1;" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

**SnpSift filter**

You can filter a VCF file using arbitrary expressions, for instance "(QUAL > 30) | (exists INDEL) | ( countHet() > 2 )". The actual expressions can be quite complex, so it allows for a lot of flexibility.

Some examples:

- *I want just the variants from the second million bases of chr1*:

  ::

      ( CHROM = 'chr1' ) & ( POS > 1000000 ) & ( POS < 2000000 )

- *Filter value is either 'PASS' or it is missing*:

  ::

      (FILTER = 'PASS') | ( na FILTER )

- *I want to filter lines with an EFF of 'frameshift_variant' ( for vcf files using Sequence Ontology terms )*:

  ::

      ( EFF[*].EFFECT = 'frameshift_variant' )

- *I want to filter lines with an EFF of 'FRAME_SHIFT' ( for vcf files using Classic Effect names )*:

  ::

      ( EFF[*].EFFECT = 'FRAME_SHIFT' )

- *I want to filter out samples with quality less than 30*:

  ::

      ( QUAL > 30 )

- *...but we also want InDels that have quality 20 or more*:

  ::

      (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 )

- *...or any homozygous variant present in more than 3 samples*:

  ::

      (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 )

- *...or any heterozygous sample with coverage 25 or more*:

  ::

      ((countHet() > 0) & (DP >= 25)) | (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 )

- *I want to keep samples where the genotype for the first sample is homozygous variant and the genotype for the second sample is reference*:

  ::

      (isHom( GEN[0] ) & isVariant( GEN[0] ) & isRef( GEN[1] ))


**For information regarding HGVS and Sequence Ontology terms versus classic names**:

- http://snpeff.sourceforge.net/SnpEff_manual.html#cmdline for the options: -classic, -hgvs, and -sequenceOntology
- http://snpeff.sourceforge.net/SnpEff_manual.html#input for the table containing the classic name and sequence ontology term for each effect


@EXTERNAL_DOCUMENTATION@
http://snpeff.sourceforge.net/SnpSift.html#filter

@CITATION_SECTION@

    ]]></help>
</tool>
|