<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper for "SnpSift filter".

  The filter expression typed by the user is written to a config file
  (exprFile) and passed to SnpSift with -e rather than being placed on the
  command line. The filtered VCF is captured from standard output.
-->
<tool id="snpSift_filter" name="SnpSift Filter" version="3.2">
    <!-- NOTE(review): sanitization is disabled, presumably so that expression
         operators reach the config file verbatim; confirm before changing. -->
    <options sanitize="False" />
    <description>Filter variants using arbitrary expressions</description>
    <requirements>
        <requirement type="package" version="3.2">snpEff</requirement>
    </requirements>
    <command>
        ## Paths are quoted so that unusual characters in file names cannot split arguments.
        java -Xmx6G -jar \$JAVA_JAR_PATH/SnpSift.jar filter -f "$input" -e "$exprFile" $inverse $pass
        ## Each optional string is only emitted when the user supplied a non-blank value.
        ## The value is passed as a separate argument: "--option value", with no "=" token.
        #if $filterId and len($filterId.__str__.strip()) > 0:
            --filterId "$filterId"
        #end if
        #if $addFilter and len($addFilter.__str__.strip()) > 0:
            --addFilter "$addFilter"
        #end if
        #if $rmFilter and len($rmFilter.__str__.strip()) > 0:
            --rmFilter "$rmFilter"
        #end if
        > "$output"
    </command>
    <inputs>
        <param format="vcf" name="input" type="data" label="VCF input"/>
        <param name="expr" type="text" label="Expression" size="120"/>
        <param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false"
               label="Inverse. Show lines that do not match filter expression"/>
        <param name="pass" type="boolean" truevalue="--pass" falsevalue="" checked="false"
               label="Use 'PASS' field instead of filtering out VCF entries"/>
        <param name="filterId" type="text" value="" optional="true" size="10"
               label="ID for this filter (##FILTER tag in header and FILTER VCF field)."/>
        <param name="addFilter" type="text" value="" optional="true" size="10"
               label="Add a string to FILTER VCF field if 'expression' is true."/>
        <param name="rmFilter" type="text" value="" optional="true" size="10"
               label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)."/>
    </inputs>
    <!-- The template line below is deliberately not indented so that no extra
         leading whitespace is written into the expression file. -->
    <configfiles>
        <configfile name="exprFile">
$expr
        </configfile>
    </configfiles>

    <outputs>
        <data format="vcf" name="output" />
    </outputs>
    <stdio>
        <exit_code range=":-1" level="fatal" description="Error: Cannot open file" />
        <exit_code range="1:" level="fatal" description="Error" />
    </stdio>

    <tests>

        <!-- Filter on the QUAL column. -->
        <test>
            <param name="input" ftype="vcf" value="test01.vcf"/>
            <param name="expr" value="QUAL >= 50"/>
            <output name="output">
                <assert_contents>
                    <has_text text="28837706" />
                    <not_has_text text="NT_166464" />
                </assert_contents>
            </output>
        </test>

        <!-- Filter on the CHROM column. -->
        <test>
            <param name="input" ftype="vcf" value="test01.vcf"/>
            <param name="expr" value="(CHROM = '19')"/>
            <output name="output">
                <assert_contents>
                    <has_text text="3205820" />
                    <not_has_text text="NT_16" />
                </assert_contents>
            </output>
        </test>

        <!-- Filter on a position range; the operators must be escaped inside an attribute value. -->
        <test>
            <param name="input" ftype="vcf" value="test01.vcf"/>
            <param name="expr" value="(POS &gt;= 20175) &amp; (POS &lt;= 35549)"/>
            <output name="output">
                <assert_contents>
                    <has_text text="20175" />
                    <has_text text="35549" />
                    <has_text text="22256" />
                    <not_has_text text="18933" />
                    <not_has_text text="37567" />
                </assert_contents>
            </output>
        </test>

        <!-- Filter on the DP INFO field. -->
        <test>
            <param name="input" ftype="vcf" value="test01.vcf"/>
            <param name="expr" value="( DP >= 5 )"/>
            <output name="output">
                <assert_contents>
                    <has_text text="DP=5;" />
                    <has_text text="DP=6;" />
                    <not_has_text text="DP=1;" />
                </assert_contents>
            </output>
        </test>

    </tests>

    <help><![CDATA[

**SnpSift filter**

You can filter a VCF file using arbitrary expressions, for instance "(QUAL > 30) | (exists INDEL) | ( countHet() > 2 )". The actual expressions can be quite complex, so it allows for a lot of flexibility.

Some examples:

- *I want to filter out samples with quality less than 30*:

  * **( QUAL > 30 )**

- *...but we also want InDels that have quality 20 or more*:

  * **(( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 )**

- *...or any homozygous variant present in more than 3 samples*:

  * **(countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 )**

- *...or any heterozygous sample with coverage 25 or more*:

  * **((countHet() > 0) & (DP >= 25)) | (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 )**

- *I want to keep samples where the genotype for the first sample is homozygous variant and the genotype for the second sample is reference*:

  * **isHom( GEN[0] ) & isVariant( GEN[0] ) & isRef( GEN[1] )**


For complete details about this tool and expressions that can be used, please go to http://snpeff.sourceforge.net/SnpSift.html#filter

    ]]></help>
</tool>