Mercurial > repos > iuc > snpsift
comparison snpSift_filter.xml @ 0:f0faaa4d6ee5 draft
Uploaded
| author | iuc |
|---|---|
| date | Mon, 06 Apr 2015 15:47:15 -0400 |
| parents | |
| children | b884686a80dc |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f0faaa4d6ee5 |
|---|---|
| 1 <tool id="snpSift_filter" name="SnpSift Filter" version="4.0.0"> | |
| 2 <options sanitize="False" /> | |
| 3 <description>Filter variants using arbitrary expressions</description> | |
| 4 <expand macro="requirements" /> | |
| 5 <macros> | |
| 6 <import>snpSift_macros.xml</import> | |
| 7 </macros> | |
| 8 <command> | |
| 9 java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar filter -f $input -e $exprFile $inverse | |
| 10 #if $filtering.mode == 'field': | |
| 11 #if $filtering.replace.pass: | |
| 12 --pass | |
| 13 #if $filtering.replace.filterId and len($filtering.replace.filterId.__str__.strip()) > 0: | |
| 14 --filterId "$filtering.replace.filterId" | |
| 15 #end if | |
| 16 #end if | |
| 17 #if $filtering.addFilter and len($filtering.addFilter.__str__.strip()) > 0: | |
| 18 --addFilter "$filtering.addFilter" | |
| 19 #end if | |
| 20 #if $filtering.rmFilter and len($filtering.rmFilter.__str__.strip()) > 0: | |
| 21 --rmFilter "$filtering.rmFilter" | |
| 22 #end if | |
| 23 #end if | |
| 24 > $output | |
| 25 </command> | |
| 26 <inputs> | |
| 27 <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> | |
| 28 <param name="expr" type="text" label="Filter criteria" size="160" help="Need help? See below a few examples." /> | |
| 29 <param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false" label="Inverse filter" help="Show lines that do not match filter expression" /> | |
| 30 <conditional name="filtering"> | |
| 31 <param name="mode" type="select" label="Filter mode"> | |
| 32 <option value="entries" selected="true">Retain entries that pass filter, remove other entries</option> | |
| 33 <option value="field">Change the FILTER field, but retain all entries</option> | |
| 34 </param> | |
| 35 <when value="entries"/> | |
| 36 <when value="field"> | |
| 37 <conditional name="replace"> | |
| 38 <param name="pass" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Set matching entry FILTER to 'PASS'" | |
| 39 help="appends an ID tag to non-matching entry FILTER "/> | |
| 40 <when value="no"/> | |
| 41 <when value="yes"> | |
| 42 <param name="filterId" type="text" value="" optional="true" label="ID appended to non-matching (##FILTER tag in header and FILTER VCF field)." size="10" | |
| 43 help="Default ID is 'SnpSift'"/> | |
| 44 </when> | |
| 45 </conditional> | |
| 46 <param name="addFilter" type="text" value="" optional="true" label="Add a string to FILTER VCF field if 'expression' is true." size="10"/> | |
| 47 <param name="rmFilter" type="text" value="" optional="true" label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)." size="10"/> | |
| 48 </when> | |
| 49 </conditional> | |
| 50 </inputs> | |
| 51 <configfiles> | |
| 52 <configfile name="exprFile"> | |
| 53 $expr#slurp | |
| 54 </configfile> | |
| 55 </configfiles> | |
| 56 | |
| 57 <outputs> | |
| 58 <data format="vcf" name="output" /> | |
| 59 </outputs> | |
| 60 <expand macro="stdio" /> | |
| 61 <tests> | |
| 62 <test> | |
| 63 <param name="input" ftype="vcf" value="test01.vcf"/> | |
| 64 <param name="expr" value="QUAL >= 50"/> | |
| 65 <param name="mode" value="entries"/> | |
| 66 <output name="output"> | |
| 67 <assert_contents> | |
| 68 <has_text text="28837706" /> | |
| 69 <not_has_text text="NT_166464" /> | |
| 70 </assert_contents> | |
| 71 </output> | |
| 72 </test> | |
| 73 | |
| 74 <test> | |
| 75 <param name="input" ftype="vcf" value="test01.vcf"/> | |
| 76 <param name="expr" value="(CHROM = '19')"/> | |
| 77 <param name="mode" value="entries"/> | |
| 78 <output name="output"> | |
| 79 <assert_contents> | |
| 80 <has_text text="3205820" /> | |
| 81 <not_has_text text="NT_16" /> | |
| 82 </assert_contents> | |
| 83 </output> | |
| 84 </test> | |
| 85 | |
| 86 <test> | |
| 87 <param name="input" ftype="vcf" value="test01.vcf"/> | |
| 88 <param name="expr" value="(POS >= 20175) & (POS <= 35549)"/> | |
| 89 <param name="mode" value="entries"/> | |
| 90 <output name="output"> | |
| 91 <assert_contents> | |
| 92 <has_text text="20175" /> | |
| 93 <has_text text="35549" /> | |
| 94 <has_text text="22256" /> | |
| 95 <not_has_text text="18933" /> | |
| 96 <not_has_text text="37567" /> | |
| 97 </assert_contents> | |
| 98 </output> | |
| 99 </test> | |
| 100 | |
| 101 <test> | |
| 102 <param name="input" ftype="vcf" value="test01.vcf"/> | |
| 103 <param name="expr" value="( DP >= 5 )"/> | |
| 104 <param name="mode" value="entries"/> | |
| 105 <output name="output"> | |
| 106 <assert_contents> | |
| 107 <has_text text="DP=5;" /> | |
| 108 <has_text text="DP=6;" /> | |
| 109 <not_has_text text="DP=1;" /> | |
| 110 </assert_contents> | |
| 111 </output> | |
| 112 </test> | |
| 113 </tests> | |
| 114 <help> | |
| 115 | |
| 116 **SnpSift filter** | |
| 117 | |
| 118 You can filter a VCF file using arbitrary expressions, for instance "(QUAL > 30) | (exists INDEL) | ( countHet() > 2 )". The actual expressions can be quite complex, so it allows for a lot of flexibility. | |
| 119 | |
| 120 Some examples: | |
| 121 | |
| 122 - *I want just the variants from the second million bases of chr1*: | |
| 123 | |
| 124 :: | |
| 125 | |
| 126 ( CHROM = 'chr1' ) & ( POS > 1000000 ) & ( POS < 2000000 ) | |
| 127 | |
| 128 - *Filter value is either 'PASS' or it is missing*: | |
| 129 | |
| 130 :: | |
| 131 | |
| 132 (FILTER = 'PASS') | ( na FILTER ) | |
| 133 | |
| 134 - *I want to filter lines with an EFF of 'frameshift_variant' ( for vcf files using Sequence Ontology terms )*: | |
| 135 | |
| 136 :: | |
| 137 | |
| 138 ( EFF[*].EFFECT = 'frameshift_variant' ) | |
| 139 | |
| 140 - *I want to filter lines with an EFF of 'FRAME_SHIFT' ( for vcf files using Classic Effect names )*: | |
| 141 | |
| 142 :: | |
| 143 | |
| 144 ( EFF[*].EFFECT = 'FRAME_SHIFT' ) | |
| 145 | |
| 146 - *I want to filter out samples with quality less than 30*: | |
| 147 | |
| 148 :: | |
| 149 | |
| 150 ( QUAL > 30 ) | |
| 151 | |
| 152 - *...but we also want InDels that have quality 20 or more*: | |
| 153 | |
| 154 :: | |
| 155 | |
| 156 (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 ) | |
| 157 | |
| 158 - *...or any homozygous variant present in more than 3 samples*: | |
| 159 | |
| 160 :: | |
| 161 | |
| 162 (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 ) | |
| 163 | |
| 164 - *...or any heterozygous sample with coverage 25 or more*: | |
| 165 | |
| 166 :: | |
| 167 | |
| 168 ((countHet() > 0) & (DP >= 25)) | (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 ) | |
| 169 | |
| 170 - *I want to keep samples where the genotype for the first sample is homozygous variant and the genotype for the second sample is reference*: | |
| 171 | |
| 172 :: | |
| 173 | |
| 174 (isHom( GEN[0] ) & isVariant( GEN[0] ) & isRef( GEN[1] )) | |
| 175 | |
| 176 | |
| 177 **For information regarding HGVS and Sequence Ontology terms versus classic names**: | |
| 178 | |
| 179 - http://snpeff.sourceforge.net/SnpEff_manual.html#cmdline for the options: -classic, -hgvs, and -sequenceOntology | |
| 180 - http://snpeff.sourceforge.net/SnpEff_manual.html#input for the table containing the classic name and sequence ontology term for each effect | |
| 181 | |
| 182 | |
| 183 @EXTERNAL_DOCUMENTATION@ | |
| 184 http://snpeff.sourceforge.net/SnpSift.html#filter | |
| 185 | |
| 186 @CITATION_SECTION@ | |
| 187 | |
| 188 </help> | |
| 189 </tool> |
