|
0
|
1 <tool id="bcftools_view" name="bcftools_view" version="0.1.0">
|
|
|
<requirements>
    <!-- External packages resolved before the command runs. bcftools is the
         program invoked in <command>; tabix is presumably listed because it
         supplies the bgzip executable used there (confirm against the package). -->
    <requirement version="1.0" type="package">bcftools</requirement>
    <requirement version="1.0" type="package">tabix</requirement>
</requirements>
|
|
|
<stdio>
    <!-- Matches every exit code from 1 upwards. -->
    <exit_code range="1:"/>
</stdio>
|
|
|
<command><![CDATA[
    ## Cheetah template that assembles the shell command line.
    ## CDATA keeps the shell "&&" operators well-formed inside the XML.

    ## The index step below is run on a compressed file: a plain-text VCF is
    ## first copied into the job working directory and compressed with bgzip.
    ## A dedicated variable holds the path so the $input parameter is not rebound.
    #if str($input.ext) == 'vcf':
        cp "$input" input.vcf && bgzip input.vcf &&
        #set $input_file = 'input.vcf.gz'
    #else:
        #set $input_file = str($input)
    #end if

    bcftools index "$input_file" &&
    bcftools view

    ## Output type
    #if str($output_format) == 'vcf_uncompressed':
        -O v
    #elif str($output_format) == 'vcf_compressed':
        -O z
    #elif str($output_format) == 'bcf_uncompressed':
        -O u
    #elif str($output_format) == 'bcf_compressed':
        -O b
    #end if

    ## Header handling ("all" adds no flag)
    #if str($header_option) == 'header_only':
        --header-only
    #elif str($header_option) == 'no_header':
        --no-header
    #end if

    -o "$output"

    #if str($region):
        -r "$region"
    #end if

    #if str($trim_alt_alleles) == 'True':
        -a
    #end if

    ## True selects sites without a called genotype (-u), False excludes
    ## them (-U); any other value ("off" or unset) adds no flag.
    #if str($sites_no_genotype) == 'True':
        -u
    #elif str($sites_no_genotype) == 'False':
        -U
    #end if

    #if $min_nref:
        --min-ac "$min_nref"
    #end if

    #if $max_nref:
        --max-ac "$max_nref"
    #end if

    #if $samples:
        -s "$samples"
    #end if

    #if $include_types:
        -v "$include_types"
    #end if

    #if $filters:
        --apply-filters "$filters"
    #end if

    ## Multi-select values arrive as a comma-separated string.
    #if $select_sites:
        #for $site in str($select_sites).split(',')
            #if str($site) == 'known':
                -k
            #elif str($site) == 'novel':
                -n
            #end if
        #end for
    #end if

    #if $private:
        #for $choice in str($private).split(',')
            #if str($choice) == 'private':
                -x
            #elif str($choice) == 'exclude':
                -X
            #end if
        #end for
    #end if

    ## Positional input goes last, after every option.
    "$input_file"
]]></command>
|
|
|
<inputs>
    <!-- Parameter names and option values are referenced by <command> and
         <tests>; only labels and help texts are user-facing wording. -->
    <param name="input" type="data" format="bcf,vcf" optional="false" label="VCF/BCF file to view" />

    <param name="output_format" type="select" label="Choose the output format">
        <option value="vcf_uncompressed" selected="true">Uncompressed VCF</option>
        <option value="vcf_compressed">Compressed VCF</option>
        <option value="bcf_uncompressed">Uncompressed BCF</option>
        <option value="bcf_compressed">Compressed BCF</option>
    </param>

    <param name="header_option" type="select" label="Output everything, only the header, or no header">
        <option value="all" selected="true">Print All</option>
        <option value="header_only">Header only</option>
        <option value="no_header">No Header</option>
    </param>

    <param name="region" type="text" size="30" value="" label="Region to view"
           help="Accepted format: CHROM:START-END" />

    <param name="trim_alt_alleles" type="select" optional="true" label="Trim alternate alleles"
           help="Trim alternate alleles not seen in the subset">
        <option value="False">False</option>
        <option value="True">True</option>
    </param>

    <!-- "True" maps to -u and "False" to -U in <command>; "off" adds no flag. -->
    <param name="sites_no_genotype" type="select" optional="true" label="Sites without a called genotype"
           help="Select/exclude sites without a called genotype">
        <option value="off">Turn off completely</option>
        <option value="False">Exclude sites without a called genotype (-U)</option>
        <option value="True">Select only sites without a called genotype (-u)</option>
    </param>

    <param name="min_nref" type="integer" optional="true" value="" label="Minimum count for non reference"
           help="Minimum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" />
    <param name="max_nref" type="integer" optional="true" value="" label="Maximum count for non reference"
           help="Maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" />

    <!-- Passed to -s, which takes a list of sample names rather than a file. -->
    <param name="samples" type="text" optional="true" value="" label="Samples to include or exclude"
           help="Comma-separated list of samples to include (or exclude with &quot;^&quot; prefix)" />

    <param name="include_types" type="select" multiple="true" optional="true" label="Select variant types"
           help="Select comma-separated list of variant types: snps,indels,mnps,other">
        <option value="snps">snps</option>
        <option value="indels">indels</option>
        <option value="mnps">mnps</option>
        <option value="other">other</option>
    </param>

    <param name="filters" type="text" optional="true" value="" label="FILTER strings"
           help="Listed FILTER strings (e.g. &quot;PASS,.&quot;)" />

    <param name="select_sites" type="select" multiple="true" optional="true" label="Select known/novel sites"
           help="Select known/novel sites only (ID is not/is '.')">
        <option value="known">Known</option>
        <option value="novel">Novel</option>
    </param>

    <param name="private" type="select" multiple="true" optional="true"
           label="select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples">
        <option value="private">Private</option>
        <option value="exclude">Exclude private</option>
    </param>
</inputs>
|
|
|
<outputs>
    <!-- The dataset is declared as bcf and re-typed from the chosen
         output_format; bcf_uncompressed has no rule and therefore stays bcf. -->
    <data name="output" format="bcf">
        <change_format>
            <when input="output_format" value="vcf_uncompressed" format="vcf"/>
            <when input="output_format" value="vcf_compressed" format="vcf_bgzip"/>
            <when input="output_format" value="bcf_compressed" format="bcf_bgzip"/>
        </change_format>
    </data>
</outputs>
|
|
|
<tests>
    <!-- Compressed BCF in, uncompressed VCF out. -->
    <test>
        <param name="input" value="input1.bcf.gz"/>
        <param name="output_format" value="vcf_uncompressed"/>
        <output name="output" file="result1.vcf" ftype="vcf" lines_diff="2"/>
    </test>
    <!-- Plain VCF in, compressed BCF out; compared by size only. -->
    <test>
        <param name="input" value="result1.vcf"/>
        <param name="output_format" value="bcf_compressed"/>
        <output name="output" file="result3.bcf.gz" ftype="bcf_bgzip" compare="sim_size" delta="100"/>
    </test>
    <!-- Header suppressed, compressed VCF out. -->
    <test>
        <param name="input" value="input1.bcf.gz"/>
        <param name="header_option" value="no_header"/>
        <param name="output_format" value="vcf_compressed"/>
        <output name="output" file="result2.vcf.gz" ftype="vcf_bgzip" lines_diff="2"/>
    </test>
    <!-- Single-position region queries on merge.a.bcf. -->
    <test>
        <param name="input" value="merge.a.bcf"/>
        <param name="header_option" value="no_header"/>
        <param name="output_format" value="vcf_uncompressed"/>
        <param name="region" value="2:3199812-3199812"/>
        <output name="output" file="tabix.2.3199812.out" ftype="vcf"/>
    </test>
    <test>
        <param name="input" value="merge.a.bcf"/>
        <param name="header_option" value="no_header"/>
        <param name="output_format" value="vcf_uncompressed"/>
        <param name="region" value="1:3000151-3000151"/>
        <output name="output" file="tabix.1.3000151.out" ftype="vcf"/>
    </test>
    <!-- Region queries on the large-chromosome fixtures, BCF and VCF variants. -->
    <test>
        <param name="input" value="large_chrom_tbi_limit.bcf"/>
        <param name="header_option" value="no_header"/>
        <param name="output_format" value="vcf_uncompressed"/>
        <param name="region" value="chr11:1-536870912"/>
        <output name="output" file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf"/>
    </test>
    <test>
        <param name="input" value="large_chrom_csi_limit.vcf.gz"/>
        <param name="header_option" value="no_header"/>
        <param name="output_format" value="vcf_uncompressed"/>
        <param name="region" value="chr20:1-2147483647"/>
        <output name="output" file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf"/>
    </test>
    <test>
        <param name="input" value="large_chrom_tbi_limit.vcf.gz"/>
        <param name="header_option" value="no_header"/>
        <param name="output_format" value="vcf_uncompressed"/>
        <param name="region" value="chr11:1-536870912"/>
        <output name="output" file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf"/>
    </test>
    <test>
        <param name="input" value="large_chrom_csi_limit.bcf"/>
        <param name="header_option" value="no_header"/>
        <param name="output_format" value="vcf_uncompressed"/>
        <param name="region" value="chr20:1-2147483647"/>
        <output name="output" file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf"/>
    </test>
    <!-- Whole-chromosome region given by name only. -->
    <test>
        <param name="input" value="large_chrom_csi_limit.bcf"/>
        <param name="header_option" value="no_header"/>
        <param name="output_format" value="vcf_uncompressed"/>
        <param name="region" value="chr20"/>
        <output name="output" file="large_chrom.20.1.2147483647.out" ftype="vcf"/>
    </test>
    <!-- Subsetting to one sample combined with allele-count and type filters. -->
    <test>
        <param name="input" value="view.vcf.gz"/>
        <param name="trim_alt_alleles" value="True"/>
        <param name="sites_no_genotype" value="False"/>
        <param name="samples" value="NA00002"/>
        <param name="min_nref" value="1"/>
        <param name="max_nref" value="1"/>
        <param name="include_types" value="snps"/>
        <param name="output_format" value="vcf_uncompressed"/>
        <output name="output" file="view.1.out" ftype="vcf" lines_diff="2"/>
    </test>
    <!-- FILTER string, known sites, private sites excluded, restricted to region 20. -->
    <test>
        <param name="input" value="view.vcf.gz"/>
        <param name="filters" value="PASS"/>
        <param name="select_sites" value="known"/>
        <param name="private" value="exclude"/>
        <param name="samples" value="NA00003"/>
        <param name="region" value="20"/>
        <param name="output_format" value="vcf_uncompressed"/>
        <output name="output" file="view.2.out" ftype="vcf" lines_diff="2"/>
    </test>
    <!-- Private sites for a single sample. -->
    <test>
        <param name="input" value="view.vcf.gz"/>
        <param name="private" value="private"/>
        <param name="samples" value="NA00003"/>
        <param name="output_format" value="vcf_uncompressed"/>
        <output name="output" file="view.3.out" ftype="vcf" lines_diff="2"/>
    </test>
</tests>
|
|
|
<help>

About: VCF/BCF conversion, view, subset and filter VCF/BCF files.

Usage: bcftools view [options] &lt;in.vcf.gz&gt; [region1 [...]]

**Output options:**

-G, --drop-genotypes drop individual genotype information (after subsetting if -s option set)

-h/H, --header-only/--no-header print the header only/suppress the header in VCF output

-l, --compression-level [0-9] compression level: 0 uncompressed, 1 best speed, 9 best compression [-1]

-o, --output-file &lt;file&gt; output file name [stdout]

-O, --output-type &lt;b|u|z|v&gt; b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]

-r, --regions &lt;region&gt; restrict to comma-separated list of regions

-R, --regions-file &lt;file&gt; restrict to regions listed in a file

-t, --targets [^]&lt;region&gt; similar to -r but streams rather than index-jumps. Exclude regions with "^" prefix

-T, --targets-file [^]&lt;file&gt; similar to -R but streams rather than index-jumps. Exclude regions with "^" prefix


**Subset options:**

-a, --trim-alt-alleles trim alternate alleles not seen in the subset

-I, --no-update do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)

-s, --samples [^]&lt;list&gt; comma separated list of samples to include (or exclude with "^" prefix)

--force-samples only warn about unknown subset samples

-S, --samples-file [^]&lt;file&gt; file of samples to include (or exclude with "^" prefix)


**Filter options:**

-c/C, --min-ac/--max-ac &lt;int&gt;[:&lt;type&gt;] minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]

-f, --apply-filters &lt;list&gt; require at least one of the listed FILTER strings (e.g. "PASS,.")

-g, --genotype [^]&lt;hom|het|miss&gt; require one or more hom/het/missing genotype or, if prefixed with "^", exclude sites with hom/het/missing genotypes

-i/e, --include/--exclude &lt;expr&gt; select/exclude sites for which the expression is true (see man page for details)

-k/n, --known/--novel select known/novel sites only (ID is not/is '.')

-m/M, --min-alleles/--max-alleles &lt;int&gt; minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites)

-p/P, --phased/--exclude-phased select/exclude sites where all samples are phased

-q/Q, --min-af/--max-af &lt;float&gt;[:&lt;type&gt;] minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]

-u/U, --uncalled/--exclude-uncalled select/exclude sites without a called genotype

-v/V, --types/--exclude-types &lt;list&gt; select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]

-x/X, --private/--exclude-private select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples

</help>
|
|
|
<citations>
    <!-- Publications to cite, identified by DOI. -->
    <citation type="doi">10.1093/bioinformatics/btp352</citation>
    <citation type="doi">10.1093/bioinformatics/btr509</citation>
    <citation type="doi">10.1093/bioinformatics/btr076</citation>
</citations>
|
|
|
315 </tool>
|