comparison bcftools_view.xml @ 0:667b2d503ba3 draft default tip

Uploaded
author takadonet
date Wed, 08 Apr 2015 12:09:16 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:667b2d503ba3
1 <tool id="bcftools_view" name="bcftools_view" version="0.1.0">
2 <requirements> <!-- Packages the command template relies on: bcftools for index/view; NOTE(review): tabix is presumably listed because it ships the bgzip binary used for VCF input - confirm -->
3 <requirement type="package" version="1.0">bcftools</requirement>
4 <requirement type="package" version="1.0">tabix</requirement>
5 </requirements>
6 <stdio> <!-- Rule for interpreting the job's exit status: the range below matches every exit code from 1 upward; NOTE(review): with no level attribute Galaxy presumably treats a match as a fatal error - confirm -->
7 <exit_code range="1:" />
8 </stdio>
9 <command>
10 ## Cheetah template for the shell command. Plain-text VCF input is copied into the working directory and bgzip-compressed, and $input is re-pointed at the compressed copy.
11 #if str($input.ext) == 'vcf':
12 cp $input input.vcf &amp;&amp; bgzip input.vcf &amp;&amp;
13 #set $input="input.vcf.gz"
14 #end if
15 ## Index the (possibly re-pointed) input first, then run bcftools view on it.
16 bcftools index $input &amp;&amp;
17 bcftools view
18 ## Output type flag (-O) selected by the output_format parameter.
19 #if str($output_format) == 'vcf_uncompressed':
20 -O v
21 #elif str($output_format) =='vcf_compressed':
22 -O z
23 #elif str($output_format) =='bcf_uncompressed':
24 -O u
25 #elif str($output_format) =='bcf_compressed':
26 -O b
27 #end if
28 ## Header handling: the remaining choice ('all') adds no flag.
29 #if str($header_option) == 'header_only':
30 --header-only
31 #elif str($header_option) == 'no_header':
32 --no-header
33 #end if
34 ## Output dataset followed by the input file.
35 -o $output
36 $input
37 ## Optional region restriction; quoted like the other user-supplied values below.
38 #if str($region):
39 -r "$region"
40 #end if
41
42 ## Single-line Cheetah conditional: emits -a only when trim_alt_alleles is "True".
43 #if str($trim_alt_alleles) == "True" then "-a" else "" #
44 ## "True" selects uncalled sites (-u), "False" excludes them (-U); any other value adds no flag.
45 #if str($sites_no_genotype) == "True":
46 -u
47 #elif str($sites_no_genotype) == "False":
48 -U
49 #end if
50 ## Allele-count bounds, only emitted when a value was supplied.
51 #if $min_nref:
52 --min-ac "$min_nref"
53 #end if
54
55 #if $max_nref:
56 --max-ac "$max_nref"
57 #end if
58 ## Sample subset passed to -s.
59 #if $samples:
60 -s "$samples"
61 #end if
62 ## Variant types to keep; the multi-select value is passed as a single string.
63 #if $include_types:
64 -v "$include_types"
65 #end if
66 ## FILTER strings to require.
67 #if $filters:
68 --apply-filters "$filters"
69 #end if
70
71 ## Multi-select values are split on commas and each selected choice is mapped to its flag.
72 #if $select_sites:
73 #set $list = str($select_sites).split(',')
74 #for $i, $s in enumerate( $list )
75 #if str($s) == "known":
76 -k
77 #elif str($s) == "novel":
78 -n
79 #end if
80 #end for
81 #end if
82 ## Same pattern for the private / exclude-private selection.
83 #if $private:
84 #set $list = str($private).split(',')
85 #for $i, $s in enumerate( $list )
86 #if str($s) == "private":
87 -x
88 #elif str($s) == "exclude":
89 -X
90 #end if
91 #end for
92 #end if
93
94
95
96 </command>
97 <inputs> <!-- Form parameters; each name is referenced as $name in the command template -->
98 <param format="bcf,vcf" label="VCF/BCF file to view" name="input" optional="false" type="data" />
99 <param label="Choose the output format" name="output_format" type="select">
100 <option selected="true" value="vcf_uncompressed">UnCompressed VCF</option>
101 <option value="vcf_compressed">Compressed VCF</option>
102 <option value="bcf_uncompressed">UnCompressed BCF</option>
103 <option value="bcf_compressed">Compressed BCF</option>
104 </param>
105 <param label="Choose whether to output everything, only the header, or no header" name="header_option" type="select">
106 <option selected="true" value="all">Print All</option>
107 <option value="header_only">Header only</option>
108 <option value="no_header">No Header</option>
109 </param>
110 <param help="Accept following format: CHROM:START-END" label="Region to view" name="region" size="30" type="text" value="" />
111 <param help="trim alternate alleles not seen in the subset" label="Trim alternate alleles" name="trim_alt_alleles" optional="true" type="select">
112 <option value="False">False</option>
113 <option value="True">True</option>
114 </param>
115 <param help="select/exclude sites without a called genotype" label="Sites without a called genotype" name="sites_no_genotype" optional="true" type="select"> <!-- True selects such sites, False excludes them, off adds no flag -->
116 <option value="off">Turn off completely</option>
117 <option value="False">False</option>
118 <option value="True">True</option>
119 </param>
120 <param help="minimum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" label="Minimum count for non reference" name="min_nref" optional="true" type="integer" value="" />
121 <param help="Maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" label="Maximum count for non reference" name="max_nref" optional="true" type="integer" value="" />
122 <param help="comma separated list of samples to include (or exclude with &quot;^&quot; prefix)" label="Samples to include or exclude" name="samples" optional="true" type="text" value="" /> <!-- The value is passed to -s, which takes a list rather than a file -->
123 <param help="select comma-separated list of variant types: snps,indels,mnps,other" label="Select variant types" multiple="true" name="include_types" optional="true" type="select">
124 <option value="snps">snps</option>
125 <option value="indels">indels</option>
126 <option value="mnps">mnps</option>
127 <option value="other">other</option>
128 </param>
129 <param help="Listed FILTER strings (e.g. &quot;PASS, . &quot;)" label="FILTER strings" name="filters" optional="true" type="text" value="" />
130 <param help="select known/novel sites only (ID is not/is '.')" label="Select known/novel sites" multiple="true" name="select_sites" optional="true" type="select">
131 <option value="known">Known</option>
132 <option value="novel">novel</option>
133 </param>
134 <param label="select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples" multiple="true" name="private" optional="true" type="select">
135 <option value="private">private</option>
136 <option value="exclude">Exclude private</option>
137 </param>
138 </inputs>
139 <outputs>
140 <data format="bcf" name="output"> <!-- Single result dataset; the declared format bcf is what remains for bcf_uncompressed, since no rule below matches that value -->
141 <change_format> <!-- Override the dataset format according to the selected output_format -->
142 <when format="vcf" input="output_format" value="vcf_uncompressed" />
143 <when format="vcf_bgzip" input="output_format" value="vcf_compressed" />
144 <when format="bcf_bgzip" input="output_format" value="bcf_compressed" />
145 </change_format>
146 </data>
147 </outputs>
148 <tests> <!-- Functional tests: each test sets parameters by name and compares the output dataset with a stored file -->
149 <test> <!-- input1.bcf.gz written as uncompressed VCF; up to 2 differing lines tolerated -->
150 <param name="input" value="input1.bcf.gz" />
151 <param name="output_format" value="vcf_uncompressed" />
152 <output file="result1.vcf" ftype="vcf" lines_diff="2" name="output" />
153 </test>
154 <test> <!-- VCF input written as compressed BCF; compared by size only (sim_size, delta 100) -->
155 <param name="input" value="result1.vcf" />
156 <param name="output_format" value="bcf_compressed" />
157 <output compare="sim_size" delta="100" file="result3.bcf.gz" ftype="bcf_bgzip" name="output" />
158 </test>
159 <test> <!-- Header suppressed, compressed VCF output -->
160 <param name="input" value="input1.bcf.gz" />
161 <param name="header_option" value="no_header" />
162 <param name="output_format" value="vcf_compressed" />
163 <output file="result2.vcf.gz" ftype="vcf_bgzip" lines_diff="2" name="output" />
164 </test>
165 <test> <!-- Region query on merge.a.bcf (single position on 2) -->
166 <param name="input" value="merge.a.bcf" />
167 <param name="header_option" value="no_header" />
168 <param name="output_format" value="vcf_uncompressed" />
169 <param name="region" value="2:3199812-3199812" />
170 <output file="tabix.2.3199812.out" ftype="vcf" name="output" />
171 </test>
172 <test> <!-- Region query on merge.a.bcf (single position on 1) -->
173 <param name="input" value="merge.a.bcf" />
174 <param name="header_option" value="no_header" />
175 <param name="output_format" value="vcf_uncompressed" />
176 <param name="region" value="1:3000151-3000151" />
177 <output file="tabix.1.3000151.out" ftype="vcf" name="output" />
178 </test>
179 <test> <!-- Large-coordinate region query, BCF input (tbi limit fixture) -->
180 <param name="input" value="large_chrom_tbi_limit.bcf" />
181 <param name="header_option" value="no_header" />
182 <param name="output_format" value="vcf_uncompressed" />
183 <param name="region" value="chr11:1-536870912" />
184 <output file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" name="output" />
185 </test>
186 <test> <!-- Large-coordinate region query, vcf.gz input (csi limit fixture) -->
187 <param name="input" value="large_chrom_csi_limit.vcf.gz" />
188 <param name="header_option" value="no_header" />
189 <param name="output_format" value="vcf_uncompressed" />
190 <param name="region" value="chr20:1-2147483647" />
191 <output file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" name="output" />
192 </test>
193 <test> <!-- Large-coordinate region query, vcf.gz input (tbi limit fixture) -->
194 <param name="input" value="large_chrom_tbi_limit.vcf.gz" />
195 <param name="header_option" value="no_header" />
196 <param name="output_format" value="vcf_uncompressed" />
197 <param name="region" value="chr11:1-536870912" />
198 <output file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" name="output" />
199 </test>
200 <test> <!-- Large-coordinate region query, BCF input (csi limit fixture) -->
201 <param name="input" value="large_chrom_csi_limit.bcf" />
202 <param name="header_option" value="no_header" />
203 <param name="output_format" value="vcf_uncompressed" />
204 <param name="region" value="chr20:1-2147483647" />
205 <output file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" name="output" />
206 </test>
207 <test> <!-- Whole-chromosome region (name only, no coordinates) -->
208 <param name="input" value="large_chrom_csi_limit.bcf" />
209 <param name="header_option" value="no_header" />
210 <param name="output_format" value="vcf_uncompressed" />
211 <param name="region" value="chr20" />
212 <output file="large_chrom.20.1.2147483647.out" ftype="vcf" name="output" />
213 </test>
214 <test> <!-- Combined subset/filter options: trim alleles, sites_no_genotype=False, one sample, min/max count 1, snps only -->
215 <param name="input" value="view.vcf.gz" />
216 <param name="trim_alt_alleles" value="True" />
217 <param name="sites_no_genotype" value="False" />
218 <param name="samples" value="NA00002" />
219 <param name="min_nref" value="1" />
220 <param name="max_nref" value="1" />
221 <param name="include_types" value="snps" />
222 <param name="output_format" value="vcf_uncompressed" />
223 <output file="view.1.out" ftype="vcf" lines_diff="2" name="output" />
224 </test>
225 <test> <!-- FILTER=PASS, known sites, exclude private, one sample, region 20 -->
226 <param name="input" value="view.vcf.gz" />
227 <param name="filters" value="PASS" />
228 <param name="select_sites" value="known" />
229 <param name="private" value="exclude" />
230 <param name="samples" value="NA00003" />
231 <param name="region" value="20" />
232 <param name="output_format" value="vcf_uncompressed" />
233 <output file="view.2.out" ftype="vcf" lines_diff="2" name="output" />
234 </test>
235 <test> <!-- Private sites for one sample -->
236 <param name="input" value="view.vcf.gz" />
237 <param name="private" value="private" />
238 <param name="samples" value="NA00003" />
239 <param name="output_format" value="vcf_uncompressed" />
240 <output file="view.3.out" ftype="vcf" lines_diff="2" name="output" />
241 </test>
242 </tests>
243 <help>
244
245 About: VCF/BCF conversion, view, subset and filter VCF/BCF files.
246
247 Usage: bcftools view [options] &lt;in.vcf.gz&gt; [region1 [...]]
248
249 **Output options:**
250
251 -G, --drop-genotypes drop individual genotype information (after subsetting if -s option set)
252
253 -h/H, --header-only/--no-header print the header only/suppress the header in VCF output
254
255 -l, --compression-level [0-9] compression level: 0 uncompressed, 1 best speed, 9 best compression [-1]
256
257 -o, --output-file &lt;file&gt; output file name [stdout]
258
259 -O, --output-type &lt;b|u|z|v&gt; b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]
260
261 -r, --regions &lt;region&gt; restrict to comma-separated list of regions
262
263 -R, --regions-file &lt;file&gt; restrict to regions listed in a file
264
265 -t, --targets [^]&lt;region&gt; similar to -r but streams rather than index-jumps. Exclude regions with "^" prefix
266
267 -T, --targets-file [^]&lt;file&gt; similar to -R but streams rather than index-jumps. Exclude regions with "^" prefix
268
269
270 **Subset options:**
271
272
273 -a, --trim-alt-alleles trim alternate alleles not seen in the subset
274
275 -I, --no-update do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)
276
277 -s, --samples [^]&lt;list&gt; comma separated list of samples to include (or exclude with "^" prefix)
278
279 --force-samples only warn about unknown subset samples
280 -S, --samples-file [^]&lt;file&gt; file of samples to include (or exclude with "^" prefix)
281
282
283
284
285 **Filter options:**
286
287 -c/C, --min-ac/--max-ac &lt;int&gt;[:&lt;type&gt;] minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]
288
289 -f, --apply-filters &lt;list&gt; require at least one of the listed FILTER strings (e.g. "PASS,.")
290
291 -g, --genotype [^]&lt;hom|het|miss&gt; require one or more hom/het/missing genotype or, if prefixed with "^", exclude sites with hom/het/missing genotypes
292
293 -i/e, --include/--exclude &lt;expr&gt; select/exclude sites for which the expression is true (see man page for details)
294
295 -k/n, --known/--novel select known/novel sites only (ID is not/is '.')
296
297 -m/M, --min-alleles/--max-alleles &lt;int&gt; minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites)
298
299 -p/P, --phased/--exclude-phased select/exclude sites where all samples are phased
300
301 -q/Q, --min-af/--max-af &lt;float&gt;[:&lt;type&gt;] minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]
302
303 -u/U, --uncalled/--exclude-uncalled select/exclude sites without a called genotype
304
305 -v/V, --types/--exclude-types &lt;list&gt; select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]
306
307 -x/X, --private/--exclude-private select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples
308
309 </help>
310 <citations> <!-- DOIs of the publications to cite when using this tool; NOTE(review): presumably the samtools/bcftools papers - confirm each DOI -->
311 <citation type="doi">10.1093/bioinformatics/btp352</citation>
312 <citation type="doi">10.1093/bioinformatics/btr509</citation>
313 <citation type="doi">10.1093/bioinformatics/btr076</citation>
314 </citations>
315 </tool>