Mercurial > repos > takadonet > bcftools_view
comparison bcftools_view.xml @ 0:667b2d503ba3 draft default tip
Uploaded
| author | takadonet |
|---|---|
| date | Wed, 08 Apr 2015 12:09:16 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:667b2d503ba3 |
|---|---|
| 1 <tool id="bcftools_view" name="bcftools_view" version="0.1.0"> | |
| 2 <requirements> | |
| 3 <requirement type="package" version="1.0">bcftools</requirement> | |
| 4 <requirement type="package" version="1.0">tabix</requirement> | |
| 5 </requirements> | |
| 6 <stdio> | |
| 7 <exit_code range="1:" /> | |
| 8 </stdio> | |
| 9 <command> | |
| 10 | |
| 11 #if str($input.ext) == 'vcf': | |
| 12 cp $input input.vcf && bgzip input.vcf && | |
| 13 #set $input="input.vcf.gz" | |
| 14 #end if | |
| 15 | |
| 16 bcftools index $input && | |
| 17 bcftools view | |
| 18 | |
| 19 #if str($output_format) == 'vcf_uncompressed': | |
| 20 -O v | |
| 21 #elif str($output_format) =='vcf_compressed': | |
| 22 -O z | |
| 23 #elif str($output_format) =='bcf_uncompressed': | |
| 24 -O u | |
| 25 #elif str($output_format) =='bcf_compressed': | |
| 26 -O b | |
| 27 #end if | |
| 28 | |
| 29 #if str($header_option) == 'header_only': | |
| 30 --header-only | |
| 31 #elif str($header_option) == 'no_header': | |
| 32 --no-header | |
| 33 #end if | |
| 34 | |
| 35 -o $output | |
| 36 $input | |
| 37 | |
| 38 #if str($region): | |
| 39 -r "$region" | |
| 40 #end if | |
| 41 | |
| 42 | |
| 43 #if str($trim_alt_alleles) == "True" then "-a" else "" # | |
| 44 | |
| 45 #if str($sites_no_genotype) == "True": | |
| 46 "-u" | |
| 47 #elif str($sites_no_genotype) == "False": | |
| 48 "-U" | |
| 49 #end if | |
| 50 | |
| 51 #if $min_nref: | |
| 52 --min-ac "$min_nref" | |
| 53 #end if | |
| 54 | |
| 55 #if $max_nref: | |
| 56 --max-ac "$max_nref" | |
| 57 #end if | |
| 58 | |
| 59 #if $samples: | |
| 60 -s "$samples" | |
| 61 #end if | |
| 62 | |
| 63 #if $include_types: | |
| 64 -v "$include_types" | |
| 65 #end if | |
| 66 | |
| 67 #if $filters: | |
| 68 --apply-filters "$filters" | |
| 69 #end if | |
| 70 | |
| 71 | |
| 72 #if $select_sites: | |
| 73 #set $list = str($select_sites).split(',') | |
| 74 #for $i, $s in enumerate( $list ) | |
| 75 #if str($s) == "known": | |
| 76 -k | |
| 77 #elif str($s) == "novel": | |
| 78 -n | |
| 79 #end if | |
| 80 #end for | |
| 81 #end if | |
| 82 | |
| 83 #if $private: | |
| 84 #set $list = str($private).split(',') | |
| 85 #for $i, $s in enumerate( $list ) | |
| 86 #if str($s) == "private": | |
| 87 -x | |
| 88 #elif str($s) == "exclude": | |
| 89 -X | |
| 90 #end if | |
| 91 #end for | |
| 92 #end if | |
| 93 | |
| 94 | |
| 95 | |
| 96 </command> | |
| 97 <inputs> | |
| 98 <param format="bcf,vcf" label="VCF/BCF file to view" name="input" optional="false" type="data" /> | |
| 99 <param label="Choose the output format" name="output_format" type="select"> | |
| 100 <option selected="true" value="vcf_uncompressed">UnCompressedVCF</option> | |
| 101 <option value="vcf_compressed">Compressed VCF</option> | |
| 102 <option value="bcf_uncompressed">UnCompressed BCF</option> | |
| 103 <option value="bcf_compressed">Compressed BCF</option> | |
| 104 </param> | |
| 105 <param label="Choose the output everything, only header or no header" name="header_option" type="select"> | |
| 106 <option selected="true" value="all">Print All</option> | |
| 107 <option value="header_only">Header only</option> | |
| 108 <option value="no_header">No Header</option> | |
| 109 </param> | |
| 110 <param help="Accept following format: CHROM:START-END" label="Region to view" name="region" size="30" type="text" value="" /> | |
| 111 <param help="trim alternate alleles not seen in the subset" label="Trim alternate alleles" name="trim_alt_alleles" optional="true" type="select"> | |
| 112 <option value="False">False</option> | |
| 113 <option value="True">True</option> | |
| 114 </param> | |
| 115 <param help="select/exclude sites without a called genotype" label="Sites without a called genotype" name="sites_no_genotype" optional="true" type="select"> | |
| 116 <option value="off">Turn off completely</option> | |
| 117 <option value="False">False</option> | |
| 118 <option value="True">True</option> | |
| 119 </param> | |
| 120 <param help="minimum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" label="Minimum count for non reference" name="min_nref" optional="true" type="integer" value="" /> | |
| 121 <param help="Maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" label="Maximum count for non reference" name="max_nref" optional="true" type="integer" value="" /> | |
| 122 <param help="comma-separated list of samples to include (or exclude with '^' prefix)" label="Samples to include or exclude" name="samples" optional="true" type="text" value="" /> | |
| 123 <param help="select comma-separated list of variant types: snps,indels,mnps,other" label="Select variant types" multiple="true" name="include_types" optional="true" type="select"> | |
| 124 <option value="snps">snps</option> | |
| 125 <option value="indels">indels</option> | |
| 126 <option value="mnps">mnps</option> | |
| 127 <option value="other">other</option> | |
| 128 </param> | |
| 129 <param help="Listed FILTER strings (e.g. "PASS, . ")" label="FILTER strings" name="filters" optional="true" type="text" value="" /> | |
| 130 <param help="select known/novel sites only (ID is not/is '.')" label="Select known/novel sites" multiple="true" name="select_sites" optional="true" type="select"> | |
| 131 <option value="known">Known</option> | |
| 132 <option value="novel">novel</option> | |
| 133 </param> | |
| 134 <param label="select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples" multiple="true" name="private" optional="true" type="select"> | |
| 135 <option value="private">private</option> | |
| 136 <option value="exclude">Exclude private</option> | |
| 137 </param> | |
| 138 </inputs> | |
| 139 <outputs> | |
| 140 <data format="bcf" name="output"> | |
| 141 <change_format> | |
| 142 <when format="vcf" input="output_format" value="vcf_uncompressed" /> | |
| 143 <when format="vcf_bgzip" input="output_format" value="vcf_compressed" /> | |
| 144 <when format="bcf_bgzip" input="output_format" value="bcf_compressed" /> | |
| 145 </change_format> | |
| 146 </data> | |
| 147 </outputs> | |
| 148 <tests> | |
| 149 <test> | |
| 150 <param name="input" value="input1.bcf.gz" /> | |
| 151 <param name="output_format" value="vcf_uncompressed" /> | |
| 152 <output file="result1.vcf" ftype="vcf" lines_diff="2" name="output" /> | |
| 153 </test> | |
| 154 <test> | |
| 155 <param name="input" value="result1.vcf" /> | |
| 156 <param name="output_format" value="bcf_compressed" /> | |
| 157 <output compare="sim_size" delta="100" file="result3.bcf.gz" ftype="bcf_bgzip" name="output" /> | |
| 158 </test> | |
| 159 <test> | |
| 160 <param name="input" value="input1.bcf.gz" /> | |
| 161 <param name="header_option" value="no_header" /> | |
| 162 <param name="output_format" value="vcf_compressed" /> | |
| 163 <output file="result2.vcf.gz" ftype="vcf_bgzip" lines_diff="2" name="output" /> | |
| 164 </test> | |
| 165 <test> | |
| 166 <param name="input" value="merge.a.bcf" /> | |
| 167 <param name="header_option" value="no_header" /> | |
| 168 <param name="output_format" value="vcf_uncompressed" /> | |
| 169 <param name="region" value="2:3199812-3199812" /> | |
| 170 <output file="tabix.2.3199812.out" ftype="vcf" name="output" /> | |
| 171 </test> | |
| 172 <test> | |
| 173 <param name="input" value="merge.a.bcf" /> | |
| 174 <param name="header_option" value="no_header" /> | |
| 175 <param name="output_format" value="vcf_uncompressed" /> | |
| 176 <param name="region" value="1:3000151-3000151" /> | |
| 177 <output file="tabix.1.3000151.out" ftype="vcf" name="output" /> | |
| 178 </test> | |
| 179 <test> | |
| 180 <param name="input" value="large_chrom_tbi_limit.bcf" /> | |
| 181 <param name="header_option" value="no_header" /> | |
| 182 <param name="output_format" value="vcf_uncompressed" /> | |
| 183 <param name="region" value="chr11:1-536870912" /> | |
| 184 <output file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" name="output" /> | |
| 185 </test> | |
| 186 <test> | |
| 187 <param name="input" value="large_chrom_csi_limit.vcf.gz" /> | |
| 188 <param name="header_option" value="no_header" /> | |
| 189 <param name="output_format" value="vcf_uncompressed" /> | |
| 190 <param name="region" value="chr20:1-2147483647" /> | |
| 191 <output file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" name="output" /> | |
| 192 </test> | |
| 193 <test> | |
| 194 <param name="input" value="large_chrom_tbi_limit.vcf.gz" /> | |
| 195 <param name="header_option" value="no_header" /> | |
| 196 <param name="output_format" value="vcf_uncompressed" /> | |
| 197 <param name="region" value="chr11:1-536870912" /> | |
| 198 <output file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" name="output" /> | |
| 199 </test> | |
| 200 <test> | |
| 201 <param name="input" value="large_chrom_csi_limit.bcf" /> | |
| 202 <param name="header_option" value="no_header" /> | |
| 203 <param name="output_format" value="vcf_uncompressed" /> | |
| 204 <param name="region" value="chr20:1-2147483647" /> | |
| 205 <output file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" name="output" /> | |
| 206 </test> | |
| 207 <test> | |
| 208 <param name="input" value="large_chrom_csi_limit.bcf" /> | |
| 209 <param name="header_option" value="no_header" /> | |
| 210 <param name="output_format" value="vcf_uncompressed" /> | |
| 211 <param name="region" value="chr20" /> | |
| 212 <output file="large_chrom.20.1.2147483647.out" ftype="vcf" name="output" /> | |
| 213 </test> | |
| 214 <test> | |
| 215 <param name="input" value="view.vcf.gz" /> | |
| 216 <param name="trim_alt_alleles" value="True" /> | |
| 217 <param name="sites_no_genotype" value="False" /> | |
| 218 <param name="samples" value="NA00002" /> | |
| 219 <param name="min_nref" value="1" /> | |
| 220 <param name="max_nref" value="1" /> | |
| 221 <param name="include_types" value="snps" /> | |
| 222 <param name="output_format" value="vcf_uncompressed" /> | |
| 223 <output file="view.1.out" ftype="vcf" lines_diff="2" name="output" /> | |
| 224 </test> | |
| 225 <test> | |
| 226 <param name="input" value="view.vcf.gz" /> | |
| 227 <param name="filters" value="PASS" /> | |
| 228 <param name="select_sites" value="known" /> | |
| 229 <param name="private" value="exclude" /> | |
| 230 <param name="samples" value="NA00003" /> | |
| 231 <param name="region" value="20" /> | |
| 232 <param name="output_format" value="vcf_uncompressed" /> | |
| 233 <output file="view.2.out" ftype="vcf" lines_diff="2" name="output" /> | |
| 234 </test> | |
| 235 <test> | |
| 236 <param name="input" value="view.vcf.gz" /> | |
| 237 <param name="private" value="private" /> | |
| 238 <param name="samples" value="NA00003" /> | |
| 239 <param name="output_format" value="vcf_uncompressed" /> | |
| 240 <output file="view.3.out" ftype="vcf" lines_diff="2" name="output" /> | |
| 241 </test> | |
| 242 </tests> | |
| 243 <help> | |
| 244 | |
| 245 About: VCF/BCF conversion, view, subset and filter VCF/BCF files. | |
| 246 | |
| 247 Usage: bcftools view [options] <in.vcf.gz> [region1 [...]] | |
| 248 | |
| 249 **Output options:** | |
| 250 | |
| 251 -G, --drop-genotypes drop individual genotype information (after subsetting if -s option set) | |
| 252 | |
| 253 -h/H, --header-only/--no-header print the header only/suppress the header in VCF output | |
| 254 | |
| 255 -l, --compression-level [0-9] compression level: 0 uncompressed, 1 best speed, 9 best compression [-1] | |
| 256 | |
| 257 -o, --output-file <file> output file name [stdout] | |
| 258 | |
| 259 -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v] | |
| 260 | |
| 261 -r, --regions <region> restrict to comma-separated list of regions | |
| 262 | |
| 263 -R, --regions-file <file> restrict to regions listed in a file | |
| 264 | |
| 265 -t, --targets [^]<region> similar to -r but streams rather than index-jumps. Exclude regions with "^" prefix | |
| 266 | |
| 267 -T, --targets-file [^]<file> similar to -R but streams rather than index-jumps. Exclude regions with "^" prefix | |
| 268 | |
| 269 | |
| 270 **Subset options:** | |
| 271 | |
| 272 | |
| 273 -a, --trim-alt-alleles trim alternate alleles not seen in the subset | |
| 274 | |
| 275 -I, --no-update do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN) | |
| 276 | |
| 277 -s, --samples [^]<list> comma separated list of samples to include (or exclude with "^" prefix) | |
| 278 | |
| 279 --force-samples only warn about unknown subset samples | |
| 280 -S, --samples-file [^]<file> file of samples to include (or exclude with "^" prefix) | |
| 281 | |
| 282 | |
| 283 | |
| 284 | |
| 285 **Filter options:** | |
| 286 | |
| 287 -c/C, --min-ac/--max-ac <int>[:<type>] minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref] | |
| 288 | |
| 289 -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. "PASS,.") | |
| 290 | |
| 291 -g, --genotype [^]<hom|het|miss> require one or more hom/het/missing genotype or, if prefixed with "^", exclude sites with hom/het/missing genotypes | |
| 292 | |
| 293 -i/e, --include/--exclude <expr> select/exclude sites for which the expression is true (see man page for details) | |
| 294 | |
| 295 -k/n, --known/--novel select known/novel sites only (ID is not/is '.') | |
| 296 | |
| 297 -m/M, --min-alleles/--max-alleles <int> minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites) | |
| 298 | |
| 299 -p/P, --phased/--exclude-phased select/exclude sites where all samples are phased | |
| 300 | |
| 301 -q/Q, --min-af/--max-af <float>[:<type>] minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref] | |
| 302 | |
| 303 -u/U, --uncalled/--exclude-uncalled select/exclude sites without a called genotype | |
| 304 | |
| 305 -v/V, --types/--exclude-types <list> select/exclude comma-separated list of variant types: snps,indels,mnps,other [null] | |
| 306 | |
| 307 -x/X, --private/--exclude-private select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples | |
| 308 | |
| 309 </help> | |
| 310 <citations> | |
| 311 <citation type="doi">10.1093/bioinformatics/btp352</citation> | |
| 312 <citation type="doi">10.1093/bioinformatics/btr509</citation> | |
| 313 <citation type="doi">10.1093/bioinformatics/btr076</citation> | |
| 314 </citations> | |
| 315 </tool> |
