Mercurial > repos > takadonet > bcftools_view
diff bcftools_view.xml @ 0:667b2d503ba3 draft default tip
Uploaded
| author | takadonet |
|---|---|
| date | Wed, 08 Apr 2015 12:09:16 -0400 |
| parents | |
| children |
line wrap: on
line diff
<!-- Galaxy tool wrapper around "bcftools view": converts, views, subsets and filters VCF/BCF files. -->
<tool id="bcftools_view" name="bcftools_view" version="0.1.0">
    <requirements>
        <requirement type="package" version="1.0">bcftools</requirement>
        <requirement type="package" version="1.0">tabix</requirement>
    </requirements>
    <stdio>
        <!-- Matches every exit code from 1 upwards. -->
        <exit_code range="1:" />
    </stdio>
    <!--
        The command is a Cheetah template. It is wrapped in CDATA so that the
        shell "&&" operators do not have to be written as XML entities.
    -->
    <command><![CDATA[
        ## Plain-text VCF input is copied into the working directory and
        ## bgzip-compressed; the compressed copy then replaces the input path.
        #if str($input.ext) == 'vcf':
            cp '$input' input.vcf && bgzip input.vcf &&
            #set $input = "input.vcf.gz"
        #end if

        bcftools index '$input' &&
        bcftools view

        ## Output type: v/z = uncompressed/compressed VCF, u/b = uncompressed/compressed BCF.
        #if str($output_format) == 'vcf_uncompressed':
            -O v
        #elif str($output_format) == 'vcf_compressed':
            -O z
        #elif str($output_format) == 'bcf_uncompressed':
            -O u
        #elif str($output_format) == 'bcf_compressed':
            -O b
        #end if

        ## The "all" choice adds no flag.
        #if str($header_option) == 'header_only':
            --header-only
        #elif str($header_option) == 'no_header':
            --no-header
        #end if

        -o '$output'
        '$input'

        #if str($region):
            -r '$region'
        #end if

        #if str($trim_alt_alleles) == "True":
            -a
        #end if

        ## "True" selects sites without a called genotype, "False" excludes them;
        ## any other value (the "off" choice, or nothing selected) adds no flag.
        #if str($sites_no_genotype) == "True":
            -u
        #elif str($sites_no_genotype) == "False":
            -U
        #end if

        ## Compared as strings so that an explicit count of 0 is still passed on.
        #if str($min_nref) not in ('', 'None'):
            --min-ac "$min_nref"
        #end if

        #if str($max_nref) not in ('', 'None'):
            --max-ac "$max_nref"
        #end if

        #if $samples:
            -s "$samples"
        #end if

        #if $include_types:
            -v "$include_types"
        #end if

        #if $filters:
            --apply-filters "$filters"
        #end if

        ## Multi-selects are split on "," and each chosen value is mapped to its flag.
        #if $select_sites:
            #for $site_kind in str($select_sites).split(',')
                #if $site_kind == "known":
                    -k
                #elif $site_kind == "novel":
                    -n
                #end if
            #end for
        #end if

        #if $private:
            #for $private_mode in str($private).split(',')
                #if $private_mode == "private":
                    -x
                #elif $private_mode == "exclude":
                    -X
                #end if
            #end for
        #end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="bcf,vcf" optional="false" label="VCF/BCF file to view" />
        <param name="output_format" type="select" label="Choose the output format">
            <option value="vcf_uncompressed" selected="true">UnCompressed VCF</option>
            <option value="vcf_compressed">Compressed VCF</option>
            <option value="bcf_uncompressed">UnCompressed BCF</option>
            <option value="bcf_compressed">Compressed BCF</option>
        </param>
        <param name="header_option" type="select" label="Choose the output everything, only header or no header">
            <option value="all" selected="true">Print All</option>
            <option value="header_only">Header only</option>
            <option value="no_header">No Header</option>
        </param>
        <param name="region" type="text" size="30" value="" label="Region to view"
               help="Accept following format: CHROM:START-END" />
        <param name="trim_alt_alleles" type="select" optional="true" label="Trim alternate alleles"
               help="trim alternate alleles not seen in the subset">
            <option value="False">False</option>
            <option value="True">True</option>
        </param>
        <param name="sites_no_genotype" type="select" optional="true" label="Sites without a called genotype"
               help="select/exclude sites without a called genotype">
            <option value="off">Turn off completely</option>
            <option value="False">False</option>
            <option value="True">True</option>
        </param>
        <param name="min_nref" type="integer" optional="true" value="" label="Minimum count for non reference"
               help="minimum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" />
        <param name="max_nref" type="integer" optional="true" value="" label="Maximum count for non reference"
               help="Maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" />
        <!-- The value is passed to -s, which takes a list of sample names rather than a file. -->
        <param name="samples" type="text" optional="true" value="" label="Samples to include or exclude"
               help="comma separated list of samples to include (or exclude with &quot;^&quot; prefix)" />
        <param name="include_types" type="select" multiple="true" optional="true" label="Select variant types"
               help="select comma-separated list of variant types: snps,indels,mnps,other">
            <option value="snps">snps</option>
            <option value="indels">indels</option>
            <option value="mnps">mnps</option>
            <option value="other">other</option>
        </param>
        <param name="filters" type="text" optional="true" value="" label="FILTER strings"
               help="require at least one of the listed FILTER strings (e.g. &quot;PASS,.&quot;)" />
        <param name="select_sites" type="select" multiple="true" optional="true" label="Select known/novel sites"
               help="select known/novel sites only (ID is not/is '.')">
            <option value="known">Known</option>
            <option value="novel">novel</option>
        </param>
        <param name="private" type="select" multiple="true" optional="true"
               label="select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples">
            <option value="private">private</option>
            <option value="exclude">Exclude private</option>
        </param>
    </inputs>
    <outputs>
        <!-- Datatype is "bcf" unless one of the output_format values below switches it. -->
        <data format="bcf" name="output">
            <change_format>
                <when input="output_format" value="vcf_uncompressed" format="vcf" />
                <when input="output_format" value="vcf_compressed" format="vcf_bgzip" />
                <when input="output_format" value="bcf_compressed" format="bcf_bgzip" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="input" value="input1.bcf.gz" />
            <param name="output_format" value="vcf_uncompressed" />
            <output name="output" file="result1.vcf" ftype="vcf" lines_diff="2" />
        </test>
        <test>
            <param name="input" value="result1.vcf" />
            <param name="output_format" value="bcf_compressed" />
            <output name="output" file="result3.bcf.gz" ftype="bcf_bgzip" compare="sim_size" delta="100" />
        </test>
        <test>
            <param name="input" value="input1.bcf.gz" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_compressed" />
            <output name="output" file="result2.vcf.gz" ftype="vcf_bgzip" lines_diff="2" />
        </test>
        <test>
            <param name="input" value="merge.a.bcf" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="2:3199812-3199812" />
            <output name="output" file="tabix.2.3199812.out" ftype="vcf" />
        </test>
        <test>
            <param name="input" value="merge.a.bcf" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="1:3000151-3000151" />
            <output name="output" file="tabix.1.3000151.out" ftype="vcf" />
        </test>
        <test>
            <param name="input" value="large_chrom_tbi_limit.bcf" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="chr11:1-536870912" />
            <output name="output" file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" />
        </test>
        <test>
            <param name="input" value="large_chrom_csi_limit.vcf.gz" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="chr20:1-2147483647" />
            <output name="output" file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" />
        </test>
        <test>
            <param name="input" value="large_chrom_tbi_limit.vcf.gz" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="chr11:1-536870912" />
            <output name="output" file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" />
        </test>
        <test>
            <param name="input" value="large_chrom_csi_limit.bcf" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="chr20:1-2147483647" />
            <output name="output" file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" />
        </test>
        <test>
            <param name="input" value="large_chrom_csi_limit.bcf" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="chr20" />
            <output name="output" file="large_chrom.20.1.2147483647.out" ftype="vcf" />
        </test>
        <test>
            <param name="input" value="view.vcf.gz" />
            <param name="trim_alt_alleles" value="True" />
            <param name="sites_no_genotype" value="False" />
            <param name="samples" value="NA00002" />
            <param name="min_nref" value="1" />
            <param name="max_nref" value="1" />
            <param name="include_types" value="snps" />
            <param name="output_format" value="vcf_uncompressed" />
            <output name="output" file="view.1.out" ftype="vcf" lines_diff="2" />
        </test>
        <test>
            <param name="input" value="view.vcf.gz" />
            <param name="filters" value="PASS" />
            <param name="select_sites" value="known" />
            <param name="private" value="exclude" />
            <param name="samples" value="NA00003" />
            <param name="region" value="20" />
            <param name="output_format" value="vcf_uncompressed" />
            <output name="output" file="view.2.out" ftype="vcf" lines_diff="2" />
        </test>
        <test>
            <param name="input" value="view.vcf.gz" />
            <param name="private" value="private" />
            <param name="samples" value="NA00003" />
            <param name="output_format" value="vcf_uncompressed" />
            <output name="output" file="view.3.out" ftype="vcf" lines_diff="2" />
        </test>
    </tests>
    <!-- CDATA lets the usage text keep its literal <placeholder> notation. -->
    <help><![CDATA[

About: VCF/BCF conversion, view, subset and filter VCF/BCF files.

Usage: bcftools view [options] <in.vcf.gz> [region1 [...]]

**Output options:**

    -G, --drop-genotypes drop individual genotype information (after subsetting if -s option set)

    -h/H, --header-only/--no-header print the header only/suppress the header in VCF output

    -l, --compression-level [0-9] compression level: 0 uncompressed, 1 best speed, 9 best compression [-1]

    -o, --output-file <file> output file name [stdout]

    -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]

    -r, --regions <region> restrict to comma-separated list of regions

    -R, --regions-file <file> restrict to regions listed in a file

    -t, --targets [^]<region> similar to -r but streams rather than index-jumps. Exclude regions with "^" prefix

    -T, --targets-file [^]<file> similar to -R but streams rather than index-jumps. Exclude regions with "^" prefix


**Subset options:**

    -a, --trim-alt-alleles trim alternate alleles not seen in the subset

    -I, --no-update do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)

    -s, --samples [^]<list> comma separated list of samples to include (or exclude with "^" prefix)

    --force-samples only warn about unknown subset samples

    -S, --samples-file [^]<file> file of samples to include (or exclude with "^" prefix)


**Filter options:**

    -c/C, --min-ac/--max-ac <int>[:<type>] minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]

    -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. "PASS,.")

    -g, --genotype [^]<hom|het|miss> require one or more hom/het/missing genotype or, if prefixed with "^", exclude sites with hom/het/missing genotypes

    -i/e, --include/--exclude <expr> select/exclude sites for which the expression is true (see man page for details)

    -k/n, --known/--novel select known/novel sites only (ID is not/is '.')

    -m/M, --min-alleles/--max-alleles <int> minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites)

    -p/P, --phased/--exclude-phased select/exclude sites where all samples are phased

    -q/Q, --min-af/--max-af <float>[:<type>] minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]

    -u/U, --uncalled/--exclude-uncalled select/exclude sites without a called genotype

    -v/V, --types/--exclude-types <list> select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]

    -x/X, --private/--exclude-private select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples

    ]]></help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btp352</citation>
        <citation type="doi">10.1093/bioinformatics/btr509</citation>
        <citation type="doi">10.1093/bioinformatics/btr076</citation>
    </citations>
</tool>
