# HG changeset patch # User takadonet # Date 1428509356 14400 # Node ID 667b2d503ba30b1df61db453fef82303842ff66f Uploaded diff -r 000000000000 -r 667b2d503ba3 bcftools_view.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bcftools_view.xml Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,315 @@ + + + bcftools + tabix + + + + + + + #if str($input.ext) == 'vcf': + cp $input input.vcf && bgzip input.vcf && + #set $input="input.vcf.gz" + #end if + + bcftools index $input && + bcftools view + + #if str($output_format) == 'vcf_uncompressed': + -O v + #elif str($output_format) =='vcf_compressed': + -O z + #elif str($output_format) =='bcf_uncompressed': + -O u + #elif str($output_format) =='bcf_compressed': + -O b + #end if + + #if str($header_option) == 'header_only': + --header-only + #elif str($header_option) == 'no_header': + --no-header + #end if + + -o $output + $input + + #if str($region): + -r $region + #end if + + + #if str($trim_alt_alleles) == "True" then "-a" else "" # + + #if str($sites_no_genotype) == "True": + "-u" + elif str($sites_no_genotype) == "False": + "-U" + #end if + + #if $min_nref: + --min-ac "$min_nref" + #end if + + #if $max_nref: + --max-ac "$max_nref" + #end if + + #if $samples: + -s "$samples" + #end if + + #if $include_types: + -v "$include_types" + #end if + + #if $filters: + --apply-filters "$filters" + #end if + + + #if $select_sites: + #set $list = str($select_sites).split(',') + #for $i, $s in enumerate( $list ) + #if str($s) == "known": + -k + #elif str($s) == "novel": + -n + #end if + #end for + #end if + + #if $private: + #set $list = str($private).split(',') + #for $i, $s in enumerate( $list ) + #if str($s) == "private": + -x + #elif str($s) == "exclude": + -X + #end if + #end for + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +About: VCF/BCF conversion, view, subset and filter VCF/BCF files. + +Usage: bcftools view [options] <in.vcf.gz> [region1 [...]] + +**Output options:** + + -G, --drop-genotypes drop individual genotype information (after subsetting if -s option set) + + -h/H, --header-only/--no-header print the header only/suppress the header in VCF output + + -l, --compression-level [0-9] compression level: 0 uncompressed, 1 best speed, 9 best compression [-1] + + -o, --output-file <file> output file name [stdout] + + -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v] + + -r, --regions <region> restrict to comma-separated list of regions + + -R, --regions-file <file> restrict to regions listed in a file + + -t, --targets [^]<region> similar to -r but streams rather than index-jumps. Exclude regions with "^" prefix + + -T, --targets-file [^]<file> similar to -R but streams rather than index-jumps. Exclude regions with "^" prefix + + +**Subset options:** + + + -a, --trim-alt-alleles trim alternate alleles not seen in the subset + + -I, --no-update do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN) + + -s, --samples [^]<list> comma separated list of samples to include (or exclude with "^" prefix) + + --force-samples only warn about unknown subset samples + -S, --samples-file [^]<file> file of samples to include (or exclude with "^" prefix) + + + + +**Filter options:** + + -c/C, --min-ac/--max-ac <int>[:<type>] minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref] + + -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. 
"PASS,.") + + -g, --genotype [^]<hom|het|miss> require one or more hom/het/missing genotype or, if prefixed with "^", exclude sites with hom/het/missing genotypes + + -i/e, --include/--exclude <expr> select/exclude sites for which the expression is true (see man page for details) + + -k/n, --known/--novel select known/novel sites only (ID is not/is '.') + + -m/M, --min-alleles/--max-alleles <int> minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites) + + -p/P, --phased/--exclude-phased select/exclude sites where all samples are phased + + -q/Q, --min-af/--max-af <float>[:<type>] minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref] + + -u/U, --uncalled/--exclude-uncalled select/exclude sites without a called genotype + + -v/V, --types/--exclude-types <list> select/exclude comma-separated list of variant types: snps,indels,mnps,other [null] + + -x/X, --private/--exclude-private select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples + + + + 10.1093/bioinformatics/btp352 + 10.1093/bioinformatics/btr509 + 10.1093/bioinformatics/btr076 + + diff -r 000000000000 -r 667b2d503ba3 test-data/input1.bcf.gz Binary file test-data/input1.bcf.gz has changed diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom.20.1.2147483647.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/large_chrom.20.1.2147483647.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,9 @@ +chr20 76962 . T C 999 PASS . +chr20 126310 . ACC A 999 PASS . +chr20 138125 . G T 999 PASS . +chr20 138148 . C T 999 PASS . +chr20 271225 . T TTTA,TA 999 PASS . +chr20 304568 . C T 999 PASS . +chr20 620255100 . AG T 999 PASS . +chr20 630255200 . G C 999 PASS . +chr20 2147483647 . A T 999 PASS . 
diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_csi_limit.20.1.2147483647.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/large_chrom_csi_limit.20.1.2147483647.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,9 @@ +chr20 76962 . T C 999 PASS . +chr20 126310 . ACC A 999 PASS . +chr20 138125 . G T 999 PASS . +chr20 138148 . C T 999 PASS . +chr20 271225 . T TTTA,TA 999 PASS . +chr20 304568 . C T 999 PASS . +chr20 620255100 . AG T 999 PASS . +chr20 630255200 . G C 999 PASS . +chr20 2147483647 . A T 999 PASS . diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_csi_limit.bcf Binary file test-data/large_chrom_csi_limit.bcf has changed diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_csi_limit.vcf.gz Binary file test-data/large_chrom_csi_limit.vcf.gz has changed diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_tbi_limit.20.1.536870912.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/large_chrom_tbi_limit.20.1.536870912.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,3 @@ +chr11 2343543 . A . 999 PASS . +chr11 5464562 . C T 999 PASS . +chr11 116870911 . C G 999 PASS . 
diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_tbi_limit.bcf Binary file test-data/large_chrom_tbi_limit.bcf has changed diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_tbi_limit.vcf.gz Binary file test-data/large_chrom_tbi_limit.vcf.gz has changed diff -r 000000000000 -r 667b2d503ba3 test-data/merge.a.bcf Binary file test-data/merge.a.bcf has changed diff -r 000000000000 -r 667b2d503ba3 test-data/result1.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result1.vcf Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,60 @@ +##fileformat=VCFv4.1 +##FILTER= +##contig= +##fileDate=20150323 +##source=freeBayes version 0.9.8 +##reference=/home/phil/cholera-files-subsample/test/reference/3554-08.2010EL-1786-c1_2000_2400kb.fasta +##phasing=none +##commandline="/share/apps/freebayes/bin/freebayes --bam /home/phil/cholera-files-subsample/test/bam/3554-08.bam --vcf /home/phil/cholera-files-subsample/test/vcf/3554-08.vcf --fasta-reference /home/phil/cholera-files-subsample/test/reference/3554-08.2010EL-1786-c1_2000_2400kb.fasta --min-coverage 15 --pvar 0 --ploidy 1 --left-align-indels --min-mapping-quality 30 --min-base-quality 30 --min-alternate-fraction 0.75" +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##bcftools_viewVersion=1.2+htslib-1.2.1 +##bcftools_viewCommand=view -O b -o /home/phil/cholera-files-subsample/test/vcf-split/3554-08.bcf.gz +##bcftools_viewCommand=view input.bcf.gz +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT unknown +gi|360034408|ref|NC_016445.1|_2000000_2400000 149638 . T C 4549.55 . 
AB=0;ABP=0;AC=1;AF=1;AN=1;AO=83;CIGAR=1X;DP=84;DPRA=0;EPP=5.12945;EPPR=0;HWE=-0;LEN=1;MEANALT=2;MQM=54.1084;MQMR=0;NS=1;NUMALT=1;ODDS=1047.57;PAIRED=0;PAIREDR=0;RO=0;RPP=7.43173;RPPR=0;RUN=1;SAP=3.24576;SRP=0;TYPE=snp;XAI=0.00379215;XAM=0.0144638;XAS=0.0106717;XRI=0;XRM=0;XRS=0;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:84:0:0:83:5052:-458.255,-3.3 +gi|360034408|ref|NC_016445.1|_2000000_2400000 151395 . A G 2231.3 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=50;CIGAR=1X;DP=62;DPRA=0;EPP=3.70517;EPPR=3.87889;HWE=-0;LEN=1;MEANALT=3;MQM=54.06;MQMR=54;NS=1;NUMALT=1;ODDS=513.775;PAIRED=0;PAIREDR=0;RO=10;RPP=5.78978;RPPR=16.9077;RUN=1;SAP=7.35324;SRP=6.48466;TYPE=snp;XAI=0.00378432;XAM=0.0186315;XAS=0.0148472;XRI=0.00625034;XRM=0.0151692;XRS=0.00891891;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:62:10:636:50:3115:-287.894,-64.7642 +gi|360034408|ref|NC_016445.1|_2000000_2400000 151608 . A G 2818.36 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=65;CIGAR=1X;DP=83;DPRA=0;EPP=3.31097;EPPR=3.63072;HWE=-0;LEN=1;MEANALT=5;MQM=54.8308;MQMR=54.8571;NS=1;NUMALT=1;ODDS=648.95;PAIRED=0;PAIREDR=0;RO=14;RPP=4.64726;RPPR=33.4109;RUN=1;SAP=23.8898;SRP=3.63072;TYPE=snp;XAI=0.00600565;XAM=0.0269139;XAS=0.0209083;XRI=0.00934969;XRM=0.017822;XRS=0.00847229;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:83:14:850:65:3981:-371.848,-90.0122 diff -r 000000000000 -r 667b2d503ba3 test-data/result2.vcf.gz Binary file test-data/result2.vcf.gz has changed diff -r 000000000000 -r 667b2d503ba3 test-data/result3.bcf.gz Binary file test-data/result3.bcf.gz has changed diff -r 000000000000 -r 667b2d503ba3 test-data/tabix.1.3000151.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tabix.1.3000151.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,1 @@ +1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245 diff -r 000000000000 -r 667b2d503ba3 test-data/tabix.2.3199812.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tabix.2.3199812.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,1 @@ +2 3199812 . 
G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26 diff -r 000000000000 -r 667b2d503ba3 test-data/view.1.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/view.1.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,39 @@ +##fileformat=VCFv4.1 +##FILTER= +##reference=file:///seq/references/1000Genomes-NCBI37.fasta +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00002 +20 138125 rs2298108 G T 999 PASS DP4=174391,20849,82080,4950;DP=286107;Dels=0;FS=3200;HWE=0.199462;ICF=0.01858;MQ0=0;MQ=46;PV4=0,0,0,1;QD=17.22;AN=2;AC=1 GT:PL:DP:GQ 0/1:140,0,255:71:99 +20 138148 rs2298109 C T 999 PASS DP4=194136,45753,94945,14367;DP=356657;Dels=0;FS=3200;HWE=0.177865;ICF=0.0198;MQ0=0;MQ=47;PV4=0,0,0,1;QD=14.57;AN=2;AC=1 GT:PL:DP:GQ 0/1:192,0,255:82:99 +20 304568 . C T 999 PASS DP4=16413,4543,945,156;DP=43557;Dels=0;FS=3200;HWE=0.076855;ICF=0.0213;MQ0=0;MQ=50;PV4=0,0,0,1;QD=15.45;AN=2;AC=1 GT:PL:DP:GQ 0|1:192,0,255:13:99 +X 2942109 rs5939407 T C 999 PASS DP4=23273,27816,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=1;AC=1 GT:PL:DP:GQ 1:255,0:33:99 +X 3048719 . T C 999 PASS DP4=13263,27466,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=1;AC=1 GT:PL:DP:GQ 1:255,0:33:99 +Y 8657215 . 
C A 999 PASS DP4=74915,114274,1948,2955;DP=195469;Dels=0;FS=3.181;MQ0=0;MQ=50;PV4=0.86,1,0,1;QD=33.77;AN=1;AC=1 GT:PL:DP:GQ 1:255,0:64:99 +Y 10011673 rs78249411 G A 999 MinAB DP4=47351,30839,178796,279653;DP=550762;Dels=0;FS=41.028;MQ0=37362;MQ=26;PV4=0,0,0,1;QD=17.45;AN=1;AC=1 GT:PL:DP:GQ 1:95,0:130:99 diff -r 000000000000 -r 667b2d503ba3 test-data/view.2.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/view.2.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,35 @@ +##fileformat=VCFv4.1 +##FILTER= +##reference=file:///seq/references/1000Genomes-NCBI37.fasta +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00003 +20 76962 rs6111385 T C 999 PASS DP4=110138,70822,421911,262673;DP=911531;Dels=0;FS=21.447;HWE=0.491006;ICF=-0.01062;MQ0=1;MQ=46;PV4=2.5e-09,0,0,1;QD=22.31;AC=2;AN=2 GT:PL:DP:GQ 1/1:255,255,0:182:99 +20 138125 rs2298108 G T 999 PASS DP4=174391,20849,82080,4950;DP=286107;Dels=0;FS=3200;HWE=0.199462;ICF=0.01858;MQ0=0;MQ=46;PV4=0,0,0,1;QD=17.22;AN=2;AC=2 GT:PL:DP:GQ 1/1:255,199,0:66:99 +20 138148 rs2298109 C T 999 PASS DP4=194136,45753,94945,14367;DP=356657;Dels=0;FS=3200;HWE=0.177865;ICF=0.0198;MQ0=0;MQ=47;PV4=0,0,0,1;QD=14.57;AN=2;AC=2 GT:PL:DP:GQ 1/1:255,235,0:78:99 diff -r 000000000000 -r 667b2d503ba3 test-data/view.3.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/view.3.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,34 @@ +##fileformat=VCFv4.1 +##FILTER= +##reference=file:///seq/references/1000Genomes-NCBI37.fasta +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FILTER= +##FILTER= 
+##FILTER= +##FILTER= +##FILTER= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00003 +X 2928329 rs62584840 C T 999 PASS DP4=302,9137,32,1329;DP=11020;Dels=0;FS=13.38;HWE=0.284332;ICF=0.0253;MQ0=0;MQ=49;PV4=0.094,0,0,1;QD=18.61;AN=2;AC=1 GT:PL:DP:GQ 0/1:73,0,19:4:30 +X 2933066 rs61746890 G C 999 PASS DP4=69865,100561,461,783;DP=173729;Dels=0;FS=10.833;MQ0=0;MQ=50;PV4=0.005,3.6e-14,0,1;QD=15.33;AN=2;AC=1 GT:PL:DP:GQ 0/1:255,255,255:62:99 diff -r 000000000000 -r 667b2d503ba3 test-data/view.vcf.gz Binary file test-data/view.vcf.gz has changed diff -r 000000000000 -r 667b2d503ba3 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,9 @@ + + + + + + + + +