# HG changeset patch
# User takadonet
# Date 1428509356 14400
# Node ID 667b2d503ba30b1df61db453fef82303842ff66f
Uploaded
diff -r 000000000000 -r 667b2d503ba3 bcftools_view.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bcftools_view.xml Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,315 @@
+
+
+ bcftools
+ tabix
+
+
+
+
+
+
+ #if str($input.ext) == 'vcf':
+ cp $input input.vcf && bgzip input.vcf &&
+ #set $input="input.vcf.gz"
+ #end if
+ ## For plain VCF input the lines above copy and bgzip it and repoint $input at the compressed copy; the index step runs on whichever file $input names.
+ bcftools index $input &&
+ bcftools view
+ ## Translate the selected output format into the matching -O type letter (v, z, u or b).
+ #if str($output_format) == 'vcf_uncompressed':
+ -O v
+ #elif str($output_format) =='vcf_compressed':
+ -O z
+ #elif str($output_format) =='bcf_uncompressed':
+ -O u
+ #elif str($output_format) =='bcf_compressed':
+ -O b
+ #end if
+ ## Optionally print only the header or suppress it; any other header_option value adds no flag.
+ #if str($header_option) == 'header_only':
+ --header-only
+ #elif str($header_option) == 'no_header':
+ --no-header
+ #end if
+ ## Output file first, then the (possibly re-pointed) input file.
+ -o $output
+ $input
+ ## Pass -r only when the region field renders as a non-empty string.
+ #if str($region):
+ -r $region
+ #end if
+
+ ## One-line conditional: emits -a only when trim_alt_alleles renders as "True", otherwise an empty string.
+ #if str($trim_alt_alleles) == "True" then "-a" else "" #
+ ## -u selects sites without a called genotype, -U excludes them; any other value adds no flag.
+ #if str($sites_no_genotype) == "True":
+ -u
+ #elif str($sites_no_genotype) == "False":
+ -U
+ #end if
+ ## Optional filters: each flag below is emitted only when its parameter evaluates as true.
+ #if $min_nref:
+ --min-ac "$min_nref"
+ #end if
+ ## NOTE(review): if min_nref/max_nref are plain integers, a value of 0 would be skipped by these truthiness tests - confirm.
+ #if $max_nref:
+ --max-ac "$max_nref"
+ #end if
+ ## Sample list passed to -s.
+ #if $samples:
+ -s "$samples"
+ #end if
+ ## Variant types passed to -v.
+ #if $include_types:
+ -v "$include_types"
+ #end if
+ ## FILTER strings passed to --apply-filters.
+ #if $filters:
+ --apply-filters "$filters"
+ #end if
+
+ ## Map each comma-separated select_sites choice to its flag: known gives -k, novel gives -n.
+ #if $select_sites:
+ #set $site_choices = str($select_sites).split(',')
+ #for $choice in $site_choices
+ #if "known" == str($choice):
+ -k
+ #elif "novel" == str($choice):
+ -n
+ #end if
+ #end for
+ #end if
+ ## Map each comma-separated private choice to its flag: private gives -x, exclude gives -X.
+ #if $private:
+ #set $private_choices = str($private).split(',')
+ #for $choice in $private_choices
+ #if "private" == str($choice):
+ -x
+ #elif "exclude" == str($choice):
+ -X
+ #end if
+ #end for
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+About: VCF/BCF conversion, view, subset and filter VCF/BCF files.
+
+Usage: bcftools view [options] <in.vcf.gz> [region1 [...]]
+
+**Output options:**
+
+ -G, --drop-genotypes drop individual genotype information (after subsetting if -s option set)
+
+ -h/H, --header-only/--no-header print the header only/suppress the header in VCF output
+
+ -l, --compression-level [0-9] compression level: 0 uncompressed, 1 best speed, 9 best compression [-1]
+
+ -o, --output-file <file> output file name [stdout]
+
+ -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]
+
+ -r, --regions <region> restrict to comma-separated list of regions
+
+ -R, --regions-file <file> restrict to regions listed in a file
+
+ -t, --targets [^]<region> similar to -r but streams rather than index-jumps. Exclude regions with "^" prefix
+
+ -T, --targets-file [^]<file> similar to -R but streams rather than index-jumps. Exclude regions with "^" prefix
+
+
+**Subset options:**
+
+
+ -a, --trim-alt-alleles trim alternate alleles not seen in the subset
+
+ -I, --no-update do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)
+
+ -s, --samples [^]<list> comma-separated list of samples to include (or exclude with "^" prefix)
+
+ --force-samples only warn about unknown subset samples
+ -S, --samples-file [^]<file> file of samples to include (or exclude with "^" prefix)
+
+
+
+
+**Filter options:**
+
+ -c/C, --min-ac/--max-ac <int>[:<type>] minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]
+
+ -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. "PASS,.")
+
+ -g, --genotype [^]<hom|het|miss> require one or more hom/het/missing genotype or, if prefixed with "^", exclude sites with hom/het/missing genotypes
+
+ -i/e, --include/--exclude <expr> select/exclude sites for which the expression is true (see man page for details)
+
+ -k/n, --known/--novel select known/novel sites only (ID is not/is '.')
+
+ -m/M, --min-alleles/--max-alleles <int> minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites)
+
+ -p/P, --phased/--exclude-phased select/exclude sites where all samples are phased
+
+ -q/Q, --min-af/--max-af <float>[:<type>] minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]
+
+ -u/U, --uncalled/--exclude-uncalled select/exclude sites without a called genotype
+
+ -v/V, --types/--exclude-types <list> select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]
+
+ -x/X, --private/--exclude-private select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples
+
+
+
+ 10.1093/bioinformatics/btp352
+ 10.1093/bioinformatics/btr509
+ 10.1093/bioinformatics/btr076
+
+
diff -r 000000000000 -r 667b2d503ba3 test-data/input1.bcf.gz
Binary file test-data/input1.bcf.gz has changed
diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom.20.1.2147483647.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/large_chrom.20.1.2147483647.out Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,9 @@
+chr20 76962 . T C 999 PASS .
+chr20 126310 . ACC A 999 PASS .
+chr20 138125 . G T 999 PASS .
+chr20 138148 . C T 999 PASS .
+chr20 271225 . T TTTA,TA 999 PASS .
+chr20 304568 . C T 999 PASS .
+chr20 620255100 . AG T 999 PASS .
+chr20 630255200 . G C 999 PASS .
+chr20 2147483647 . A T 999 PASS .
diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_csi_limit.20.1.2147483647.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/large_chrom_csi_limit.20.1.2147483647.out Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,9 @@
+chr20 76962 . T C 999 PASS .
+chr20 126310 . ACC A 999 PASS .
+chr20 138125 . G T 999 PASS .
+chr20 138148 . C T 999 PASS .
+chr20 271225 . T TTTA,TA 999 PASS .
+chr20 304568 . C T 999 PASS .
+chr20 620255100 . AG T 999 PASS .
+chr20 630255200 . G C 999 PASS .
+chr20 2147483647 . A T 999 PASS .
diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_csi_limit.bcf
Binary file test-data/large_chrom_csi_limit.bcf has changed
diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_csi_limit.vcf.gz
Binary file test-data/large_chrom_csi_limit.vcf.gz has changed
diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_tbi_limit.20.1.536870912.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/large_chrom_tbi_limit.20.1.536870912.out Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,3 @@
+chr11 2343543 . A . 999 PASS .
+chr11 5464562 . C T 999 PASS .
+chr11 116870911 . C G 999 PASS .
diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_tbi_limit.bcf
Binary file test-data/large_chrom_tbi_limit.bcf has changed
diff -r 000000000000 -r 667b2d503ba3 test-data/large_chrom_tbi_limit.vcf.gz
Binary file test-data/large_chrom_tbi_limit.vcf.gz has changed
diff -r 000000000000 -r 667b2d503ba3 test-data/merge.a.bcf
Binary file test-data/merge.a.bcf has changed
diff -r 000000000000 -r 667b2d503ba3 test-data/result1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result1.vcf Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,60 @@
+##fileformat=VCFv4.1
+##FILTER=
+##contig=
+##fileDate=20150323
+##source=freeBayes version 0.9.8
+##reference=/home/phil/cholera-files-subsample/test/reference/3554-08.2010EL-1786-c1_2000_2400kb.fasta
+##phasing=none
+##commandline="/share/apps/freebayes/bin/freebayes --bam /home/phil/cholera-files-subsample/test/bam/3554-08.bam --vcf /home/phil/cholera-files-subsample/test/vcf/3554-08.vcf --fasta-reference /home/phil/cholera-files-subsample/test/reference/3554-08.2010EL-1786-c1_2000_2400kb.fasta --min-coverage 15 --pvar 0 --ploidy 1 --left-align-indels --min-mapping-quality 30 --min-base-quality 30 --min-alternate-fraction 0.75"
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##bcftools_viewVersion=1.2+htslib-1.2.1
+##bcftools_viewCommand=view -O b -o /home/phil/cholera-files-subsample/test/vcf-split/3554-08.bcf.gz
+##bcftools_viewCommand=view input.bcf.gz
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT unknown
+gi|360034408|ref|NC_016445.1|_2000000_2400000 149638 . T C 4549.55 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=83;CIGAR=1X;DP=84;DPRA=0;EPP=5.12945;EPPR=0;HWE=-0;LEN=1;MEANALT=2;MQM=54.1084;MQMR=0;NS=1;NUMALT=1;ODDS=1047.57;PAIRED=0;PAIREDR=0;RO=0;RPP=7.43173;RPPR=0;RUN=1;SAP=3.24576;SRP=0;TYPE=snp;XAI=0.00379215;XAM=0.0144638;XAS=0.0106717;XRI=0;XRM=0;XRS=0;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:84:0:0:83:5052:-458.255,-3.3
+gi|360034408|ref|NC_016445.1|_2000000_2400000 151395 . A G 2231.3 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=50;CIGAR=1X;DP=62;DPRA=0;EPP=3.70517;EPPR=3.87889;HWE=-0;LEN=1;MEANALT=3;MQM=54.06;MQMR=54;NS=1;NUMALT=1;ODDS=513.775;PAIRED=0;PAIREDR=0;RO=10;RPP=5.78978;RPPR=16.9077;RUN=1;SAP=7.35324;SRP=6.48466;TYPE=snp;XAI=0.00378432;XAM=0.0186315;XAS=0.0148472;XRI=0.00625034;XRM=0.0151692;XRS=0.00891891;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:62:10:636:50:3115:-287.894,-64.7642
+gi|360034408|ref|NC_016445.1|_2000000_2400000 151608 . A G 2818.36 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=65;CIGAR=1X;DP=83;DPRA=0;EPP=3.31097;EPPR=3.63072;HWE=-0;LEN=1;MEANALT=5;MQM=54.8308;MQMR=54.8571;NS=1;NUMALT=1;ODDS=648.95;PAIRED=0;PAIREDR=0;RO=14;RPP=4.64726;RPPR=33.4109;RUN=1;SAP=23.8898;SRP=3.63072;TYPE=snp;XAI=0.00600565;XAM=0.0269139;XAS=0.0209083;XRI=0.00934969;XRM=0.017822;XRS=0.00847229;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:83:14:850:65:3981:-371.848,-90.0122
diff -r 000000000000 -r 667b2d503ba3 test-data/result2.vcf.gz
Binary file test-data/result2.vcf.gz has changed
diff -r 000000000000 -r 667b2d503ba3 test-data/result3.bcf.gz
Binary file test-data/result3.bcf.gz has changed
diff -r 000000000000 -r 667b2d503ba3 test-data/tabix.1.3000151.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tabix.1.3000151.out Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,1 @@
+1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
diff -r 000000000000 -r 667b2d503ba3 test-data/tabix.2.3199812.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tabix.2.3199812.out Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,1 @@
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
diff -r 000000000000 -r 667b2d503ba3 test-data/view.1.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/view.1.out Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,39 @@
+##fileformat=VCFv4.1
+##FILTER=
+##reference=file:///seq/references/1000Genomes-NCBI37.fasta
+##contig=
+##contig=
+##contig=
+##contig=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00002
+20 138125 rs2298108 G T 999 PASS DP4=174391,20849,82080,4950;DP=286107;Dels=0;FS=3200;HWE=0.199462;ICF=0.01858;MQ0=0;MQ=46;PV4=0,0,0,1;QD=17.22;AN=2;AC=1 GT:PL:DP:GQ 0/1:140,0,255:71:99
+20 138148 rs2298109 C T 999 PASS DP4=194136,45753,94945,14367;DP=356657;Dels=0;FS=3200;HWE=0.177865;ICF=0.0198;MQ0=0;MQ=47;PV4=0,0,0,1;QD=14.57;AN=2;AC=1 GT:PL:DP:GQ 0/1:192,0,255:82:99
+20 304568 . C T 999 PASS DP4=16413,4543,945,156;DP=43557;Dels=0;FS=3200;HWE=0.076855;ICF=0.0213;MQ0=0;MQ=50;PV4=0,0,0,1;QD=15.45;AN=2;AC=1 GT:PL:DP:GQ 0|1:192,0,255:13:99
+X 2942109 rs5939407 T C 999 PASS DP4=23273,27816,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=1;AC=1 GT:PL:DP:GQ 1:255,0:33:99
+X 3048719 . T C 999 PASS DP4=13263,27466,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=1;AC=1 GT:PL:DP:GQ 1:255,0:33:99
+Y 8657215 . C A 999 PASS DP4=74915,114274,1948,2955;DP=195469;Dels=0;FS=3.181;MQ0=0;MQ=50;PV4=0.86,1,0,1;QD=33.77;AN=1;AC=1 GT:PL:DP:GQ 1:255,0:64:99
+Y 10011673 rs78249411 G A 999 MinAB DP4=47351,30839,178796,279653;DP=550762;Dels=0;FS=41.028;MQ0=37362;MQ=26;PV4=0,0,0,1;QD=17.45;AN=1;AC=1 GT:PL:DP:GQ 1:95,0:130:99
diff -r 000000000000 -r 667b2d503ba3 test-data/view.2.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/view.2.out Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,35 @@
+##fileformat=VCFv4.1
+##FILTER=
+##reference=file:///seq/references/1000Genomes-NCBI37.fasta
+##contig=
+##contig=
+##contig=
+##contig=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00003
+20 76962 rs6111385 T C 999 PASS DP4=110138,70822,421911,262673;DP=911531;Dels=0;FS=21.447;HWE=0.491006;ICF=-0.01062;MQ0=1;MQ=46;PV4=2.5e-09,0,0,1;QD=22.31;AC=2;AN=2 GT:PL:DP:GQ 1/1:255,255,0:182:99
+20 138125 rs2298108 G T 999 PASS DP4=174391,20849,82080,4950;DP=286107;Dels=0;FS=3200;HWE=0.199462;ICF=0.01858;MQ0=0;MQ=46;PV4=0,0,0,1;QD=17.22;AN=2;AC=2 GT:PL:DP:GQ 1/1:255,199,0:66:99
+20 138148 rs2298109 C T 999 PASS DP4=194136,45753,94945,14367;DP=356657;Dels=0;FS=3200;HWE=0.177865;ICF=0.0198;MQ0=0;MQ=47;PV4=0,0,0,1;QD=14.57;AN=2;AC=2 GT:PL:DP:GQ 1/1:255,235,0:78:99
diff -r 000000000000 -r 667b2d503ba3 test-data/view.3.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/view.3.out Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,34 @@
+##fileformat=VCFv4.1
+##FILTER=
+##reference=file:///seq/references/1000Genomes-NCBI37.fasta
+##contig=
+##contig=
+##contig=
+##contig=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00003
+X 2928329 rs62584840 C T 999 PASS DP4=302,9137,32,1329;DP=11020;Dels=0;FS=13.38;HWE=0.284332;ICF=0.0253;MQ0=0;MQ=49;PV4=0.094,0,0,1;QD=18.61;AN=2;AC=1 GT:PL:DP:GQ 0/1:73,0,19:4:30
+X 2933066 rs61746890 G C 999 PASS DP4=69865,100561,461,783;DP=173729;Dels=0;FS=10.833;MQ0=0;MQ=50;PV4=0.005,3.6e-14,0,1;QD=15.33;AN=2;AC=1 GT:PL:DP:GQ 0/1:255,255,255:62:99
diff -r 000000000000 -r 667b2d503ba3 test-data/view.vcf.gz
Binary file test-data/view.vcf.gz has changed
diff -r 000000000000 -r 667b2d503ba3 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+