changeset 0:667b2d503ba3 draft default tip

Uploaded
author takadonet
date Wed, 08 Apr 2015 12:09:16 -0400
parents
children
files bcftools_view.xml test-data/input1.bcf.gz test-data/large_chrom.20.1.2147483647.out test-data/large_chrom_csi_limit.20.1.2147483647.out test-data/large_chrom_csi_limit.bcf test-data/large_chrom_csi_limit.vcf.gz test-data/large_chrom_tbi_limit.20.1.536870912.out test-data/large_chrom_tbi_limit.bcf test-data/large_chrom_tbi_limit.vcf.gz test-data/merge.a.bcf test-data/result1.vcf test-data/result2.vcf.gz test-data/result3.bcf.gz test-data/tabix.1.3000151.out test-data/tabix.2.3199812.out test-data/view.1.out test-data/view.2.out test-data/view.3.out test-data/view.vcf.gz tool_dependencies.xml
diffstat 20 files changed, 515 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bcftools_view.xml	Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,315 @@
+<tool id="bcftools_view" name="bcftools_view" version="0.1.0">
+    <requirements> <!-- External packages requested for the command below, which invokes bcftools and bgzip (bgzip presumably ships with the tabix package; confirm against tool_dependencies.xml). -->
+        <requirement type="package" version="1.0">bcftools</requirement>
+        <requirement type="package" version="1.0">tabix</requirement>
+    </requirements>
+    <stdio> <!-- One rule matching every exit code from 1 upwards; presumably Galaxy then marks the job as failed (confirm the default level in the tool schema). -->
+        <exit_code range="1:" />
+    </stdio>
+    <command>
+           ## Plain-text VCF input is copied into the working directory and bgzip-compressed before bcftools index runs on it.
+           #if str($input.ext) == 'vcf':
+           cp $input input.vcf &amp;&amp; bgzip input.vcf &amp;&amp;
+           #set $input="input.vcf.gz"
+           #end if
+
+           bcftools index $input &amp;&amp;
+           bcftools view
+        ## Output type: v = uncompressed VCF, z = compressed VCF, u = uncompressed BCF, b = compressed BCF.
+        #if str($output_format) == 'vcf_uncompressed':
+        -O v
+        #elif str($output_format) =='vcf_compressed':
+        -O z
+        #elif str($output_format) =='bcf_uncompressed':
+        -O u
+        #elif str($output_format) =='bcf_compressed':
+        -O b
+        #end if
+        ## Header handling: the "all" choice adds no flag.
+        #if str($header_option) == 'header_only':
+         --header-only
+        #elif str($header_option) == 'no_header':
+        --no-header
+        #end if
+
+        -o $output
+        $input
+
+        #if str($region):
+         -r "$region"
+        #end if
+
+        ## Optional subset/filter flags: each one is emitted only when its parameter is set.
+        #if str($trim_alt_alleles) == "True" then "-a" else "" #
+        ## "True" selects sites without a called genotype (-u), "False" excludes them (-U); other values add neither flag.
+        #if str($sites_no_genotype) == "True":
+        -u
+        #elif str($sites_no_genotype) == "False":
+        -U
+        #end if
+
+        #if $min_nref:
+        --min-ac "$min_nref"
+        #end if
+
+        #if $max_nref:
+        --max-ac "$max_nref"
+        #end if
+
+        #if $samples:
+        -s "$samples"
+        #end if
+
+       #if $include_types:
+       -v "$include_types"
+       #end if
+
+        #if $filters:
+        --apply-filters "$filters"
+        #end if
+
+
+       #if $select_sites:
+       #set $list = str($select_sites).split(',')
+       #for $i, $s in enumerate( $list )
+           #if str($s) == "known":
+          -k
+          #elif str($s) == "novel":
+          -n 
+          #end if
+        #end for
+       #end if
+
+       #if $private:
+       #set $list = str($private).split(',')
+       #for $i, $s in enumerate( $list )
+           #if str($s) == "private":
+          -x
+          #elif str($s) == "exclude":
+          -X 
+          #end if
+        #end for
+       #end if
+
+
+
+    </command>
+    <inputs> <!-- Parameter names and option values are referenced by the command template and by the tests; labels and help text are user-facing only. -->
+        <param format="bcf,vcf" label="VCF/BCF file to view" name="input" optional="false" type="data" />
+        <param label="Choose the output format" name="output_format" type="select">
+            <option selected="true" value="vcf_uncompressed">Uncompressed VCF</option>
+            <option value="vcf_compressed">Compressed VCF</option>
+            <option value="bcf_uncompressed">Uncompressed BCF</option>
+            <option value="bcf_compressed">Compressed BCF</option>
+        </param>
+        <param label="Output everything, only the header, or no header" name="header_option" type="select">
+            <option selected="true" value="all">Print All</option>
+            <option value="header_only">Header only</option>
+            <option value="no_header">No Header</option>
+        </param>
+        <param help="Accept following format: CHROM:START-END" label="Region to view" name="region" size="30" type="text" value="" />
+        <param help="trim alternate alleles not seen in the subset" label="Trim alternate alleles" name="trim_alt_alleles" optional="true" type="select">
+            <option value="False">False</option>
+            <option value="True">True</option>
+        </param>
+        <param help="select/exclude sites without a called genotype" label="Sites without a called genotype" name="sites_no_genotype" optional="true" type="select">
+            <option value="off">Turn off completely</option>
+            <option value="False">False</option>
+            <option value="True">True</option>
+        </param>
+        <param help="minimum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" label="Minimum count for non reference" name="min_nref" optional="true" type="integer" value="" />
+        <param help="Maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" label="Maximum count for non reference" name="max_nref" optional="true" type="integer" value="" />
+        <param help="comma separated list of samples to include (or exclude with &quot;^&quot; prefix)" label="Samples to include or exclude" name="samples" optional="true" type="text" value="" />
+        <param help="select comma-separated list of variant types: snps,indels,mnps,other" label="Select variant types" multiple="true" name="include_types" optional="true" type="select">
+            <option value="snps">snps</option>
+            <option value="indels">indels</option>
+            <option value="mnps">mnps</option>
+            <option value="other">other</option>
+        </param>
+        <param help="Listed FILTER strings (e.g. &quot;PASS, . &quot;)" label="FILTER strings" name="filters" optional="true" type="text" value="" />
+        <param help="select known/novel sites only (ID is not/is '.')" label="Select known/novel sites" multiple="true" name="select_sites" optional="true" type="select">
+            <option value="known">Known</option>
+            <option value="novel">novel</option>
+        </param>
+        <param label="select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples" multiple="true" name="private" optional="true" type="select">
+            <option value="private">private</option>
+            <option value="exclude">Exclude private</option>
+        </param>
+    </inputs>
+    <outputs> <!-- Single output dataset: its format defaults to bcf and is switched by change_format for the vcf_uncompressed, vcf_compressed and bcf_compressed choices of output_format. -->
+        <data format="bcf" name="output">
+            <change_format>
+                <when format="vcf" input="output_format" value="vcf_uncompressed" />
+                <when format="vcf_bgzip" input="output_format" value="vcf_compressed" />
+                <when format="bcf_bgzip" input="output_format" value="bcf_compressed" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests> <!-- Functional tests: each test names an input file and parameter values, then compares the produced output with an expected file (some allow differing lines or a size delta). -->
+        <test>
+            <param name="input" value="input1.bcf.gz" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <output file="result1.vcf" ftype="vcf" lines_diff="2" name="output" />
+        </test>
+        <test>
+            <param name="input" value="result1.vcf" />
+            <param name="output_format" value="bcf_compressed" />
+            <output compare="sim_size" delta="100" file="result3.bcf.gz" ftype="bcf_bgzip" name="output" />
+        </test>
+        <test>
+            <param name="input" value="input1.bcf.gz" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_compressed" />
+            <output file="result2.vcf.gz" ftype="vcf_bgzip" lines_diff="2" name="output" />
+        </test>
+        <test>
+            <param name="input" value="merge.a.bcf" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="2:3199812-3199812" />
+            <output file="tabix.2.3199812.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="merge.a.bcf" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="1:3000151-3000151" />
+            <output file="tabix.1.3000151.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="large_chrom_tbi_limit.bcf" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="chr11:1-536870912" />
+            <output file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="large_chrom_csi_limit.vcf.gz" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="chr20:1-2147483647" />
+            <output file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="large_chrom_tbi_limit.vcf.gz" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="chr11:1-536870912" />
+            <output file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="large_chrom_csi_limit.bcf" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="chr20:1-2147483647" />
+            <output file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="large_chrom_csi_limit.bcf" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="chr20" />
+            <output file="large_chrom.20.1.2147483647.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="view.vcf.gz" />
+            <param name="trim_alt_alleles" value="True" />
+            <param name="sites_no_genotype" value="False" />
+            <param name="samples" value="NA00002" />
+            <param name="min_nref" value="1" />
+            <param name="max_nref" value="1" />
+            <param name="include_types" value="snps" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <output file="view.1.out" ftype="vcf" lines_diff="2" name="output" />
+        </test>
+        <test>
+            <param name="input" value="view.vcf.gz" />
+            <param name="filters" value="PASS" />
+            <param name="select_sites" value="known" />
+            <param name="private" value="exclude" />
+            <param name="samples" value="NA00003" />
+            <param name="region" value="20" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <output file="view.2.out" ftype="vcf" lines_diff="2" name="output" />
+        </test>
+        <test>
+            <param name="input" value="view.vcf.gz" />
+            <param name="private" value="private" />
+            <param name="samples" value="NA00003" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <output file="view.3.out" ftype="vcf" lines_diff="2" name="output" />
+        </test>
+    </tests>
+    <help>
+            
+About:   VCF/BCF conversion, view, subset and filter VCF/BCF files.
+
+Usage:   bcftools view [options] &lt;in.vcf.gz&gt; [region1 [...]]
+
+**Output options:**
+
+    -G,   --drop-genotypes              drop individual genotype information (after subsetting if -s option set)
+
+    -h/H, --header-only/--no-header     print the header only/suppress the header in VCF output
+
+    -l,   --compression-level [0-9]     compression level: 0 uncompressed, 1 best speed, 9 best compression [-1]
+
+    -o,   --output-file &lt;file&gt;           output file name [stdout]
+
+    -O,   --output-type &lt;b|u|z|v&gt;        b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]
+
+    -r, --regions &lt;region&gt;               restrict to comma-separated list of regions
+
+    -R, --regions-file &lt;file&gt;            restrict to regions listed in a file
+
+    -t, --targets [^]&lt;region&gt;            similar to -r but streams rather than index-jumps. Exclude regions with "^" prefix
+
+    -T, --targets-file [^]&lt;file&gt;         similar to -R but streams rather than index-jumps. Exclude regions with "^" prefix
+
+
+**Subset options:**
+
+
+    -a,   --trim-alt-alleles            trim alternate alleles not seen in the subset
+
+    -I,   --no-update                   do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)
+
+    -s, --samples [^]&lt;list&gt;        comma separated list of samples to include (or exclude with "^" prefix)
+
+    --force-samples           only warn about unknown subset samples
+    -S, --samples-file [^]&lt;file&gt;   file of samples to include (or exclude with "^" prefix)
+
+
+
+
+**Filter options:**
+
+    -c/C, --min-ac/--max-ac &lt;int&gt;[:&lt;type&gt;]        minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]
+
+    -f,   --apply-filters &lt;list&gt;                 require at least one of the listed FILTER strings (e.g. "PASS,.")
+
+    -g,   --genotype [^]&lt;hom|het|miss&gt;           require one or more hom/het/missing genotype or, if prefixed with "^", exclude sites with hom/het/missing genotypes
+
+    -i/e, --include/--exclude &lt;expr&gt;             select/exclude sites for which the expression is true (see man page for details)
+
+    -k/n, --known/--novel                       select known/novel sites only (ID is not/is '.')
+
+    -m/M, --min-alleles/--max-alleles &lt;int&gt;      minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites)
+
+    -p/P, --phased/--exclude-phased             select/exclude sites where all samples are phased
+
+    -q/Q, --min-af/--max-af &lt;float&gt;[:&lt;type&gt;]      minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]
+
+    -u/U, --uncalled/--exclude-uncalled         select/exclude sites without a called genotype
+
+    -v/V, --types/--exclude-types &lt;list&gt;         select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]
+
+    -x/X, --private/--exclude-private           select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples
+        
+    </help>
+    <citations> <!-- DOI references listed as citations for this tool. -->
+        <citation type="doi">10.1093/bioinformatics/btp352</citation>
+        <citation type="doi">10.1093/bioinformatics/btr509</citation>
+        <citation type="doi">10.1093/bioinformatics/btr076</citation>
+    </citations>
+</tool>
Binary file test-data/input1.bcf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/large_chrom.20.1.2147483647.out	Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,9 @@
+chr20	76962	.	T	C	999	PASS	.
+chr20	126310	.	ACC	A	999	PASS	.
+chr20	138125	.	G	T	999	PASS	.
+chr20	138148	.	C	T	999	PASS	.
+chr20	271225	.	T	TTTA,TA	999	PASS	.
+chr20	304568	.	C	T	999	PASS	.
+chr20	620255100	.	AG	T	999	PASS	.
+chr20	630255200	.	G	C	999	PASS	.
+chr20	2147483647	.	A	T	999	PASS	.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/large_chrom_csi_limit.20.1.2147483647.out	Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,9 @@
+chr20	76962	.	T	C	999	PASS	.
+chr20	126310	.	ACC	A	999	PASS	.
+chr20	138125	.	G	T	999	PASS	.
+chr20	138148	.	C	T	999	PASS	.
+chr20	271225	.	T	TTTA,TA	999	PASS	.
+chr20	304568	.	C	T	999	PASS	.
+chr20	620255100	.	AG	T	999	PASS	.
+chr20	630255200	.	G	C	999	PASS	.
+chr20	2147483647	.	A	T	999	PASS	.
Binary file test-data/large_chrom_csi_limit.bcf has changed
Binary file test-data/large_chrom_csi_limit.vcf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/large_chrom_tbi_limit.20.1.536870912.out	Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,3 @@
+chr11	2343543	.	A	.	999	PASS	.
+chr11	5464562	.	C	T	999	PASS	.
+chr11	116870911	.	C	G	999	PASS	.
Binary file test-data/large_chrom_tbi_limit.bcf has changed
Binary file test-data/large_chrom_tbi_limit.vcf.gz has changed
Binary file test-data/merge.a.bcf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result1.vcf	Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,60 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=gi|360034408|ref|NC_016445.1|_2000000_2400000,length=400001>
+##fileDate=20150323
+##source=freeBayes version 0.9.8
+##reference=/home/phil/cholera-files-subsample/test/reference/3554-08.2010EL-1786-c1_2000_2400kb.fasta
+##phasing=none
+##commandline="/share/apps/freebayes/bin/freebayes --bam /home/phil/cholera-files-subsample/test/bam/3554-08.bam --vcf /home/phil/cholera-files-subsample/test/vcf/3554-08.vcf --fasta-reference /home/phil/cholera-files-subsample/test/reference/3554-08.2010EL-1786-c1_2000_2400kb.fasta --min-coverage 15 --pvar 0 --ploidy 1 --left-align-indels --min-mapping-quality 30 --min-base-quality 30 --min-alternate-fraction 0.75"
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
+##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observations">
+##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations">
+##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">
+##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">
+##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio.  Ratio between depth in samples with each called alternate allele and those without.">
+##INFO=<ID=XRM,Number=1,Type=Float,Description="Reference allele read mismatch rate: The rate of SNPs + MNPs + INDELs in reads supporting the reference allele.">
+##INFO=<ID=XRS,Number=1,Type=Float,Description="Reference allele read SNP rate: The rate of per-base mismatches (SNPs + MNPs) in reads supporting the reference allele.">
+##INFO=<ID=XRI,Number=1,Type=Float,Description="Reference allele read INDEL rate: The rate of INDELs (gaps) in reads supporting the reference allele.">
+##INFO=<ID=XAM,Number=A,Type=Float,Description="Alternate allele read mismatch rate: The rate of SNPs + MNPs + INDELs in reads supporting the alternate allele, excluding the called variant.">
+##INFO=<ID=XAS,Number=A,Type=Float,Description="Alternate allele read SNP rate: The rate of per-base mismatches (SNPs + MNPs) in reads supporting the alternate allele, excluding the called variant.">
+##INFO=<ID=XAI,Number=A,Type=Float,Description="Alternate allele read INDEL rate: The rate of INDELs (gaps) in reads supporting the alternate allele, excluding the called variant.">
+##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best.">
+##INFO=<ID=BVAR,Number=0,Type=Flag,Description="The best genotype combination in the posterior is variant (non homozygous).">
+##INFO=<ID=CpG,Number=0,Type=Flag,Description="CpG site (either CpG, TpG or CpA)">
+##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex.">
+##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing.  Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR.">
+##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position.">
+##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles.">
+##INFO=<ID=HWE,Number=1,Type=Float,Description="Phred-scaled discrete HWE prior probability of the genotyping across all samples.">
+##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length">
+##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles">
+##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles">
+##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments">
+##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
+##FORMAT=<ID=GLE,Number=1,Type=String,Description="Genotype Likelihood Explicit, same as GL, but with tags to indicate the specific genotype.  For instance, 0^-75.22|1^-223.42|0/0^-323.03|1/0^-99.29|1/1^-802.53 represents both haploid and diploid genotype likilehoods in a biallelic context">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count">
+##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations">
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count">
+##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations">
+##bcftools_viewVersion=1.2+htslib-1.2.1
+##bcftools_viewCommand=view -O b -o /home/phil/cholera-files-subsample/test/vcf-split/3554-08.bcf.gz
+##bcftools_viewCommand=view input.bcf.gz
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	unknown
+gi|360034408|ref|NC_016445.1|_2000000_2400000	149638	.	T	C	4549.55	.	AB=0;ABP=0;AC=1;AF=1;AN=1;AO=83;CIGAR=1X;DP=84;DPRA=0;EPP=5.12945;EPPR=0;HWE=-0;LEN=1;MEANALT=2;MQM=54.1084;MQMR=0;NS=1;NUMALT=1;ODDS=1047.57;PAIRED=0;PAIREDR=0;RO=0;RPP=7.43173;RPPR=0;RUN=1;SAP=3.24576;SRP=0;TYPE=snp;XAI=0.00379215;XAM=0.0144638;XAS=0.0106717;XRI=0;XRM=0;XRS=0;BVAR	GT:GQ:DP:RO:QR:AO:QA:GL	1:50000:84:0:0:83:5052:-458.255,-3.3
+gi|360034408|ref|NC_016445.1|_2000000_2400000	151395	.	A	G	2231.3	.	AB=0;ABP=0;AC=1;AF=1;AN=1;AO=50;CIGAR=1X;DP=62;DPRA=0;EPP=3.70517;EPPR=3.87889;HWE=-0;LEN=1;MEANALT=3;MQM=54.06;MQMR=54;NS=1;NUMALT=1;ODDS=513.775;PAIRED=0;PAIREDR=0;RO=10;RPP=5.78978;RPPR=16.9077;RUN=1;SAP=7.35324;SRP=6.48466;TYPE=snp;XAI=0.00378432;XAM=0.0186315;XAS=0.0148472;XRI=0.00625034;XRM=0.0151692;XRS=0.00891891;BVAR	GT:GQ:DP:RO:QR:AO:QA:GL	1:50000:62:10:636:50:3115:-287.894,-64.7642
+gi|360034408|ref|NC_016445.1|_2000000_2400000	151608	.	A	G	2818.36	.	AB=0;ABP=0;AC=1;AF=1;AN=1;AO=65;CIGAR=1X;DP=83;DPRA=0;EPP=3.31097;EPPR=3.63072;HWE=-0;LEN=1;MEANALT=5;MQM=54.8308;MQMR=54.8571;NS=1;NUMALT=1;ODDS=648.95;PAIRED=0;PAIREDR=0;RO=14;RPP=4.64726;RPPR=33.4109;RUN=1;SAP=23.8898;SRP=3.63072;TYPE=snp;XAI=0.00600565;XAM=0.0269139;XAS=0.0209083;XRI=0.00934969;XRM=0.017822;XRS=0.00847229;BVAR	GT:GQ:DP:RO:QR:AO:QA:GL	1:50000:83:14:850:65:3981:-371.848,-90.0122
Binary file test-data/result2.vcf.gz has changed
Binary file test-data/result3.bcf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tabix.1.3000151.out	Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,1 @@
+1	3000151	.	C	T	59.2	PASS	AN=4;AC=2	GT:DP:GQ	0/1:32:245	0/1:32:245
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tabix.2.3199812.out	Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,1 @@
+2	3199812	.	G	GTT,GT	82.7	PASS	AN=4;AC=2,2	GT:GQ:DP	1/2:322:26	1/2:322:26
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/view.1.out	Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,39 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##reference=file:///seq/references/1000Genomes-NCBI37.fasta
+##contig=<ID=11,length=135006516>
+##contig=<ID=20,length=63025520>
+##contig=<ID=X,length=155270560>
+##contig=<ID=Y,length=59373566>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of reads containing spanning deletions">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest contiguous homopolymer run of variant allele in either direction">
+##INFO=<ID=HWE,Number=1,Type=Float,Description="Hardy-Weinberg equilibrium test (PMID:15789306)">
+##INFO=<ID=ICF,Number=1,Type=Float,Description="Inbreeding coefficient F">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=IS,Number=2,Type=Float,Description="Maximum number of reads supporting an indel and fraction of indel reads">
+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total mapping quality zero reads">
+##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant confidence/quality by depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
+##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]">
+##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
+##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00002
+20	138125	rs2298108	G	T	999	PASS	DP4=174391,20849,82080,4950;DP=286107;Dels=0;FS=3200;HWE=0.199462;ICF=0.01858;MQ0=0;MQ=46;PV4=0,0,0,1;QD=17.22;AN=2;AC=1	GT:PL:DP:GQ	0/1:140,0,255:71:99
+20	138148	rs2298109	C	T	999	PASS	DP4=194136,45753,94945,14367;DP=356657;Dels=0;FS=3200;HWE=0.177865;ICF=0.0198;MQ0=0;MQ=47;PV4=0,0,0,1;QD=14.57;AN=2;AC=1	GT:PL:DP:GQ	0/1:192,0,255:82:99
+20	304568	.	C	T	999	PASS	DP4=16413,4543,945,156;DP=43557;Dels=0;FS=3200;HWE=0.076855;ICF=0.0213;MQ0=0;MQ=50;PV4=0,0,0,1;QD=15.45;AN=2;AC=1	GT:PL:DP:GQ	0|1:192,0,255:13:99
+X	2942109	rs5939407	T	C	999	PASS	DP4=23273,27816,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=1;AC=1	GT:PL:DP:GQ	1:255,0:33:99
+X	3048719	.	T	C	999	PASS	DP4=13263,27466,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=1;AC=1	GT:PL:DP:GQ	1:255,0:33:99
+Y	8657215	.	C	A	999	PASS	DP4=74915,114274,1948,2955;DP=195469;Dels=0;FS=3.181;MQ0=0;MQ=50;PV4=0.86,1,0,1;QD=33.77;AN=1;AC=1	GT:PL:DP:GQ	1:255,0:64:99
+Y	10011673	rs78249411	G	A	999	MinAB	DP4=47351,30839,178796,279653;DP=550762;Dels=0;FS=41.028;MQ0=37362;MQ=26;PV4=0,0,0,1;QD=17.45;AN=1;AC=1	GT:PL:DP:GQ	1:95,0:130:99
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/view.2.out	Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,35 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##reference=file:///seq/references/1000Genomes-NCBI37.fasta
+##contig=<ID=11,length=135006516>
+##contig=<ID=20,length=63025520>
+##contig=<ID=X,length=155270560>
+##contig=<ID=Y,length=59373566>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of reads containing spanning deletions">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest contiguous homopolymer run of variant allele in either direction">
+##INFO=<ID=HWE,Number=1,Type=Float,Description="Hardy-Weinberg equilibrium test (PMID:15789306)">
+##INFO=<ID=ICF,Number=1,Type=Float,Description="Inbreeding coefficient F">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=IS,Number=2,Type=Float,Description="Maximum number of reads supporting an indel and fraction of indel reads">
+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total mapping quality zero reads">
+##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant confidence/quality by depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
+##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]">
+##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
+##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00003
+20	76962	rs6111385	T	C	999	PASS	DP4=110138,70822,421911,262673;DP=911531;Dels=0;FS=21.447;HWE=0.491006;ICF=-0.01062;MQ0=1;MQ=46;PV4=2.5e-09,0,0,1;QD=22.31;AC=2;AN=2	GT:PL:DP:GQ	1/1:255,255,0:182:99
+20	138125	rs2298108	G	T	999	PASS	DP4=174391,20849,82080,4950;DP=286107;Dels=0;FS=3200;HWE=0.199462;ICF=0.01858;MQ0=0;MQ=46;PV4=0,0,0,1;QD=17.22;AN=2;AC=2	GT:PL:DP:GQ	1/1:255,199,0:66:99
+20	138148	rs2298109	C	T	999	PASS	DP4=194136,45753,94945,14367;DP=356657;Dels=0;FS=3200;HWE=0.177865;ICF=0.0198;MQ0=0;MQ=47;PV4=0,0,0,1;QD=14.57;AN=2;AC=2	GT:PL:DP:GQ	1/1:255,235,0:78:99
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/view.3.out	Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,34 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##reference=file:///seq/references/1000Genomes-NCBI37.fasta
+##contig=<ID=11,length=135006516>
+##contig=<ID=20,length=63025520>
+##contig=<ID=X,length=155270560>
+##contig=<ID=Y,length=59373566>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of reads containing spanning deletions">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest contiguous homopolymer run of variant allele in either direction">
+##INFO=<ID=HWE,Number=1,Type=Float,Description="Hardy-Weinberg equilibrium test (PMID:15789306)">
+##INFO=<ID=ICF,Number=1,Type=Float,Description="Inbreeding coefficient F">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=IS,Number=2,Type=Float,Description="Maximum number of reads supporting an indel and fraction of indel reads">
+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total mapping quality zero reads">
+##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant confidence/quality by depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
+##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]">
+##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
+##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00003
+X	2928329	rs62584840	C	T	999	PASS	DP4=302,9137,32,1329;DP=11020;Dels=0;FS=13.38;HWE=0.284332;ICF=0.0253;MQ0=0;MQ=49;PV4=0.094,0,0,1;QD=18.61;AN=2;AC=1	GT:PL:DP:GQ	0/1:73,0,19:4:30
+X	2933066	rs61746890	G	C	999	PASS	DP4=69865,100561,461,783;DP=173729;Dels=0;FS=10.833;MQ0=0;MQ=50;PV4=0.005,3.6e-14,0,1;QD=15.33;AN=2;AC=1	GT:PL:DP:GQ	0/1:255,255,255:62:99
Binary file test-data/view.vcf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool_dependency> <!-- Tool Shed package repositories that supply the bcftools and tabix requirements. -->
+    <package name="bcftools" version="1.0"> <!-- NOTE(review): repository is named package_bcftools_1_2; confirm it really provides version 1.0. -->
+        <repository changeset_revision="cb6cd479c02e" name="package_bcftools_1_2" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="tabix" version="1.0"> <!-- NOTE(review): repository is named package_tabix_0_2_6; confirm it really provides version 1.0. -->
+        <repository changeset_revision="54f8b9bf9b6c" name="package_tabix_0_2_6" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>