Mercurial > repos > takadonet > bcftools_view
changeset 0:667b2d503ba3 draft default tip
Uploaded
| author | takadonet |
|---|---|
| date | Wed, 08 Apr 2015 12:09:16 -0400 |
| parents | |
| children | |
| files | bcftools_view.xml test-data/input1.bcf.gz test-data/large_chrom.20.1.2147483647.out test-data/large_chrom_csi_limit.20.1.2147483647.out test-data/large_chrom_csi_limit.bcf test-data/large_chrom_csi_limit.vcf.gz test-data/large_chrom_tbi_limit.20.1.536870912.out test-data/large_chrom_tbi_limit.bcf test-data/large_chrom_tbi_limit.vcf.gz test-data/merge.a.bcf test-data/result1.vcf test-data/result2.vcf.gz test-data/result3.bcf.gz test-data/tabix.1.3000151.out test-data/tabix.2.3199812.out test-data/view.1.out test-data/view.2.out test-data/view.3.out test-data/view.vcf.gz tool_dependencies.xml |
| diffstat | 20 files changed, 515 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bcftools_view.xml Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,285 @@
+<tool id="bcftools_view" name="bcftools_view" version="0.1.0">
+    <!-- Galaxy wrapper for "bcftools view": converts, subsets and filters one VCF/BCF dataset. -->
+    <description>VCF/BCF conversion, view, subset and filter</description>
+    <requirements>
+        <requirement type="package" version="1.0">bcftools</requirement>
+        <requirement type="package" version="1.0">tabix</requirement>
+    </requirements>
+    <stdio>
+        <!-- Any non-zero exit status of the command chain fails the job. -->
+        <exit_code range="1:" />
+    </stdio>
+    <command><![CDATA[
+        ## Stage the input in the job working directory so the index written by
+        ## "bcftools index" is created there rather than next to the dataset file.
+        #if str($input.ext) == 'vcf'
+            ## Plain-text VCF has to be bgzip-compressed before it can be indexed.
+            bgzip -c '$input' > input.vcf.gz &&
+            #set $input_file = 'input.vcf.gz'
+        #else
+            ln -s '$input' input.bcf &&
+            #set $input_file = 'input.bcf'
+        #end if
+        bcftools index '$input_file' &&
+        bcftools view
+        #if str($output_format) == 'vcf_uncompressed'
+            -O v
+        #elif str($output_format) == 'vcf_compressed'
+            -O z
+        #elif str($output_format) == 'bcf_uncompressed'
+            -O u
+        #elif str($output_format) == 'bcf_compressed'
+            -O b
+        #end if
+        #if str($header_option) == 'header_only'
+            --header-only
+        #elif str($header_option) == 'no_header'
+            --no-header
+        #end if
+        -o '$output'
+        #if str($region)
+            -r "$region"
+        #end if
+        #if str($trim_alt_alleles) == 'True'
+            -a
+        #end if
+        ## -u selects, -U excludes sites without a called genotype; "off" or unset adds neither.
+        #if str($sites_no_genotype) == 'True'
+            -u
+        #elif str($sites_no_genotype) == 'False'
+            -U
+        #end if
+        ## Compare as strings so that an explicit count of 0 is still passed on.
+        #if str($min_nref) not in ('', 'None')
+            --min-ac "$min_nref"
+        #end if
+        #if str($max_nref) not in ('', 'None')
+            --max-ac "$max_nref"
+        #end if
+        #if $samples
+            -s "$samples"
+        #end if
+        #if $include_types
+            -v "$include_types"
+        #end if
+        #if $filters
+            --apply-filters "$filters"
+        #end if
+        #if $select_sites
+            #for $site in str($select_sites).split(',')
+                #if $site == 'known'
+                    -k
+                #elif $site == 'novel'
+                    -n
+                #end if
+            #end for
+        #end if
+        #if $private
+            #for $mode in str($private).split(',')
+                #if $mode == 'private'
+                    -x
+                #elif $mode == 'exclude'
+                    -X
+                #end if
+            #end for
+        #end if
+        '$input_file'
+    ]]></command>
+    <inputs>
+        <param format="bcf,vcf" label="VCF/BCF file to view" name="input" optional="false" type="data" />
+        <param label="Choose the output format" name="output_format" type="select">
+            <option selected="true" value="vcf_uncompressed">Uncompressed VCF</option>
+            <option value="vcf_compressed">Compressed VCF</option>
+            <option value="bcf_uncompressed">Uncompressed BCF</option>
+            <option value="bcf_compressed">Compressed BCF</option>
+        </param>
+        <param label="Output everything, only the header, or no header" name="header_option" type="select">
+            <option selected="true" value="all">Print All</option>
+            <option value="header_only">Header only</option>
+            <option value="no_header">No Header</option>
+        </param>
+        <param help="Accepted format: CHROM or CHROM:START-END" label="Region to view" name="region" size="30" type="text" value="" />
+        <param help="Trim alternate alleles not seen in the subset" label="Trim alternate alleles" name="trim_alt_alleles" optional="true" type="select">
+            <option value="False">False</option>
+            <option value="True">True</option>
+        </param>
+        <param help="Select/exclude sites without a called genotype" label="Sites without a called genotype" name="sites_no_genotype" optional="true" type="select">
+            <option value="off">Turn off completely</option>
+            <option value="False">False</option>
+            <option value="True">True</option>
+        </param>
+        <param help="Minimum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" label="Minimum count for non reference" name="min_nref" optional="true" type="integer" value="" />
+        <param help="Maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]" label="Maximum count for non reference" name="max_nref" optional="true" type="integer" value="" />
+        <param help="Comma-separated list of samples to include (or exclude with '^' prefix)" label="Samples to include or exclude" name="samples" optional="true" type="text" value="" />
+        <param help="Select one or more variant types: snps, indels, mnps, other" label="Select variant types" multiple="true" name="include_types" optional="true" type="select">
+            <option value="snps">snps</option>
+            <option value="indels">indels</option>
+            <option value="mnps">mnps</option>
+            <option value="other">other</option>
+        </param>
+        <param help="Comma-separated FILTER strings; sites matching at least one are kept (e.g. 'PASS,.')" label="FILTER strings" name="filters" optional="true" type="text" value="" />
+        <param help="Select known/novel sites only (ID is not/is '.')" label="Select known/novel sites" multiple="true" name="select_sites" optional="true" type="select">
+            <option value="known">Known</option>
+            <option value="novel">novel</option>
+        </param>
+        <param label="Select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples" multiple="true" name="private" optional="true" type="select">
+            <option value="private">private</option>
+            <option value="exclude">Exclude private</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- Defaults to bcf (used for bcf_uncompressed); the other output_format values switch the datatype. -->
+        <data format="bcf" name="output">
+            <change_format>
+                <when format="vcf" input="output_format" value="vcf_uncompressed" />
+                <when format="vcf_bgzip" input="output_format" value="vcf_compressed" />
+                <when format="bcf_bgzip" input="output_format" value="bcf_compressed" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="input1.bcf.gz" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <output file="result1.vcf" ftype="vcf" lines_diff="2" name="output" />
+        </test>
+        <test>
+            <param name="input" value="result1.vcf" />
+            <param name="output_format" value="bcf_compressed" />
+            <output compare="sim_size" delta="100" file="result3.bcf.gz" ftype="bcf_bgzip" name="output" />
+        </test>
+        <test>
+            <param name="input" value="input1.bcf.gz" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_compressed" />
+            <output file="result2.vcf.gz" ftype="vcf_bgzip" lines_diff="2" name="output" />
+        </test>
+        <test>
+            <param name="input" value="merge.a.bcf" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="2:3199812-3199812" />
+            <output file="tabix.2.3199812.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="merge.a.bcf" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="1:3000151-3000151" />
+            <output file="tabix.1.3000151.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="large_chrom_tbi_limit.bcf" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="chr11:1-536870912" />
+            <output file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="large_chrom_csi_limit.vcf.gz" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="chr20:1-2147483647" />
+            <output file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="large_chrom_tbi_limit.vcf.gz" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="chr11:1-536870912" />
+            <output file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="large_chrom_csi_limit.bcf" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="chr20:1-2147483647" />
+            <output file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="large_chrom_csi_limit.bcf" />
+            <param name="header_option" value="no_header" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <param name="region" value="chr20" />
+            <output file="large_chrom.20.1.2147483647.out" ftype="vcf" name="output" />
+        </test>
+        <test>
+            <param name="input" value="view.vcf.gz" />
+            <param name="trim_alt_alleles" value="True" />
+            <param name="sites_no_genotype" value="False" />
+            <param name="samples" value="NA00002" />
+            <param name="min_nref" value="1" />
+            <param name="max_nref" value="1" />
+            <param name="include_types" value="snps" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <output file="view.1.out" ftype="vcf" lines_diff="2" name="output" />
+        </test>
+        <test>
+            <param name="input" value="view.vcf.gz" />
+            <param name="filters" value="PASS" />
+            <param name="select_sites" value="known" />
+            <param name="private" value="exclude" />
+            <param name="samples" value="NA00003" />
+            <param name="region" value="20" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <output file="view.2.out" ftype="vcf" lines_diff="2" name="output" />
+        </test>
+        <test>
+            <param name="input" value="view.vcf.gz" />
+            <param name="private" value="private" />
+            <param name="samples" value="NA00003" />
+            <param name="output_format" value="vcf_uncompressed" />
+            <output file="view.3.out" ftype="vcf" lines_diff="2" name="output" />
+        </test>
+    </tests>
+    <help><![CDATA[
+About: VCF/BCF conversion, view, subset and filter VCF/BCF files.
+
+Usage: bcftools view [options] <in.vcf.gz> [region1 [...]]
+
+**Output options:**
+
+::
+
+    -G, --drop-genotypes                 drop individual genotype information (after subsetting if -s option set)
+    -h/H, --header-only/--no-header      print the header only/suppress the header in VCF output
+    -l, --compression-level [0-9]        compression level: 0 uncompressed, 1 best speed, 9 best compression [-1]
+    -o, --output-file <file>             output file name [stdout]
+    -O, --output-type <b|u|z|v>          b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]
+    -r, --regions <region>               restrict to comma-separated list of regions
+    -R, --regions-file <file>            restrict to regions listed in a file
+    -t, --targets [^]<region>            similar to -r but streams rather than index-jumps. Exclude regions with "^" prefix
+    -T, --targets-file [^]<file>         similar to -R but streams rather than index-jumps. Exclude regions with "^" prefix
+
+**Subset options:**
+
+::
+
+    -a, --trim-alt-alleles               trim alternate alleles not seen in the subset
+    -I, --no-update                      do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)
+    -s, --samples [^]<list>              comma separated list of samples to include (or exclude with "^" prefix)
+        --force-samples                  only warn about unknown subset samples
+    -S, --samples-file [^]<file>         file of samples to include (or exclude with "^" prefix)
+
+**Filter options:**
+
+::
+
+    -c/C, --min-ac/--max-ac <int>[:<type>]     minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]
+    -f, --apply-filters <list>                 require at least one of the listed FILTER strings (e.g. "PASS,.")
+    -g, --genotype [^]<hom|het|miss>           require one or more hom/het/missing genotype or, if prefixed with "^", exclude sites with hom/het/missing genotypes
+    -i/e, --include/--exclude <expr>           select/exclude sites for which the expression is true (see man page for details)
+    -k/n, --known/--novel                      select known/novel sites only (ID is not/is '.')
+    -m/M, --min-alleles/--max-alleles <int>    minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites)
+    -p/P, --phased/--exclude-phased            select/exclude sites where all samples are phased
+    -q/Q, --min-af/--max-af <float>[:<type>]   minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]
+    -u/U, --uncalled/--exclude-uncalled        select/exclude sites without a called genotype
+    -v/V, --types/--exclude-types <list>       select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]
+    -x/X, --private/--exclude-private          select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btp352</citation>
+        <citation type="doi">10.1093/bioinformatics/btr509</citation>
+        <citation type="doi">10.1093/bioinformatics/btr076</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/large_chrom.20.1.2147483647.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,9 @@ +chr20 76962 . T C 999 PASS . +chr20 126310 . ACC A 999 PASS . +chr20 138125 . G T 999 PASS . +chr20 138148 . C T 999 PASS . +chr20 271225 . T TTTA,TA 999 PASS . +chr20 304568 . C T 999 PASS . +chr20 620255100 . AG T 999 PASS . +chr20 630255200 . G C 999 PASS . +chr20 2147483647 . A T 999 PASS .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/large_chrom_csi_limit.20.1.2147483647.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,9 @@ +chr20 76962 . T C 999 PASS . +chr20 126310 . ACC A 999 PASS . +chr20 138125 . G T 999 PASS . +chr20 138148 . C T 999 PASS . +chr20 271225 . T TTTA,TA 999 PASS . +chr20 304568 . C T 999 PASS . +chr20 620255100 . AG T 999 PASS . +chr20 630255200 . G C 999 PASS . +chr20 2147483647 . A T 999 PASS .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/large_chrom_tbi_limit.20.1.536870912.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,3 @@ +chr11 2343543 . A . 999 PASS . +chr11 5464562 . C T 999 PASS . +chr11 116870911 . C G 999 PASS .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result1.vcf Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,60 @@ +##fileformat=VCFv4.1 +##FILTER=<ID=PASS,Description="All filters passed"> +##contig=<ID=gi|360034408|ref|NC_016445.1|_2000000_2400000,length=400001> +##fileDate=20150323 +##source=freeBayes version 0.9.8 +##reference=/home/phil/cholera-files-subsample/test/reference/3554-08.2010EL-1786-c1_2000_2400kb.fasta +##phasing=none +##commandline="/share/apps/freebayes/bin/freebayes --bam /home/phil/cholera-files-subsample/test/bam/3554-08.bam --vcf /home/phil/cholera-files-subsample/test/vcf/3554-08.vcf --fasta-reference /home/phil/cholera-files-subsample/test/reference/3554-08.2010EL-1786-c1_2000_2400kb.fasta --min-coverage 15 --pvar 0 --ploidy 1 --left-align-indels --min-mapping-quality 30 --min-base-quality 30 --min-alternate-fraction 0.75" +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observations"> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations"> +##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using 
Hoeffding's inequality"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> +##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. 
Ratio between depth in samples with each called alternate allele and those without."> +##INFO=<ID=XRM,Number=1,Type=Float,Description="Reference allele read mismatch rate: The rate of SNPs + MNPs + INDELs in reads supporting the reference allele."> +##INFO=<ID=XRS,Number=1,Type=Float,Description="Reference allele read SNP rate: The rate of per-base mismatches (SNPs + MNPs) in reads supporting the reference allele."> +##INFO=<ID=XRI,Number=1,Type=Float,Description="Reference allele read INDEL rate: The rate of INDELs (gaps) in reads supporting the reference allele."> +##INFO=<ID=XAM,Number=A,Type=Float,Description="Alternate allele read mismatch rate: The rate of SNPs + MNPs + INDELs in reads supporting the alternate allele, excluding the called variant."> +##INFO=<ID=XAS,Number=A,Type=Float,Description="Alternate allele read SNP rate: The rate of per-base mismatches (SNPs + MNPs) in reads supporting the alternate allele, excluding the called variant."> +##INFO=<ID=XAI,Number=A,Type=Float,Description="Alternate allele read INDEL rate: The rate of INDELs (gaps) in reads supporting the alternate allele, excluding the called variant."> +##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> +##INFO=<ID=BVAR,Number=0,Type=Flag,Description="The best genotype combination in the posterior is variant (non homozygous)."> +##INFO=<ID=CpG,Number=0,Type=Flag,Description="CpG site (either CpG, TpG or CpA)"> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. 
Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> +##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> +##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> +##INFO=<ID=HWE,Number=1,Type=Float,Description="Phred-scaled discrete HWE prior probability of the genotyping across all samples."> +##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> +##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> +##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> +##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> +##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=GLE,Number=1,Type=String,Description="Genotype Likelihood Explicit, same as GL, but with tags to indicate the specific genotype. 
For instance, 0^-75.22|1^-223.42|0/0^-323.03|1/0^-99.29|1/1^-802.53 represents both haploid and diploid genotype likilehoods in a biallelic context"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##bcftools_viewVersion=1.2+htslib-1.2.1 +##bcftools_viewCommand=view -O b -o /home/phil/cholera-files-subsample/test/vcf-split/3554-08.bcf.gz +##bcftools_viewCommand=view input.bcf.gz +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT unknown +gi|360034408|ref|NC_016445.1|_2000000_2400000 149638 . T C 4549.55 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=83;CIGAR=1X;DP=84;DPRA=0;EPP=5.12945;EPPR=0;HWE=-0;LEN=1;MEANALT=2;MQM=54.1084;MQMR=0;NS=1;NUMALT=1;ODDS=1047.57;PAIRED=0;PAIREDR=0;RO=0;RPP=7.43173;RPPR=0;RUN=1;SAP=3.24576;SRP=0;TYPE=snp;XAI=0.00379215;XAM=0.0144638;XAS=0.0106717;XRI=0;XRM=0;XRS=0;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:84:0:0:83:5052:-458.255,-3.3 +gi|360034408|ref|NC_016445.1|_2000000_2400000 151395 . A G 2231.3 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=50;CIGAR=1X;DP=62;DPRA=0;EPP=3.70517;EPPR=3.87889;HWE=-0;LEN=1;MEANALT=3;MQM=54.06;MQMR=54;NS=1;NUMALT=1;ODDS=513.775;PAIRED=0;PAIREDR=0;RO=10;RPP=5.78978;RPPR=16.9077;RUN=1;SAP=7.35324;SRP=6.48466;TYPE=snp;XAI=0.00378432;XAM=0.0186315;XAS=0.0148472;XRI=0.00625034;XRM=0.0151692;XRS=0.00891891;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:62:10:636:50:3115:-287.894,-64.7642 +gi|360034408|ref|NC_016445.1|_2000000_2400000 151608 . A G 2818.36 . 
AB=0;ABP=0;AC=1;AF=1;AN=1;AO=65;CIGAR=1X;DP=83;DPRA=0;EPP=3.31097;EPPR=3.63072;HWE=-0;LEN=1;MEANALT=5;MQM=54.8308;MQMR=54.8571;NS=1;NUMALT=1;ODDS=648.95;PAIRED=0;PAIREDR=0;RO=14;RPP=4.64726;RPPR=33.4109;RUN=1;SAP=23.8898;SRP=3.63072;TYPE=snp;XAI=0.00600565;XAM=0.0269139;XAS=0.0209083;XRI=0.00934969;XRM=0.017822;XRS=0.00847229;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:83:14:850:65:3981:-371.848,-90.0122
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tabix.1.3000151.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,1 @@ +1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tabix.2.3199812.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,1 @@ +2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/view.1.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,39 @@ +##fileformat=VCFv4.1 +##FILTER=<ID=PASS,Description="All filters passed"> +##reference=file:///seq/references/1000Genomes-NCBI37.fasta +##contig=<ID=11,length=135006516> +##contig=<ID=20,length=63025520> +##contig=<ID=X,length=155270560> +##contig=<ID=Y,length=59373566> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth"> +##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"> +##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of reads containing spanning deletions"> +##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest contiguous homopolymer run of variant allele in either direction"> +##INFO=<ID=HWE,Number=1,Type=Float,Description="Hardy-Weinberg equilibrium test (PMID:15789306)"> +##INFO=<ID=ICF,Number=1,Type=Float,Description="Inbreeding coefficient F"> +##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> +##INFO=<ID=IS,Number=2,Type=Float,Description="Maximum number of reads supporting an indel and fraction of indel reads"> +##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads"> +##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total mapping quality zero reads"> +##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias"> +##INFO=<ID=QD,Number=1,Type=Float,Description="Variant confidence/quality by depth"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases"> 
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods"> +##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]"> +##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]"> +##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]"> +##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]"> +##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00002 +20 138125 rs2298108 G T 999 PASS DP4=174391,20849,82080,4950;DP=286107;Dels=0;FS=3200;HWE=0.199462;ICF=0.01858;MQ0=0;MQ=46;PV4=0,0,0,1;QD=17.22;AN=2;AC=1 GT:PL:DP:GQ 0/1:140,0,255:71:99 +20 138148 rs2298109 C T 999 PASS DP4=194136,45753,94945,14367;DP=356657;Dels=0;FS=3200;HWE=0.177865;ICF=0.0198;MQ0=0;MQ=47;PV4=0,0,0,1;QD=14.57;AN=2;AC=1 GT:PL:DP:GQ 0/1:192,0,255:82:99 +20 304568 . C T 999 PASS DP4=16413,4543,945,156;DP=43557;Dels=0;FS=3200;HWE=0.076855;ICF=0.0213;MQ0=0;MQ=50;PV4=0,0,0,1;QD=15.45;AN=2;AC=1 GT:PL:DP:GQ 0|1:192,0,255:13:99 +X 2942109 rs5939407 T C 999 PASS DP4=23273,27816,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=1;AC=1 GT:PL:DP:GQ 1:255,0:33:99 +X 3048719 . T C 999 PASS DP4=13263,27466,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=1;AC=1 GT:PL:DP:GQ 1:255,0:33:99 +Y 8657215 . C A 999 PASS DP4=74915,114274,1948,2955;DP=195469;Dels=0;FS=3.181;MQ0=0;MQ=50;PV4=0.86,1,0,1;QD=33.77;AN=1;AC=1 GT:PL:DP:GQ 1:255,0:64:99 +Y 10011673 rs78249411 G A 999 MinAB DP4=47351,30839,178796,279653;DP=550762;Dels=0;FS=41.028;MQ0=37362;MQ=26;PV4=0,0,0,1;QD=17.45;AN=1;AC=1 GT:PL:DP:GQ 1:95,0:130:99
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/view.2.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,35 @@ +##fileformat=VCFv4.1 +##FILTER=<ID=PASS,Description="All filters passed"> +##reference=file:///seq/references/1000Genomes-NCBI37.fasta +##contig=<ID=11,length=135006516> +##contig=<ID=20,length=63025520> +##contig=<ID=X,length=155270560> +##contig=<ID=Y,length=59373566> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth"> +##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"> +##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of reads containing spanning deletions"> +##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest contiguous homopolymer run of variant allele in either direction"> +##INFO=<ID=HWE,Number=1,Type=Float,Description="Hardy-Weinberg equilibrium test (PMID:15789306)"> +##INFO=<ID=ICF,Number=1,Type=Float,Description="Inbreeding coefficient F"> +##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> +##INFO=<ID=IS,Number=2,Type=Float,Description="Maximum number of reads supporting an indel and fraction of indel reads"> +##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads"> +##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total mapping quality zero reads"> +##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias"> +##INFO=<ID=QD,Number=1,Type=Float,Description="Variant confidence/quality by depth"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases"> 
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods"> +##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]"> +##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]"> +##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]"> +##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]"> +##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00003 +20 76962 rs6111385 T C 999 PASS DP4=110138,70822,421911,262673;DP=911531;Dels=0;FS=21.447;HWE=0.491006;ICF=-0.01062;MQ0=1;MQ=46;PV4=2.5e-09,0,0,1;QD=22.31;AC=2;AN=2 GT:PL:DP:GQ 1/1:255,255,0:182:99 +20 138125 rs2298108 G T 999 PASS DP4=174391,20849,82080,4950;DP=286107;Dels=0;FS=3200;HWE=0.199462;ICF=0.01858;MQ0=0;MQ=46;PV4=0,0,0,1;QD=17.22;AN=2;AC=2 GT:PL:DP:GQ 1/1:255,199,0:66:99 +20 138148 rs2298109 C T 999 PASS DP4=194136,45753,94945,14367;DP=356657;Dels=0;FS=3200;HWE=0.177865;ICF=0.0198;MQ0=0;MQ=47;PV4=0,0,0,1;QD=14.57;AN=2;AC=2 GT:PL:DP:GQ 1/1:255,235,0:78:99
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/view.3.out Wed Apr 08 12:09:16 2015 -0400 @@ -0,0 +1,34 @@ +##fileformat=VCFv4.1 +##FILTER=<ID=PASS,Description="All filters passed"> +##reference=file:///seq/references/1000Genomes-NCBI37.fasta +##contig=<ID=11,length=135006516> +##contig=<ID=20,length=63025520> +##contig=<ID=X,length=155270560> +##contig=<ID=Y,length=59373566> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth"> +##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"> +##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of reads containing spanning deletions"> +##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest contiguous homopolymer run of variant allele in either direction"> +##INFO=<ID=HWE,Number=1,Type=Float,Description="Hardy-Weinberg equilibrium test (PMID:15789306)"> +##INFO=<ID=ICF,Number=1,Type=Float,Description="Inbreeding coefficient F"> +##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> +##INFO=<ID=IS,Number=2,Type=Float,Description="Maximum number of reads supporting an indel and fraction of indel reads"> +##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads"> +##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total mapping quality zero reads"> +##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias"> +##INFO=<ID=QD,Number=1,Type=Float,Description="Variant confidence/quality by depth"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases"> 
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods"> +##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]"> +##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]"> +##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]"> +##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]"> +##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00003 +X 2928329 rs62584840 C T 999 PASS DP4=302,9137,32,1329;DP=11020;Dels=0;FS=13.38;HWE=0.284332;ICF=0.0253;MQ0=0;MQ=49;PV4=0.094,0,0,1;QD=18.61;AN=2;AC=1 GT:PL:DP:GQ 0/1:73,0,19:4:30 +X 2933066 rs61746890 G C 999 PASS DP4=69865,100561,461,783;DP=173729;Dels=0;FS=10.833;MQ0=0;MQ=50;PV4=0.005,3.6e-14,0,1;QD=15.33;AN=2;AC=1 GT:PL:DP:GQ 0/1:255,255,255:62:99
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Apr 08 12:09:16 2015 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- Maps each required package to the Tool Shed repository that provides it. -->
+    <package name="bcftools" version="1.0"> <!-- NOTE(review): declared as 1.0 but the repository is named package_bcftools_1_2; confirm the version. -->
+        <repository name="package_bcftools_1_2" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="cb6cd479c02e" />
+    </package>
+    <package name="tabix" version="1.0"> <!-- NOTE(review): declared as 1.0 but the repository is named package_tabix_0_2_6; confirm the version. -->
+        <repository name="package_tabix_0_2_6" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="54f8b9bf9b6c" />
+    </package>
+</tool_dependency>
