Mercurial > repos > jjohnson > snpsift
changeset 3:1739678def32
Add vcfCheck and test cases
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 23 Oct 2014 06:06:25 -0500 |
parents | 49b5bd3dc316 |
children | baf6602903e1 |
files | snpSift_vcfCheck.xml test-data/test_bad.vcf test-data/test_rmEff.vcf test-data/test_rmInfo.vcf |
diffstat | 4 files changed, 61 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_vcfCheck.xml Thu Oct 23 06:06:25 2014 -0500
@@ -0,0 +1,39 @@
+<tool id="snpSift_vcfCheck" name="SnpSift vcfCheck" version="4.0.0">
+    <description>basic checks for Vcf specification compliance</description>
+    <expand macro="requirements" />
+    <macros> <!-- the "requirements" and "stdio" macros expanded in this tool are presumably defined in this import; verify against snpEff_macros.xml -->
+        <import>snpEff_macros.xml</import>
+    </macros>
+    <command>
+        java -Xmx2G -jar \$SNPEFF_JAR_PATH/SnpSift.jar vcfCheck '$input' > '$output'
+    </command>
+    <inputs> <!-- $input and $output are single-quoted in the command so each path reaches the shell as one argument -->
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format to check"/>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <expand macro="stdio" />
+    <tests> <!-- test param values are file names resolved inside the tool's test-data/ directory, so no "test-data/" prefix -->
+        <test>
+            <param name="input" ftype="vcf" value="test_bad.vcf"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Errors" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+Perform some basic check ups on VCF files to spot common problems.
+
+SnpSift vcfCheck checks for some common problems where VCF files are not following the specification. Given that many common VCF problems cause analysis tools and pipelines to behave unexpectedly, this command is intended as a simple debugging tool.
+
+@EXTERNAL_DOCUMENTATION@
+    http://snpeff.sourceforge.net/SnpSift.html#vcfCheck
+
+@CITATION_SECTION@
+
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_bad.vcf Thu Oct 23 06:06:25 2014 -0500 @@ -0,0 +1,11 @@ +##fileformat=VCFv4.1 +##samtoolsVersion=0.1.18 (r982:295) +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth"> +##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank | Genotype_Number [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO +chr4 100239319 rs1229984 T C 94.3 . DP=29;EFF=EXON(MODIFIER|||||ADH1B|processed_transcript|CODING|ENST00000504498|3|1),EXON(MODIFIER|||||ADH1B|retained_intron|CODING|ENST00000515694|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H48R|375|ADH1B|protein_coding|CODING|ENST00000305046|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H8R|335|ADH1B|protein_coding|CODING|ENST00000394887|3|1),UTR_3_PRIME(MODIFIER||2729|||ADH1B|nonsense_mediated_decay|CODING|ENST00000506651|4|1) +chr12 32491626 rs1471909 G A 124.0 . DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1) +chr12 3249626 rs1471909 G A 124.0 . 
DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1) +chrX 153010066 rs11803 C T 73.8 . DP=34;EFF=DOWNSTREAM(MODIFIER||4008||221|ABCD1|protein_coding|CODING|ENST00000443684||1),INTRAGENIC(MODIFIER|||||ABCD1||CODING|||1),INTRON(MODIFIER|||||U52111.14|antisense|NON_CODING|ENST00000434284|1|1),UTR_3_PRIME(MODIFIER||877||745|ABCD1|protein_coding|CODING|ENST00000218104|10|1)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_rmEff.vcf Thu Oct 23 06:06:25 2014 -0500 @@ -0,0 +1,10 @@ +##fileformat=VCFv4.1 +##samtoolsVersion=0.1.18 (r982:295) +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth"> +##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank | Genotype_Number [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO +chr4 100239319 rs1229984 T C 94.3 . DP=29 +chr12 32491626 rs1471909 G A 124.0 . DP=22 +chrX 153010066 rs11803 C T 73.8 . DP=34
--- a/test-data/test_rmInfo.vcf Tue Oct 21 12:17:25 2014 -0500 +++ b/test-data/test_rmInfo.vcf Thu Oct 23 06:06:25 2014 -0500 @@ -4,7 +4,7 @@ ##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani" ##SnpEffCmd="SnpEff -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat " ##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank | Genotype_Number [ | ERRORS | WARNINGS ] )' "> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT /data/sequencing/output/biotec4/mapping/L774.q1.s.bam /data/sequencing/output/biotec4/mapping/L775.q1.s.bam +#CHROM POS ID REF ALT QUAL FILTER INFO chr4 100239319 rs1229984 T C 94.3 . DP=29;EFF=EXON(MODIFIER|||||ADH1B|processed_transcript|CODING|ENST00000504498|3|1),EXON(MODIFIER|||||ADH1B|retained_intron|CODING|ENST00000515694|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H48R|375|ADH1B|protein_coding|CODING|ENST00000305046|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H8R|335|ADH1B|protein_coding|CODING|ENST00000394887|3|1),UTR_3_PRIME(MODIFIER||2729|||ADH1B|nonsense_mediated_decay|CODING|ENST00000506651|4|1) chr12 32491626 rs1471909 G A 124.0 . DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1) chrX 153010066 rs11803 C T 73.8 . 
DP=34;EFF=DOWNSTREAM(MODIFIER||4008||221|ABCD1|protein_coding|CODING|ENST00000443684||1),INTRAGENIC(MODIFIER|||||ABCD1||CODING|||1),INTRON(MODIFIER|||||U52111.14|antisense|NON_CODING|ENST00000434284|1|1),UTR_3_PRIME(MODIFIER||877||745|ABCD1|protein_coding|CODING|ENST00000218104|10|1)