Previous changeset 2:49b5bd3dc316 (2014-10-21) Next changeset 4:baf6602903e1 (2015-12-09) |
Commit message:
Add vcfCheck and test cases |
modified:
test-data/test_rmInfo.vcf |
added:
snpSift_vcfCheck.xml test-data/test_bad.vcf test-data/test_rmEff.vcf |
b |
diff -r 49b5bd3dc316 -r 1739678def32 snpSift_vcfCheck.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/snpSift_vcfCheck.xml Thu Oct 23 06:06:25 2014 -0500 |
b |
@@ -0,0 +1,39 @@ +<tool id="snpSift_vcfCheck" name="SnpSift vcfCheck" version="4.0.0"> + <description>basic checks for Vcf specification compliance</description> + <expand macro="requirements" /> + <macros> + <import>snpEff_macros.xml</import> + </macros> + <command> + java -Xmx2G -jar \$SNPEFF_JAR_PATH/SnpSift.jar vcfCheck $input > $output + </command> + <inputs> + <param format="vcf" name="input" type="data" label="Variant input file in VCF format to check"/> + </inputs> + <outputs> + <data format="vcf" name="output" /> + </outputs> + <expand macro="stdio" /> + <tests> + <test> + <param name="input" ftype="vcf" value="test-data/test_bad.vcf"/> + <output name="output"> + <assert_contents> + <has_text text="Errors" /> + </assert_contents> + </output> + </test> + </tests> + <help> + +Perform some basic check ups on VCF files to spot common problems. + +SnpSift vcfCheck checks for some common problems where VCF files are not following the specification. Given that many common VCF problems cause analysis tools and pipelines to behave unexpectedly, this command is intended as a simple debugging tool. + +@EXTERNAL_DOCUMENTATION@ + http://snpeff.sourceforge.net/SnpSift.html#vcfCheck + +@CITATION_SECTION@ + + </help> +</tool> |
b |
diff -r 49b5bd3dc316 -r 1739678def32 test-data/test_bad.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_bad.vcf Thu Oct 23 06:06:25 2014 -0500 |
[ |
@@ -0,0 +1,11 @@ +##fileformat=VCFv4.1 +##samtoolsVersion=0.1.18 (r982:295) +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth"> +##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank | Genotype_Number [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO +chr4 100239319 rs1229984 T C 94.3 . DP=29;EFF=EXON(MODIFIER|||||ADH1B|processed_transcript|CODING|ENST00000504498|3|1),EXON(MODIFIER|||||ADH1B|retained_intron|CODING|ENST00000515694|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H48R|375|ADH1B|protein_coding|CODING|ENST00000305046|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H8R|335|ADH1B|protein_coding|CODING|ENST00000394887|3|1),UTR_3_PRIME(MODIFIER||2729|||ADH1B|nonsense_mediated_decay|CODING|ENST00000506651|4|1) +chr12 32491626 rs1471909 G A 124.0 . DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1) +chr12 3249626 rs1471909 G A 124.0 . DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1) +chrX 153010066 rs11803 C T 73.8 . DP=34;EFF=DOWNSTREAM(MODIFIER||4008||221|ABCD1|protein_coding|CODING|ENST00000443684||1),INTRAGENIC(MODIFIER|||||ABCD1||CODING|||1),INTRON(MODIFIER|||||U52111.14|antisense|NON_CODING|ENST00000434284|1|1),UTR_3_PRIME(MODIFIER||877||745|ABCD1|protein_coding|CODING|ENST00000218104|10|1) |
b |
diff -r 49b5bd3dc316 -r 1739678def32 test-data/test_rmEff.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_rmEff.vcf Thu Oct 23 06:06:25 2014 -0500 |
[ |
@@ -0,0 +1,10 @@ +##fileformat=VCFv4.1 +##samtoolsVersion=0.1.18 (r982:295) +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth"> +##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank | Genotype_Number [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO +chr4 100239319 rs1229984 T C 94.3 . DP=29 +chr12 32491626 rs1471909 G A 124.0 . DP=22 +chrX 153010066 rs11803 C T 73.8 . DP=34 |
b |
diff -r 49b5bd3dc316 -r 1739678def32 test-data/test_rmInfo.vcf --- a/test-data/test_rmInfo.vcf Tue Oct 21 12:17:25 2014 -0500 +++ b/test-data/test_rmInfo.vcf Thu Oct 23 06:06:25 2014 -0500 |
[ |
@@ -4,7 +4,7 @@ ##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani" ##SnpEffCmd="SnpEff -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat " ##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank | Genotype_Number [ | ERRORS | WARNINGS ] )' "> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT /data/sequencing/output/biotec4/mapping/L774.q1.s.bam /data/sequencing/output/biotec4/mapping/L775.q1.s.bam +#CHROM POS ID REF ALT QUAL FILTER INFO chr4 100239319 rs1229984 T C 94.3 . DP=29;EFF=EXON(MODIFIER|||||ADH1B|processed_transcript|CODING|ENST00000504498|3|1),EXON(MODIFIER|||||ADH1B|retained_intron|CODING|ENST00000515694|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H48R|375|ADH1B|protein_coding|CODING|ENST00000305046|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H8R|335|ADH1B|protein_coding|CODING|ENST00000394887|3|1),UTR_3_PRIME(MODIFIER||2729|||ADH1B|nonsense_mediated_decay|CODING|ENST00000506651|4|1) chr12 32491626 rs1471909 G A 124.0 . DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1) chrX 153010066 rs11803 C T 73.8 . DP=34;EFF=DOWNSTREAM(MODIFIER||4008||221|ABCD1|protein_coding|CODING|ENST00000443684||1),INTRAGENIC(MODIFIER|||||ABCD1||CODING|||1),INTRON(MODIFIER|||||U52111.14|antisense|NON_CODING|ENST00000434284|1|1),UTR_3_PRIME(MODIFIER||877||745|ABCD1|protein_coding|CODING|ENST00000218104|10|1) |