changeset 3:1739678def32

Add vcfCheck and test cases
author Jim Johnson <jj@umn.edu>
date Thu, 23 Oct 2014 06:06:25 -0500
parents 49b5bd3dc316
children baf6602903e1
files snpSift_vcfCheck.xml test-data/test_bad.vcf test-data/test_rmEff.vcf test-data/test_rmInfo.vcf
diffstat 4 files changed, 61 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_vcfCheck.xml	Thu Oct 23 06:06:25 2014 -0500
@@ -0,0 +1,39 @@
+<tool id="snpSift_vcfCheck" name="SnpSift vcfCheck" version="4.0.0"> <!-- Galaxy wrapper that runs the SnpSift vcfCheck sub-command on one VCF dataset -->
+    <description>basic checks for Vcf specification compliance</description>
+    <macros> <!-- imported before the expand elements so the file reads top-down -->
+        <import>snpEff_macros.xml</import>
+    </macros>
+    <expand macro="requirements" /> <!-- presumably defined in snpEff_macros.xml; verify against that file -->
+    <command>
+      java -Xmx2G -jar \$SNPEFF_JAR_PATH/SnpSift.jar vcfCheck $input > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format to check"/>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" /> <!-- NOTE(review): the test below expects an "Errors" report here; confirm vcf is the right datatype -->
+    </outputs>
+    <expand macro="stdio" />
+    <tests>
+        <test>
+            <param name="input" ftype="vcf" value="test_bad.vcf"/> <!-- file name is resolved relative to the tool's test-data/ directory -->
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Errors" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+Perform some basic check ups on VCF files to spot common problems.
+
+SnpSift vcfCheck checks for some common problems where VCF files are not following the specification. Given that many common VCF problems cause analysis tools and pipelines to behave unexpectedly, this command is intended as a simple debugging tool. 
+
+@EXTERNAL_DOCUMENTATION@
+	http://snpeff.sourceforge.net/SnpSift.html#vcfCheck
+
+@CITATION_SECTION@
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_bad.vcf	Thu Oct 23 06:06:25 2014 -0500
@@ -0,0 +1,11 @@
+##fileformat=VCFv4.1
+##samtoolsVersion=0.1.18 (r982:295)
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank  | Genotype_Number [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+chr4	100239319	rs1229984	T	C	94.3	.	DP=29;EFF=EXON(MODIFIER|||||ADH1B|processed_transcript|CODING|ENST00000504498|3|1),EXON(MODIFIER|||||ADH1B|retained_intron|CODING|ENST00000515694|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H48R|375|ADH1B|protein_coding|CODING|ENST00000305046|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H8R|335|ADH1B|protein_coding|CODING|ENST00000394887|3|1),UTR_3_PRIME(MODIFIER||2729|||ADH1B|nonsense_mediated_decay|CODING|ENST00000506651|4|1)
+chr12	32491626	rs1471909	G	A	124.0	.	DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1)
+chr12	3249626	rs1471909	G	A	124.0	.	DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1)
+chrX	153010066	rs11803	C	T	73.8	.	DP=34;EFF=DOWNSTREAM(MODIFIER||4008||221|ABCD1|protein_coding|CODING|ENST00000443684||1),INTRAGENIC(MODIFIER|||||ABCD1||CODING|||1),INTRON(MODIFIER|||||U52111.14|antisense|NON_CODING|ENST00000434284|1|1),UTR_3_PRIME(MODIFIER||877||745|ABCD1|protein_coding|CODING|ENST00000218104|10|1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_rmEff.vcf	Thu Oct 23 06:06:25 2014 -0500
@@ -0,0 +1,10 @@
+##fileformat=VCFv4.1
+##samtoolsVersion=0.1.18 (r982:295)
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank  | Genotype_Number [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+chr4	100239319	rs1229984	T	C	94.3	.	DP=29
+chr12	32491626	rs1471909	G	A	124.0	.	DP=22
+chrX	153010066	rs11803	C	T	73.8	.	DP=34
--- a/test-data/test_rmInfo.vcf	Tue Oct 21 12:17:25 2014 -0500
+++ b/test-data/test_rmInfo.vcf	Thu Oct 23 06:06:25 2014 -0500
@@ -4,7 +4,7 @@
 ##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani"
 ##SnpEffCmd="SnpEff  -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat "
 ##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank  | Genotype_Number [ | ERRORS | WARNINGS ] )' ">
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	/data/sequencing/output/biotec4/mapping/L774.q1.s.bam	/data/sequencing/output/biotec4/mapping/L775.q1.s.bam
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
 chr4	100239319	rs1229984	T	C	94.3	.	DP=29;EFF=EXON(MODIFIER|||||ADH1B|processed_transcript|CODING|ENST00000504498|3|1),EXON(MODIFIER|||||ADH1B|retained_intron|CODING|ENST00000515694|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H48R|375|ADH1B|protein_coding|CODING|ENST00000305046|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H8R|335|ADH1B|protein_coding|CODING|ENST00000394887|3|1),UTR_3_PRIME(MODIFIER||2729|||ADH1B|nonsense_mediated_decay|CODING|ENST00000506651|4|1)
 chr12	32491626	rs1471909	G	A	124.0	.	DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1)
 chrX	153010066	rs11803	C	T	73.8	.	DP=34;EFF=DOWNSTREAM(MODIFIER||4008||221|ABCD1|protein_coding|CODING|ENST00000443684||1),INTRAGENIC(MODIFIER|||||ABCD1||CODING|||1),INTRON(MODIFIER|||||U52111.14|antisense|NON_CODING|ENST00000434284|1|1),UTR_3_PRIME(MODIFIER||877||745|ABCD1|protein_coding|CODING|ENST00000218104|10|1)