changeset 2:49b5bd3dc316

Add rmInfo tool
author Jim Johnson <jj@umn.edu>
date Tue, 21 Oct 2014 12:17:25 -0500
parents 796388c291d3
children 1739678def32
files snpSift_rmInfo.xml test-data/test_rmInfo.vcf
diffstat 2 files changed, 66 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_rmInfo.xml	Tue Oct 21 12:17:25 2014 -0500
@@ -0,0 +1,56 @@
+<tool id="snpSift_rmInfo" name="SnpSift rmInfo" version="4.0.0">
+    <description>remove INFO field annotations</description>
+    <macros>
+        <import>snpEff_macros.xml</import>
+    </macros> <!-- shared macro definitions are imported before their first use below -->
+    <expand macro="requirements"/> <!-- presumably defined in snpEff_macros.xml (not visible here); verify -->
+    <command>
+      java -Xmx2G -jar \$SNPEFF_JAR_PATH/SnpSift.jar rmInfo $input #echo ' '.join(str($info_fields).split(','))# > $output
+    </command> <!-- Cheetah #echo evaluates the Python expression, so "EFF,DP" reaches rmInfo as the separate arguments "EFF DP" -->
+    <inputs>
+        <param name="input" type="data" format="vcf" label="Variant input file in VCF format" />
+        <param name="info_fields" type="text" value="" label="Info fields to remove, e.g. EFF">
+            <validator type="empty_field" />
+            <help>Separate multiple INFO fields with a comma, e.g.: EFF,DP</help>
+        </param>
+    </inputs> <!-- inputs: one VCF dataset plus a non-empty, comma-separated list of INFO field names -->
+    <outputs>
+        <data name="output" format="vcf"/>
+    </outputs> <!-- single VCF dataset; the command redirects its stdout into it -->
+    <expand macro="stdio"/>
+    <tests>
+        <test>
+            <param name="input" ftype="vcf" value="test_rmInfo.vcf"/>
+            <param name="info_fields" value="EFF"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="DP=29" />
+                    <not_has_text text="EFF=EXON" />
+                </assert_contents>
+            </output>
+        </test> <!-- single field: EFF is removed while DP is left in place -->
+        <test>
+            <param name="input" ftype="vcf" value="test_rmInfo.vcf"/>
+            <param name="info_fields" value="EFF,DP"/>
+            <output name="output">
+                <assert_contents>
+                    <not_has_text text="DP=29" />
+                </assert_contents>
+            </output>
+        </test> <!-- comma-separated list: DP must be removed as well -->
+    </tests>
+    <help>
+This command removes INFO fields from a VCF file (i.e. removes annotations).
+
+INFO fields are usually removed so that the VCF file can be re-annotated: the old INFO fields are removed first, and new ones are added later.
+
+SnpEff &amp; SnpSift only add annotations and do not change current ones. So, in order to re-annotate a file, you should first remove the old annotations and then re-annotate. 
+The reason for this behavior is that replacing annotation values is considered bad practice. Imagine that you have a VCF entry in your re-annotated file with the value "AA=1": how do you know whether it comes from the old annotations or from the new ones? This confusion often leads to problems in downstream steps of your pipelines, so it is better to avoid the problem by first removing all the previous annotations and then adding the new ones.
+
+@EXTERNAL_DOCUMENTATION@
+	http://snpeff.sourceforge.net/SnpSift.html#rmInfo
+
+@CITATION_SECTION@
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_rmInfo.vcf	Tue Oct 21 12:17:25 2014 -0500
@@ -0,0 +1,10 @@
+##fileformat=VCFv4.1
+##samtoolsVersion=0.1.18 (r982:295)
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank  | Genotype_Number [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	/data/sequencing/output/biotec4/mapping/L774.q1.s.bam	/data/sequencing/output/biotec4/mapping/L775.q1.s.bam
+chr4	100239319	rs1229984	T	C	94.3	.	DP=29;EFF=EXON(MODIFIER|||||ADH1B|processed_transcript|CODING|ENST00000504498|3|1),EXON(MODIFIER|||||ADH1B|retained_intron|CODING|ENST00000515694|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H48R|375|ADH1B|protein_coding|CODING|ENST00000305046|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H8R|335|ADH1B|protein_coding|CODING|ENST00000394887|3|1),UTR_3_PRIME(MODIFIER||2729|||ADH1B|nonsense_mediated_decay|CODING|ENST00000506651|4|1)
+chr12	32491626	rs1471909	G	A	124.0	.	DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1)
+chrX	153010066	rs11803	C	T	73.8	.	DP=34;EFF=DOWNSTREAM(MODIFIER||4008||221|ABCD1|protein_coding|CODING|ENST00000443684||1),INTRAGENIC(MODIFIER|||||ABCD1||CODING|||1),INTRON(MODIFIER|||||U52111.14|antisense|NON_CODING|ENST00000434284|1|1),UTR_3_PRIME(MODIFIER||877||745|ABCD1|protein_coding|CODING|ENST00000218104|10|1)