diff snpSift_annotate.xml @ 5:8952990fcab9

Update to snpEff version 3.4 and add data managers to download snpEff genome reference databases
author Jim Johnson <jj@umn.edu>
date Wed, 27 Nov 2013 09:11:32 -0600
parents 6ad9205c1307
children 0ad9733e22a4
line wrap: on
line diff
--- a/snpSift_annotate.xml	Mon Oct 07 10:59:44 2013 -0500
+++ b/snpSift_annotate.xml	Wed Nov 27 09:11:32 2013 -0600
@@ -1,11 +1,11 @@
-<tool id="snpSift_annotate" name="SnpSift Annotate" version="3.3">
+<tool id="snpSift_annotate" name="SnpSift Annotate" version="3.4">
 	<description>Annotate SNPs from dbSnp</description>
 	<!-- 
 	    You will need to change the path to wherever your installation is.
 		You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
 	-->
 	<requirements>
-                <requirement type="package" version="3.3">snpEff</requirement>
+                <requirement type="package" version="3.4">snpEff</requirement>
 	</requirements>
 	<command>
         java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar $annotate_cmd 
@@ -20,18 +20,22 @@
 		<param format="vcf" name="input" type="data" label="VCF input"/>
 		<param format="vcf" name="dbSnp" type="data" label="VCF File with ID field annotated (e.g. dnSNP.vcf)" 
                        help="The ID field for a variant in input will be assigned from a matching variant in this file."/>
-		<param name="annotate_cmd" type="boolean" truevalue="annMem" falsevalue="annotate" checked="false" label="Annotate in Memory" 
-                       help="allows unsorted VCF files, but it loads the entire 'database' VCF file into memory (which may not be practical for large 'database' VCF files)"/>
                 <conditional name="annotate">
 			<param name="id" type="boolean" truevalue="id" falsevalue="info" checked="True" label="Only annotate ID field (do not add INFO field)" help=""/>
 			<when value="id"/>
 			<when value="info">
-				<param name="info_ids" type="text" value="" optional="true" label="Limit INFO annotation to these INFO IDs"
+				<param name="info_ids" type="text" value="" size="60" optional="true" label="Limit INFO annotation to these INFO IDs"
                                        help="list is a comma separated list of fields. When blank, all INFO fields are included">	
 					<validator type="regex" message="IDs separted by commas">^(([a-zA-Z][a-zA-Z0-9_-]*)(,[a-zA-Z][a-zA-Z0-9_-]*)*)?$</validator>
 				</param>
 			</when>
                 </conditional>
+		<param name="annotate_cmd" type="boolean" truevalue="annMem" falsevalue="annotate" checked="false" label="Annotate in Memory"> 
+                       <help>
+                       Allows unsorted VCF files, but it loads the entire 'database' VCF file into memory (which may not be practical for large 'database' VCF files).
+                       Otherwise, both the database and the input VCF files should be sorted by position (Chromosome sort order can differ between files). 
+                       </help>
+                </param>
 	</inputs>
         <stdio>
           <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
@@ -60,6 +64,44 @@
 
 For details about this tool, please go to http://snpeff.sourceforge.net/SnpSift.html#annotate
 
+Annotating only the ID field from dbSnp137.vcf ::
+
+    Input VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    .            T    C    0.0    FAIL    NS=244
+    22      16350245    .            C    A    0.0    FAIL    NS=192
+
+    Annotated Output VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    rs56234788   T    C    0.0    FAIL    NS=244
+    22      16350245    rs2905295    C    A    0.0    FAIL    NS=192
+
+
+
+Annotating both the ID and INFO fields from dbSnp137.vcf ::
+
+    Input VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    .            T    C    0.0    FAIL    NS=244
+    22      16350245    .            C    A    0.0    FAIL    NS=192
+
+    Annotated Output VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    rs56234788   T    C    0.0    FAIL    NS=244;RSPOS=16346045;GMAF=0.162248628884826;dbSNPBuildID=129;SSR=0;SAO=0;VP=050100000000000100000100;WGT=0;VC=SNV;SLO;GNO
+    22      16350245    rs2905295    C    A    0.0    FAIL    NS=192;RSPOS=16350245;GMAF=0.230804387568556;dbSNPBuildID=101;SSR=1;SAO=0;VP=050000000000000100000140;WGT=0;VC=SNV;GNO
+
+
+SnpEff citation:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et al., Frontiers in Genetics, 3, 2012.
+
+
 	</help>
 </tool>